diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000000..b8ae386167 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,14 @@ +# Commits listed here are skipped by `git blame` so that mechanical, whole-tree +# reformats do not obscure the author who actually wrote each line. +# +# GitHub honors this file automatically in the web blame view. For `git blame` +# on the command line, opt in once per clone: +# +# git config blame.ignoreRevsFile .git-blame-ignore-revs +# +# When adding a new entry, include a one-line comment above the SHA explaining +# what the commit did and why it should be skipped. Only mechanical reformats +# belong here -- never use this to hide substantive changes. + +# Apply Palantir Java Format to entire codebase (#1761) +4b75dd524198dea5b789fd383f99ce974510fb1d diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 64fe051ba9..4f1a4eb7f6 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -21,6 +21,8 @@ jobs: name: Java ${{ matrix.Java }} build and test steps: - uses: actions/checkout@v3 + with: + fetch-depth: 0 # full history + tags so palantir/git-version sees the latest release tag - name: Set up java ${{ matrix.Java }} uses: actions/setup-java@v3 with: @@ -49,6 +51,8 @@ jobs: name: Tests that require external APIs steps: - uses: actions/checkout@v3 + with: + fetch-depth: 0 # full history + tags so palantir/git-version sees the latest release tag - name: Set up java 17 uses: actions/setup-java@v3 with: @@ -67,11 +71,30 @@ jobs: with: name: test-results-external-apis path: build/reports/tests + formatCheck: + runs-on: ubuntu-latest + name: Java Format Check + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 # full history + tags so palantir/git-version sees the latest release tag + - name: Set up java 17 + uses: actions/setup-java@v3 + with: + java-version: '17' + distribution: 'adopt' + cache: gradle + - name: Grant execute permission for 
gradlew + run: chmod +x gradlew + - name: Verify formatting + run: ./gradlew spotlessCheck spotBugs: runs-on: ubuntu-latest name: SpotBugs steps: - uses: actions/checkout@v3 + with: + fetch-depth: 0 # full history + tags so palantir/git-version sees the latest release tag - name: Set up java 17 uses: actions/setup-java@v3 with: diff --git a/.gitignore b/.gitignore index 03a8d6d509..606b65033a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,6 @@ htsjdk.iws .command_tmp atlassian-ide-plugin.xml -/htsjdk.version.properties /test-output/ .DS_Store diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index f00fe8b27e..0000000000 --- a/.travis.yml +++ /dev/null @@ -1,56 +0,0 @@ -language: java -dist: trusty -sudo: true -services: - - docker -before_cache: - - rm -f $HOME/.gradle/caches/modules-2/modules-2.lock -cache: - directories: - - $HOME/.gradle/caches/ - - $HOME/.gradle/wrapper/ - - $HOME/.m2 -env: - global: - - HTSJDK_SAMTOOLS_BIN=/usr/bin/samtools -jdk: - - oraclejdk8 - - openjdk8 - - openjdk11 -matrix: - fast_finish: true - allow_failures: - - env: TEST_TYPE=EXTERNAL_APIS - - env: TEST_TYPE=FTP - include: - - jdk: oraclejdk8 - env: TEST_TYPE=EXTERNAL_APIS - - jdk: oraclejdk8 - env: TEST_TYPE=FTP - - jdk: openjdk8 - env: SPOT_BUGS=true - -before_install: - - scripts/install-samtools.sh - - scripts/htsget-scripts/start-htsget-test-server.sh - -script: - - if [[ $SPOT_BUGS == "true" ]]; then - ./gradlew spotBugsMain spotBugsTest; - elif [[ $TEST_TYPE == "FTP" ]]; then - ./gradlew testFTP jacocoTestReport; - elif [[ $TEST_TYPE == "EXTERNAL_APIS" ]]; then - ./gradlew testExternalApis jacocoTestReport; - else - ./gradlew test jacocoTestReport; - fi - -after_success: - - bash <(curl -s https://raw.githubusercontent.com/broadinstitute/codecov-bash-uploader/main/codecov-verified.bash) - - echo "TRAVIS_BRANCH='$TRAVIS_BRANCH'"; - echo "JAVA_HOME='$JAVA_HOME'"; - if [ "$TRAVIS_BRANCH" == "master" ]; then - if [[ $JAVA_HOME = *java-8-openjdk* ]]; then - 
./gradlew publish; - fi; - fi; diff --git a/CHANGELOG.md b/CHANGELOG.md index 3472be10fc..83f6c1035b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,132 @@ early infrastructure for a plugin-based codec framework and resource bundles. --- +## 5.0.0 + +Major release. + +### Headlines + +- **CRAM 3.1 write support** (the culmination of the read-side codec work in 4.2.0 and the reader wiring in 4.3.0 — htsjdk can now produce CRAM 3.1 files that are interoperable with samtools/htslib). +- **CRAM 3.1 is now the default write version** (previously 3.0). On the same input, files written with the new default `NORMAL` profile are roughly 36% smaller and encode 18-20% faster than what htsjdk 4.3 produced with its `FAST` (3.0) default. +- **Major speed-ups across the BAM and CRAM read/write paths** vs htsjdk 4.3.0. Measured on AWS m8gd / m8id (single thread, 32.7M-read input), the headline wins are: BAM write 50-58% faster, CRAM encode (FAST) 41-47% faster, CRAM read 42-46% faster, BAM read 30-31% faster. +- **`jlibdeflate` is now the default DEFLATE engine** ([jlibdeflate](https://github.com/fulcrumgenomics/jlibdeflate) wrapping native libdeflate); falls back to the JDK `Deflater`/`Inflater` if the native library cannot be loaded. +- **Slimmed-down runtime dependency tree** (SRA support removed, Nashorn moved to an opt-in dependency, several stale or misleading dependency declarations cleaned up). +- **Enforced automatic code formatting** via Palantir Java Format on every build. +- **Unit test improvements**: pass/fail stats now reported correctly when run via Gradle, and total suite runtime massively reduced (now 2-3 minutes). + +### ⚠️ Breaking changes + +Consumers should review these before upgrading. + +- **SRA support removed.** All `htsjdk.samtools.sra.*` types, `SRAFileReader`, `SRAIterator`, + `SRAIndex`, `SamInputResource.of(SRAAccession)`, `SamReader.Type.SRA_TYPE`, and the + `InputResource.Type.SRA_ACCESSION` enum value have been deleted. 
The + `gov.nih.nlm.ncbi:ngs-java` dependency (and the `samjdk.sra_libraries_download` system + property) are gone. Consumers needing SRA access must use NCBI's tooling or a different + library (#1774). +- **Nashorn is no longer a transitive runtime dependency.** The `JavascriptSamRecordFilter` + and `JavascriptVariantFilter` classes still exist but htsjdk no longer ships + `org.openjdk.nashorn:nashorn-core` (or its 5 ASM transitives) on consumers' runtime + classpath. Consumers who use the JavaScript filter classes must add + `org.openjdk.nashorn:nashorn-core:15.7` (or another JSR-223 `"js"` engine) to their own + runtime classpath; the no-engine error message names the artifact and prints both Gradle + and Maven coordinates (#1775). +- **`SAMRecord.toString()` now returns the full SAM-format string** for the record (all 11 + mandatory SAM fields plus tags), replacing the previous minimal summary. The previous + output was usually insufficient to debug failures in `println()` calls or test-assertion + messages; the new output is the same line you would see in a SAM file. Anything that + parses or asserts against the exact old format will need updating (#1762). +- **CRAM slice headers no longer include the optional content digest tags** (BD/SD/B5/S5/B1/S1). + Matches htslib/samtools behavior. Block-level CRC32 (required since CRAM 3.0) still + provides data integrity. Technically a wire-format change but with zero known practical + impact, since no known tools consume these tags. +- **Default CRAM version for writing is now 3.1** (was 3.0). CRAM 3.0 readers will not be + able to read newly-produced files; pass an explicit version to the writer if you need 3.0 + output. 
+ +### CRAM 3.1 Write Support + +- Enable CRAM 3.1 writing with all spec codecs: rANS Nx16, adaptive arithmetic Range coder, FQZComp, Name Tokenisation, and STRIPE +- Add configurable compression profiles (FAST, NORMAL, SMALL, ARCHIVE) with trial compression for automatic codec selection +- Implement `TrialCompressor` to replace ad-hoc triple-compression for tags and align trial candidates with htslib +- Add `GzipCodec` for direct Deflater/Inflater GZIP compression, wired into CRAM as a codec option +- Strip NM/MD tags on CRAM encode and regenerate on decode, matching htslib behavior +- Implement attached (same-slice) mate pair resolution +- Align DataSeries content IDs with htslib for cross-implementation debugging +- Remove content digest tags (BD/SD/B5/S5/B1/S1) from CRAM slice headers, matching htslib/samtools behavior (see Breaking changes) +- Default CRAM version for writing is now 3.1 (was 3.0; see Breaking changes) +- Add `CramConverter` command-line tool for testing and benchmarking CRAM write profiles +- Add cross-implementation CRAM validation pipeline (`validation/`) for round-tripping against samtools/htslib +- Add bases-per-slice threshold to bound slice memory when writing long reads +- Refine `CompressionHeader` map serialization +- Resolve a pile of in-tree `TODO`s in CRAM structure classes + +### CRAM correctness and cross-implementation fixes + +These fixes apply to both reading and writing CRAM and substantially improve interoperability with samtools/htslib. 
+ +- Fix CRAM `TLEN` computation to match htslib (cross-tool comparisons of the same input now produce matching `TLEN` values) +- Fix `CIGAR` reconstruction when the sequence is `*` (`CF_UNKNOWN_BASES`) +- Fix `=`/`X` `CIGAR` op comparison in cross-implementation tests +- Fix CRAM archive header overflow on large containers +- Fix crash when reading a CRAM container with no slices +- Fix unmapped-read query in the hts-specs compliance harness +- Document the supplementary/secondary read-name resolution limitation in the writer + +### Codec and Compression Optimizations + +- Refactor and optimize all rANS codecs: byte-array API, backwards-write encoding, and general simplifications +- Optimize Name Tokeniser encoder: replace regex with hand-written parser; add per-type flags, STRIPE support, stream deduplication, and all-MATCH elimination +- Optimize FQZComp, Range coder, and rANS encoder hot paths +- Tune NORMAL profile codec assignments based on empirical compression testing + +### Performance + +- Integrate [jlibdeflate](https://github.com/fulcrumgenomics/jlibdeflate) for native libdeflate-backed DEFLATE compression and decompression. 
Used by default; falls back to the JDK Deflater/Inflater if the native library cannot be loaded (#1768) +- A few targeted optimizations to the BAM decoding path yielding ~6-7% improvement in BAM read performance (#1764) +- Replace `ByteArrayInputStream`/`ByteArrayOutputStream` with unsynchronized `CRAMByteReader`/`CRAMByteWriter` to eliminate synchronization overhead in CRAM +- Fuse read base restoration, CIGAR building, and NM/MD computation into a single pass during CRAM decode +- Cache tag key metadata to eliminate per-record `String` allocation during CRAM decode +- Pool `RANSNx16Decode` instances in the Name Tokeniser +- Optimize BAM nibble-to-ASCII base decoding with a bulk lookup table + +### Bug fixes + +- Fix LTF8 9-byte write bug: wrong bit shift (`>> 28` instead of `>> 24`) corrupted the high byte of large CRAM offsets (#1765) +- Fix `SamLocusIterator` so that read position is not incorrectly offset (#1758) +- Fix asymmetric `SamPairUtil.getPairOrientation` on dovetail pairs (#1771) +- Catch `UnsatisfiedLinkError` when loading the snappy native library so failure to load it does not abort downstream consumers (#1753) + +### Build, tooling, and dependency clean-up + +- **Code formatting:** apply [Palantir Java Format](https://github.com/palantir/palantir-java-format) to the entire codebase and enforce it on every build via [Spotless](https://github.com/diffplug/spotless). `compileJava` auto-formats source in place; CI separately runs `spotlessCheck` as the enforcement boundary. See `CONTRIBUTING.md` for details, including the `.git-blame-ignore-revs` opt-in for the bulk-format commit (#1761) +- **Maven Central publishing migrated** from the legacy OSSRH endpoint to the new [Sonatype Central Portal](https://central.sonatype.com), via the [NMCP Gradle plugin](https://github.com/GradleUp/nmcp). Consumer-visible groupId/artifactId/version coordinates are unchanged (#1769) +- **Snapshot versioning** now embeds the short commit hash (e.g. 
`5.0.0-23c681a-SNAPSHOT`) so each snapshot is a distinct, pinnable artifact rather than a moving Maven SNAPSHOT (#1772) +- **Test runner** now correctly reports failures rather than silently skipping them when a `@DataProvider` throws (#1759) +- **Existing API deprecations** cleaned up across `htsjdk.samtools` and `htsjdk.variant` (#1767) +- **`commons-logging` direct declaration removed.** htsjdk does not use commons-logging itself; the version pin is now expressed as a Gradle dependency constraint and only kicks in transitively when JEXL pulls it +- **Nashorn moved to `compileOnly`** — see Breaking changes +- **`gov.nih.nlm.ncbi:ngs-java` removed** — see Breaking changes (SRA support) + +### Compatibility + +- Compiled and tested against JDK 17 (CI default), 21, and 24. CI continues to build only on 17. htsjdk's published minimum remains Java 17 (set in 4.0.0) + +### Testing and Infrastructure + +- Add hts-specs CRAM 3.0 / 3.1 decode-compliance tests, plus FQZComp round-trip tests using hts-specs quality data +- Add CRAI index query correctness tests and codec round-trip property tests +- Split CRAM 3.1 fidelity tests into per-profile classes for parallel execution +- Speed up BCF2 and SeekableStream integration tests; cache test data in CRAM index test classes +- Reduce `CRAMFileBAIIndexTest` from 4 to 2 slice-size variants, sampling every 200th +- Downsample the CEUTrio test CRAM from ~654K to ~150K records (47 MB → 11 MB) +- Reduce memory pressure in unit tests to eliminate OOM failures +- Fix thread-safety bug in `VariantContextTestProvider` causing non-deterministic test counts +- Bulk up the JavaScript filter test suites: replace 4 checked-in `.js` fixtures with 46 small inline-script tests covering all three constructors, return-type semantics, bindings, and error paths (#1775) + +--- + ## 4.3.0 (2025-05-09) Completes CRAM 3.1 read support by wiring the codec implementations (added in 4.2.0) into diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 
100644 index 0000000000..e84d014a2e --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,300 @@ +# Contributing to HTSJDK + +## Code Style + +HTSJDK uses [Palantir Java Format](https://github.com/palantir/palantir-java-format) +(applied via the [Spotless](https://github.com/diffplug/spotless) Gradle plugin) +to enforce a single, mechanical code style across the codebase. There are no +formatting knobs to configure -- the formatter is the style guide. + +Formatting is applied automatically as part of `compileJava`: every build +runs `spotlessJavaApply`, which rewrites any unformatted source in place +before compiling. In normal use you shouldn't need to invoke the formatter +yourself -- just build, and your code is formatted. If you want to format +without compiling, run: + +```bash +./gradlew spotlessApply +``` + +CI runs `./gradlew spotlessCheck` (verify-only, no mutation) so a PR with +unformatted code still fails CI -- the local auto-format is a convenience, +not the enforcement boundary. + +### Git blame and the bulk-format commit + +The codebase was reformatted in a single mechanical commit. To keep `git blame` +useful (so you see the author who actually wrote each line, not the reformat +commit), the repository ships a `.git-blame-ignore-revs` file. GitHub honors +it automatically in the web UI; for `git blame` on the command line, opt in +once per clone: + +```bash +git config blame.ignoreRevsFile .git-blame-ignore-revs +``` + +## Building + +HTSJDK uses Gradle (via the Gradle wrapper). To build: + +```bash +./gradlew jar +``` + +To run tests: + +```bash +./gradlew test +``` + +To install to your local Maven repository (e.g. for testing with downstream projects): + +```bash +./gradlew install +``` + +## Publishing to Maven Central + +HTSJDK is published to Maven Central via the [Sonatype Central Portal](https://central.sonatype.com). +The build uses the [NMCP Gradle plugin](https://github.com/GradleUp/nmcp) to handle bundle +creation and upload. 
+ +### Prerequisites + +#### 1. Sonatype Central Portal Account and Tokens + +You need a Sonatype Central Portal account with access to the `com.github.samtools` namespace. + +Generate a user token: + +1. Log in to https://central.sonatype.com +2. Go to Account > User Token +3. Click "Generate User Token" +4. **Save the username and password immediately** -- they are shown only once + +#### 2. Configure Sonatype Credentials + +Gradle resolves project properties in this order (highest precedence first): + +| Priority | Method | Example | +|----------|--------|---------| +| 1 | Command-line `-P` flag | `-PsonatypeUsername=...` | +| 2 | Environment variable | `ORG_GRADLE_PROJECT_sonatypeUsername=...` | +| 3 | `~/.gradle/gradle.properties` | `sonatypeUsername=...` | + +**Option A: `~/.gradle/gradle.properties`** (recommended for local development) + +Add to `~/.gradle/gradle.properties`: + +```properties +sonatypeUsername=<your-token-username> +sonatypePassword=<your-token-password> +``` + +**Option B: Environment variables** (recommended for CI) + +```bash +export ORG_GRADLE_PROJECT_sonatypeUsername=<your-token-username> +export ORG_GRADLE_PROJECT_sonatypePassword=<your-token-password> +``` + +**Option C: Command line** (one-off use) + +```bash +./gradlew publishAllPublicationsToCentralPortal -Drelease=true \ + -PsonatypeUsername=<your-token-username> \ + -PsonatypePassword=<your-token-password> +``` + +#### 3. GPG Signing Key + +Release artifacts must be signed with a GPG key that has been published to a public keyserver. + +**Generate a new key** (if you don't have one): + +```bash +gpg --full-generate-key +``` + +- Choose RSA and RSA, 4096 bits +- Set an expiration or choose no expiration +- Choose a passphrase you will remember + +**Find your key ID:** + +```bash +gpg --list-keys --keyid-format long +``` + +The key ID is the hex string after the slash on the `pub` line, e.g. `AABBCCDD11223344` in `pub rsa4096/AABBCCDD11223344`. 
+ +**Publish to keyservers** (required -- Central Portal verifies signatures against these): + +```bash +gpg --keyserver keyserver.ubuntu.com --send-keys <KEY_ID> +gpg --keyserver keys.openpgp.org --send-keys <KEY_ID> +``` + +**Configure Gradle to use your key:** + +If you have multiple GPG keys, add to `~/.gradle/gradle.properties`: + +```properties +signing.gnupg.keyName=<KEY_ID> +``` + +If you have only one key, this is optional -- Gradle will use the default key. + +#### 4. GPG Agent and Pinentry Setup (macOS) + +The build uses `useGpgCmd()` to delegate signing to your system `gpg` command, which in turn +uses `gpg-agent` to handle passphrase prompting. This avoids storing your GPG passphrase in +plain text. + +For reliable passphrase prompting on macOS, install `pinentry-mac`: + +```bash +brew install pinentry-mac +``` + +Then configure `gpg-agent` to use it. Add to `~/.gnupg/gpg-agent.conf` (the path below is the Apple Silicon Homebrew location; on Intel Macs it is typically `/usr/local/bin/pinentry-mac` -- check with `which pinentry-mac`): + +``` +pinentry-program /opt/homebrew/bin/pinentry-mac +``` + +Restart the agent: + +```bash +gpgconf --kill gpg-agent +``` + +Without `pinentry-mac`, you may see `Inappropriate ioctl for device` errors when Gradle +invokes `gpg` for signing, because the default pinentry cannot open a prompt from Gradle's +non-interactive process. + +You should also ensure your shell has `GPG_TTY` set. Add to your `~/.zshrc` (or `~/.bashrc`): + +```bash +export GPG_TTY=$(tty) +``` + +### Version Numbering + +The build computes the version from git state plus a single declaration in +`build.gradle`: + +```groovy +final nextVersionBump = "x" // "x" major, "x.x" minor, "x.x.x" patch +``` + +`nextVersionBump` declares the *shape* of the next planned release relative to +the most recent semver tag (e.g. 
`4.3.0`): + +| Bump | Most recent tag | Computed next version | +| ------- | --------------- | --------------------- | +| `x` | `4.3.0` | `5.0.0` | +| `x.x` | `4.3.0` | `4.4.0` | +| `x.x.x` | `4.3.0` | `4.3.1` | + +What the build actually publishes: + +- **Release** (`-Drelease=true`): HEAD must be on a semver-tagged commit; the + tag itself is the version (e.g. `5.0.0`). `nextVersionBump` is ignored on + release — the tag is authoritative. +- **Snapshot** (default): `<next-version>-<short-hash>-SNAPSHOT` + (e.g. `5.0.0-23c681a-SNAPSHOT`). + +The short hash in snapshot versions makes each snapshot a distinct, pinnable +artifact rather than the usual moving-target Maven SNAPSHOT — consumers can +lock to a specific commit. Trade-off: there is no plain `5.0.0-SNAPSHOT` to +depend on for "always latest." + +To see the version the build will produce: + +```bash +./gradlew -q printVersion +``` + +After cutting a release, update `nextVersionBump` if the *next* planned release +is a different shape (e.g. switch from `x` to `x.x` once you start shipping +minor releases on a stable major line). + +### Publishing a Snapshot + +Snapshots are published from any state of the repository without signing: + +```bash +./gradlew publishAllPublicationsToCentralPortalSnapshots +``` + +**Note:** Snapshot publishing to Central Portal requires that SNAPSHOT support is enabled +on the `com.github.samtools` namespace in the Central Portal settings. + +### Publishing a Release + +Releases are published from a git tag. The full process: + +#### Step 1: Tag the Release + +Make sure `nextVersionBump` in `build.gradle` matches the kind of release you +intend to ship (major / minor / patch). Then check the version and tag the +release commit: + +```bash +./gradlew -q printVersion # prints e.g. "5.0.0-23c681a-SNAPSHOT" +``` + +Strip the `-<short-hash>-SNAPSHOT` suffix to get the tag string. 
For the example +above, that's `5.0.0`: + +```bash +git tag 5.0.0 +git push origin 5.0.0 +``` + +#### Step 2: Verify Locally (Dry Run) + +Build and sign all artifacts into your local Maven repository: + +```bash +git checkout X.Y.Z +./gradlew clean publishHtsjdkPublicationToMavenLocal -Drelease=true +``` + +Inspect the output: + +```bash +ls ~/.m2/repository/com/github/samtools/htsjdk/X.Y.Z/ +``` + +You should see: +- `htsjdk-X.Y.Z.jar` + `.asc` +- `htsjdk-X.Y.Z-javadoc.jar` + `.asc` +- `htsjdk-X.Y.Z-sources.jar` + `.asc` +- `htsjdk-X.Y.Z.pom` + `.asc` +- `htsjdk-X.Y.Z.module` + `.asc` + +#### Step 3: Publish to Maven Central + +```bash +./gradlew publishAllPublicationsToCentralPortal -Drelease=true +``` + +The NMCP plugin will: +1. Stage all artifacts locally +2. Generate checksums (MD5, SHA1, SHA256, SHA512) +3. Create a ZIP bundle +4. Upload to the Central Portal API +5. Wait for validation to pass +6. Automatically release to Maven Central (configured as `AUTOMATIC`) + +#### Step 4: Verify + +Artifacts typically appear on Maven Central within 15 minutes: + +``` +https://repo1.maven.org/maven2/com/github/samtools/htsjdk/X.Y.Z/ +``` + +Search index updates (e.g. on https://search.maven.org) may take up to 2 hours. diff --git a/README.md b/README.md index c82cbe6434..4310b5498e 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,8 @@ manipulating HTS data. > **NOTE: _HTSJDK has only partial support for the latest Variant Call Format Specification. VCFv4.3 can be read but not written, VCFv4.4 can be read in lenient mode only, and there is no support for BCFv2.2._** +> **NOTE: _HTSJDK now supports both reading and writing CRAM 3.1 files. CRAM 3.1 write support includes all codecs defined in the specification (rANS Nx16, adaptive arithmetic Range coder, FQZComp, Name Tokenisation, and STRIPE), configurable compression profiles (FAST, NORMAL, SMALL, ARCHIVE), and trial compression for automatic codec selection. 
Files produced by htsjdk are interoperable with samtools/htslib._** + ### Documentation & Getting Help API documentation for all versions of HTSJDK since `1.128` are available through [javadoc.io](http://www.javadoc.io/doc/com.github.samtools/htsjdk). @@ -94,7 +96,6 @@ Broadly speaking the majority of the code is covered under the MIT license with * Much of the CRAM code is under the Apache License, Version 2 * Core `tribble` code (underlying VCF reading/writing amongst other things) is under LGPL -* Code supporting the reading/writing of SRA format is uncopyrighted & public domain ### Java Minimum Version Support Policy diff --git a/build.gradle b/build.gradle index 0eed767aa5..969ff850c4 100644 --- a/build.gradle +++ b/build.gradle @@ -9,13 +9,27 @@ plugins { id 'maven-publish' id 'signing' id 'jacoco' - id 'com.palantir.git-version' version '0.11.0' - id 'com.github.johnrengelman.shadow' version '8.1.1' - id 'com.github.spotbugs' version "5.0.13" + id 'com.palantir.git-version' version '5.0.0' + id 'com.gradleup.shadow' version '9.4.1' + id 'com.github.spotbugs' version "6.4.8" + id 'com.gradleup.nmcp' version '1.4.4' + id 'com.diffplug.spotless' version '8.4.0' +} + +spotless { + java { + target 'src/**/*.java' + palantirJavaFormat() + } } repositories { mavenCentral() + maven { + name = 'sonatypeSnapshots' + url = 'https://central.sonatype.com/repository/maven-snapshots/' + mavenContent { snapshotsOnly() } + } } jacocoTestReport { @@ -29,14 +43,31 @@ jacocoTestReport { } dependencies { - implementation 'commons-logging:commons-logging:1.3.0' + implementation 'com.fulcrumgenomics:jlibdeflate:0.1.0' implementation "org.xerial.snappy:snappy-java:1.1.10.5" implementation 'org.apache.commons:commons-compress:1.26.0' implementation 'org.tukaani:xz:1.9' implementation "org.json:json:20231013" - implementation 'org.openjdk.nashorn:nashorn-core:15.4' - - api "gov.nih.nlm.ncbi:ngs-java:2.9.0" + + // commons-jexl 2.1.1 pulls commons-logging:1.1.1 (released 2007). 
htsjdk has no direct + // need for commons-logging itself, so we publish a version constraint rather than a real + // dependency: it kicks in only if commons-logging is pulled transitively, and bumps it to + // a maintained version. Drop this if commons-jexl is ever upgraded past 2.1.1. + constraints { + implementation('commons-logging:commons-logging:1.3.0') { + because 'jexl 2.1.1 pulls commons-logging 1.1.1 transitively; pin a maintained version' + } + } + + // Nashorn is the JSR-223 "js" engine used by the optional JavaScript filter classes + // (htsjdk.samtools.filter.JavascriptSamRecordFilter, htsjdk.variant.variantcontext.filter.JavascriptVariantFilter). + // It's compileOnly so downstream consumers who don't use those filter classes don't pay the + // cost of nashorn-core + 5 ASM artifacts on their runtime classpath. Consumers who do use + // them must add nashorn-core to their own runtime classpath; see the error message thrown + // by AbstractJavascriptFilter when no JS engine is found. + compileOnly 'org.openjdk.nashorn:nashorn-core:15.7' + testImplementation 'org.openjdk.nashorn:nashorn-core:15.7' + api "org.apache.commons:commons-jexl:2.1.1" testImplementation 'org.testng:testng:7.8.0' @@ -53,17 +84,87 @@ java { withSourcesJar() } +// Versioning +// ---------- +// The version of the *next* planned release is computed from the most recent +// release tag plus a "bump shape" declared below: +// "x" -> bump the major component (e.g. 4.3.0 -> 5.0.0) +// "x.x" -> bump the minor component (e.g. 4.3.0 -> 4.4.0) +// "x.x.x" -> bump the patch component (e.g. 4.3.0 -> 4.3.1) +// +// Release builds (-Drelease=true) require HEAD to be exactly on a semver-tagged commit +// with a clean working tree; that tag is the published version (e.g. "5.0.0"). +// +// Snapshot builds (the default) publish "<nextVersion>-<shortHash>-SNAPSHOT" +// (e.g. "5.0.0-abc1234-SNAPSHOT"). 
Including the short hash means each snapshot +// is a distinct, pinnable artifact rather than the typical Maven "moving target" +// SNAPSHOT, so consumers can lock to a specific commit. +// +// To change the planned bump (e.g. after a release lands), update nextVersionBump +// below and commit. +final nextVersionBump = "x" + final isRelease = Boolean.getBoolean("release") -final gitVersion = gitVersion().replaceAll(".dirty", "") -version = isRelease ? gitVersion : gitVersion + "-SNAPSHOT" +final details = versionDetails() +final lastTag = details.lastTag +if (lastTag == null) { + throw new GradleException("No release tags found; cannot determine version.") +} +final semverPattern = /^(\d+)\.(\d+)\.(\d+)$/ +final tagMatcher = lastTag =~ semverPattern +if (!tagMatcher.matches()) { + throw new GradleException( + "Most recent tag '${lastTag}' is not in MAJOR.MINOR.PATCH form.") +} + +if (isRelease) { + // Release: HEAD must be exactly on a semver-tagged commit AND the working tree + // must be clean; the tag IS the version. (nextVersionBump is informational/for + // snapshots only — the tag is authoritative.) + if (details.commitDistance != 0) { + throw new GradleException( + "Release requested but HEAD is not on a tagged commit " + + "(lastTag=${lastTag}, commitDistance=${details.commitDistance}).") + } + if (!details.isCleanTag) { + throw new GradleException( + "Release requested but the working tree has uncommitted changes; " + + "commit, stash, or reset before publishing.") + } + version = lastTag +} else { + // Snapshot: compute the next planned version from the most recent tag + bump shape. 
+ final lastMajor = tagMatcher.group(1).toInteger() + final lastMinor = tagMatcher.group(2).toInteger() + final lastPatch = tagMatcher.group(3).toInteger() + final String nextVersion + switch (nextVersionBump) { + case "x": nextVersion = "${lastMajor + 1}.0.0"; break + case "x.x": nextVersion = "${lastMajor}.${lastMinor + 1}.0"; break + case "x.x.x": nextVersion = "${lastMajor}.${lastMinor}.${lastPatch + 1}"; break + default: + throw new GradleException( + "Unrecognized nextVersionBump '${nextVersionBump}'; expected 'x', 'x.x', or 'x.x.x'.") + } + version = "${nextVersion}-${details.gitHash.substring(0, 7)}-SNAPSHOT" +} -logger.info("build for version:" + version) +logger.info("build for version: ${version}") +// Note: the palantir git-version plugin already provides a 'printVersion' task +// that prints the resolved project.version — useful for CI scripts and for +// figuring out what tag to apply for a release. group = 'com.github.samtools' defaultTasks 'jar' tasks.withType(JavaCompile).configureEach { options.encoding = 'UTF-8' + // Auto-format source as part of every compile. This rewrites unformatted + // files in place rather than failing -- contributors don't have to remember + // to run `./gradlew spotlessApply` themselves. spotlessJavaApply is fast + // (sub-second warm) thanks to Gradle's up-to-date checking. CI separately + // runs `spotlessCheck` (verify-only) so unformatted code can't slip past. 
+ dependsOn 'spotlessJavaApply' } tasks.withType(Javadoc).configureEach { @@ -88,36 +189,39 @@ tasks.withType(Test).configureEach { task -> // set heap size for the test JVM(s) task.minHeapSize = "1G" - task.maxHeapSize = "6G" + task.maxHeapSize = "12G" task.jvmArgs '-Djava.awt.headless=true' //this prevents awt from displaying a java icon while the tests are running int count = 0 // listen to events in the test execution lifecycle - - beforeTest { descriptor -> - count++ - if (count % 200 == 0) { - logger.lifecycle("Finished " + Integer.toString(count++) + " tests") - } - } - - testLogging { - testLogging { - events "skipped", "failed" - exceptionFormat = "full" - } - afterSuite { desc, result -> - if (!desc.parent) { // will match the outermost suite + task.addTestListener(new TestListener() { + void beforeSuite(TestDescriptor suite) {} + void afterSuite(TestDescriptor suite, TestResult result) { + if (!suite.parent) { println "Results: ${result.resultType} (${result.testCount} tests, ${result.successfulTestCount} successes, ${result.failedTestCount} failures, ${result.skippedTestCount} skipped)" } } + void beforeTest(TestDescriptor descriptor) { + count++ + if (count % 200 == 0) { + logger.lifecycle("Finished " + Integer.toString(count) + " tests") + } + } + void afterTest(TestDescriptor descriptor, TestResult result) {} + }) + + testLogging { + events "skipped", "failed" + exceptionFormat = "full" } } tasks.register('testWithDefaultReference', Test) { description = "Run tests with a default reference File" + testClassesDirs = sourceSets.test.output.classesDirs + classpath = sourceSets.test.runtimeClasspath jvmArgs += '-Dsamjdk.reference_fasta=src/test/resources/htsjdk/samtools/cram/ce.fa' useTestNG { @@ -127,6 +231,8 @@ tasks.register('testWithDefaultReference', Test) { tasks.register('testWithOptimisticVCF4_4', Test) { description = "Run tests with optimistic VCF 4.4 reading" + testClassesDirs = sourceSets.test.output.classesDirs + classpath = 
sourceSets.test.runtimeClasspath jvmArgs += '-Dsamjdk.optimistic_vcf_4_4=true' useTestNG { @@ -135,22 +241,24 @@ tasks.register('testWithOptimisticVCF4_4', Test) { } test { - description = "Runs the unit tests other than the SRA tests" + description = "Runs the unit tests other than external-API tests" useTestNG { if (OperatingSystem.current().isUnix()) { - excludeGroups "slow", "broken", "defaultReference", "optimistic_vcf_4_4", "ftp", "http", "sra", "ena", "htsget" + excludeGroups "slow", "broken", "defaultReference", "optimistic_vcf_4_4", "ftp", "http", "ena", "htsget" } else { - excludeGroups "slow", "broken", "defaultReference", "optimistic_vcf_4_4", "ftp", "http", "sra", "ena", "htsget", "unix" + excludeGroups "slow", "broken", "defaultReference", "optimistic_vcf_4_4", "ftp", "http", "ena", "htsget", "unix" } parallel = "classes" - threadCount = 2 * Runtime.runtime.availableProcessors() + threadCount = Runtime.runtime.availableProcessors() } } dependsOn testWithDefaultReference, testWithOptimisticVCF4_4 tasks.register('testFTP', Test) { description = "Runs the tests that require connection to a remote ftp server" + testClassesDirs = sourceSets.test.output.classesDirs + classpath = sourceSets.test.runtimeClasspath useTestNG { includeGroups "ftp" excludeGroups "slow", "broken" @@ -158,41 +266,48 @@ tasks.register('testFTP', Test) { } tasks.register('testExternalApis', Test) { - description = "Run the SRA, ENA, and HTTP tests (tests that interact with external APIs)" - jvmArgs += '-Dsamjdk.sra_libraries_download=true' + description = "Run the ENA and HTTP tests (tests that interact with external APIs)" + testClassesDirs = sourceSets.test.output.classesDirs + classpath = sourceSets.test.runtimeClasspath useTestNG { - includeGroups "sra", "http", "ena" + includeGroups "http", "ena" excludeGroups "slow", "broken" } } if(project == rootProject) { wrapper { - gradleVersion = '8.5' + gradleVersion = '9.4.1' } } spotbugs { - reportLevel = 'high' + reportLevel = 
com.github.spotbugs.snom.Confidence.valueOf('HIGH') excludeFilter = file('gradle/spotbugs-exclude.xml') } spotbugsMain { reports { - xml.enabled = false - html.enabled = true + xml.required = false + html.required = true } } spotbugsTest { reports { - xml.enabled = false - html.enabled = true + xml.required = false + html.required = true } } +// Fat JAR with all dependencies for standalone CLI tools (CramConverter, CramComparison, etc.) +shadowJar { + archiveClassifier.set('all') + mergeServiceFiles() +} + publishing { publications { htsjdk(MavenPublication) { @@ -228,25 +343,36 @@ publishing { } } - repositories { - maven { - credentials { - username = isRelease ? project.findProperty("sonatypeUsername") : System.env.ARTIFACTORY_USERNAME - password = isRelease ? project.findProperty("sonatypePassword") : System.env.ARTIFACTORY_PASSWORD - } - def release = "https://oss.sonatype.org/service/local/staging/deploy/maven2/" - def snapshot = "https://broadinstitute.jfrog.io/broadinstitute/libs-snapshot-local/" - url = isRelease ? release : snapshot - } - } } /** * Sign non-snapshot releases with our secret key. This should never need to be invoked directly. + * Uses the system gpg command so that gpg-agent can prompt for the passphrase. */ signing { - required { isRelease && gradle.taskGraph.hasTask("publishHtsjdkPublicationToMavenRepository") } - sign publishing.publications.htsjdk + useGpgCmd() + sign(publishing.publications.htsjdk) + required = { isRelease && gradle.taskGraph.hasTask("publishAllPublicationsToCentralPortal") } + tasks.withType(Sign).configureEach { + onlyIf { isRelease && gradle.taskGraph.hasTask("publishAllPublicationsToCentralPortal") } + } +} + +/** + * Publish to Maven Central via Sonatype Central Portal. 
+ * Credentials: generate user tokens at https://central.sonatype.com (Account > User Token) + * and store as sonatypeUsername/sonatypePassword in ~/.gradle/gradle.properties + * or as ORG_GRADLE_PROJECT_sonatypeUsername/ORG_GRADLE_PROJECT_sonatypePassword env vars. + * + * Release: ./gradlew publishAllPublicationsToCentralPortal -Drelease=true + * Snapshot: ./gradlew publishAllPublicationsToCentralPortalSnapshots + */ +nmcp { + publishAllPublicationsToCentralPortal { + username = project.findProperty("sonatypeUsername") + password = project.findProperty("sonatypePassword") + publishingType = "AUTOMATIC" + } } tasks.register('install') { dependsOn publishToMavenLocal } diff --git a/build.xml b/build.xml deleted file mode 100755 index 59bebded33..0000000000 --- a/build.xml +++ /dev/null @@ -1,62 +0,0 @@ - - - - - - - - - - ANT IS DEPRECATED FOR BUILDING HTSJDK - - Please switch to using gradlew - - Examples: - compile htsjdk or it's tests - ./gradlew compileJava - ./gradlew compileTest - - build a jar - ./gradlew jar - - build a jar, along with source and document jars - ./gradlew build - - build a jar that packages all of htsjdk's dependencies in a single jar - ./gradlew shadowJar - - run tests, or a single test, or run a test and wait for the debugger - ./gradlew test - ./gradlew test --tests "*AlleleUnitTest" - ./gradlew test --tests "*AlleleUnitTest" --debug-jvm - - clean the project directory - ./gradlew clean - - see an exhaustive list of all available targets - ./gradlew tasks - - - diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index 943f0cbfa7..d64cd49177 100644 Binary files a/gradle/wrapper/gradle-wrapper.jar and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 3499ded5c1..c61a118f7d 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,7 @@ 
distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.5-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-9.4.1-bin.zip networkTimeout=10000 +validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew index 65dcd68d65..1aa94a4269 100755 --- a/gradlew +++ b/gradlew @@ -83,10 +83,8 @@ done # This is normally unused # shellcheck disable=SC2034 APP_BASE_NAME=${0##*/} -APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit - -# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' +# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) +APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit # Use the maximum available, or set MAX_FD != -1 to use that value. MAX_FD=maximum @@ -133,10 +131,13 @@ location of your Java installation." fi else JAVACMD=java - which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. Please set the JAVA_HOME variable in your environment to match the location of your Java installation." + fi fi # Increase the maximum file descriptors if we can. @@ -144,7 +145,7 @@ if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then case $MAX_FD in #( max*) # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. - # shellcheck disable=SC3045 + # shellcheck disable=SC2039,SC3045 MAX_FD=$( ulimit -H -n ) || warn "Could not query maximum file descriptor limit" esac @@ -152,7 +153,7 @@ if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then '' | soft) :;; #( *) # In POSIX sh, ulimit -n is undefined. 
That's why the result is checked to see if it worked. - # shellcheck disable=SC3045 + # shellcheck disable=SC2039,SC3045 ulimit -n "$MAX_FD" || warn "Could not set maximum file descriptor limit to $MAX_FD" esac @@ -197,11 +198,15 @@ if "$cygwin" || "$msys" ; then done fi -# Collect all arguments for the java command; -# * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of -# shell script including quotes and variable substitutions, so put them in -# double quotes to make sure that they get re-expanded; and -# * put everything else in single quotes, so that it's not re-expanded. + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. set -- \ "-Dorg.gradle.appname=$APP_BASE_NAME" \ diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 0000000000..6689b85bee --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,92 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. 
+@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. +@rem This is normally unused +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. 
+ +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if %ERRORLEVEL% equ 0 goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/java-style-eclipse.xml b/java-style-eclipse.xml deleted file mode 100644 index 4f9d8d6b9c..0000000000 --- a/java-style-eclipse.xml +++ /dev/null @@ -1,337 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/java-style-intellij.xml b/java-style-intellij.xml deleted file mode 100644 index ac955eb93d..0000000000 --- a/java-style-intellij.xml +++ /dev/null @@ -1,598 +0,0 @@ - - - - - - diff --git a/src/main/java/htsjdk/annotations/BetaAPI.java b/src/main/java/htsjdk/annotations/BetaAPI.java index 1d4e5beaec..cee4709c45 100644 --- a/src/main/java/htsjdk/annotations/BetaAPI.java +++ 
b/src/main/java/htsjdk/annotations/BetaAPI.java @@ -1,13 +1,13 @@ package htsjdk.annotations; +import static java.lang.annotation.ElementType.*; + import java.lang.annotation.Documented; import java.lang.annotation.Inherited; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; import java.lang.annotation.Target; -import static java.lang.annotation.ElementType.*; - /** * Annotation indicating that a package, class, method, or type is release level "BETA", and is not part * of the stable public API. BETA APIs are published for evaluation, and may be changed or removed without a @@ -17,5 +17,4 @@ @Retention(RetentionPolicy.SOURCE) @Inherited @Documented -public @interface BetaAPI { -} +public @interface BetaAPI {} diff --git a/src/main/java/htsjdk/annotations/InternalAPI.java b/src/main/java/htsjdk/annotations/InternalAPI.java index faf0730a01..1eb73861c8 100644 --- a/src/main/java/htsjdk/annotations/InternalAPI.java +++ b/src/main/java/htsjdk/annotations/InternalAPI.java @@ -1,13 +1,13 @@ package htsjdk.annotations; +import static java.lang.annotation.ElementType.*; + import java.lang.annotation.Documented; import java.lang.annotation.Inherited; import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; import java.lang.annotation.Target; -import static java.lang.annotation.ElementType.*; - /** * Annotation indicating that a package, class, method, or type is release level "internal", even if the * access modifier is "public". 
{@link InternalAPI} types are intended to be for internal use only, and @@ -18,5 +18,4 @@ @Retention(RetentionPolicy.SOURCE) @Inherited @Documented -public @interface InternalAPI { -} +public @interface InternalAPI {} diff --git a/src/main/java/htsjdk/beta/codecs/hapref/fasta/FASTACodecV1_0.java b/src/main/java/htsjdk/beta/codecs/hapref/fasta/FASTACodecV1_0.java index db3d633407..fc7a5f3d3f 100644 --- a/src/main/java/htsjdk/beta/codecs/hapref/fasta/FASTACodecV1_0.java +++ b/src/main/java/htsjdk/beta/codecs/hapref/fasta/FASTACodecV1_0.java @@ -1,22 +1,21 @@ package htsjdk.beta.codecs.hapref.fasta; +import htsjdk.beta.exception.HtsjdkIOException; +import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.io.bundle.SignatureStream; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.hapref.HaploidReferenceCodec; import htsjdk.beta.plugin.hapref.HaploidReferenceDecoder; import htsjdk.beta.plugin.hapref.HaploidReferenceDecoderOptions; import htsjdk.beta.plugin.hapref.HaploidReferenceEncoder; -import htsjdk.beta.io.bundle.SignatureStream; -import htsjdk.beta.exception.HtsjdkIOException; -import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; import htsjdk.beta.plugin.hapref.HaploidReferenceEncoderOptions; -import htsjdk.io.IOPath; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.hapref.HaploidReferenceFormats; +import htsjdk.io.IOPath; import htsjdk.samtools.util.BlockCompressedStreamConstants; import htsjdk.samtools.util.FileExtensions; import htsjdk.samtools.util.IOUtil; import htsjdk.utils.ValidationUtils; - import java.io.IOException; import java.io.InputStream; import java.util.zip.GZIPInputStream; @@ -38,7 +37,9 @@ public String getFileFormat() { } @Override - public int getSignatureLength() { return BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE; } + public int getSignatureLength() { + return BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE; + 
} @Override public boolean canDecodeSignature(final SignatureStream signatureStream, final String sourceName) { @@ -46,15 +47,14 @@ public boolean canDecodeSignature(final SignatureStream signatureStream, final S ValidationUtils.nonNull(sourceName, "sourceName"); try { - final InputStream wrappedInputStream = IOUtil.isGZIPInputStream(signatureStream) ? - new GZIPInputStream(signatureStream) : - signatureStream; + final InputStream wrappedInputStream = + IOUtil.isGZIPInputStream(signatureStream) ? new GZIPInputStream(signatureStream) : signatureStream; int ch = wrappedInputStream.read(); if (ch == -1) { - throw new HtsjdkIOException( - String.format("Codec %s failed probing signature for resource %s", this.getDisplayName(), sourceName)); + throw new HtsjdkIOException(String.format( + "Codec %s failed probing signature for resource %s", this.getDisplayName(), sourceName)); } - return ((char) ch) == '>'; // for FASTA, this is all we have to go on... + return ((char) ch) == '>'; // for FASTA, this is all we have to go on... 
} catch (IOException e) { throw new HtsjdkIOException(String.format("Failure reading signature from stream for %s", sourceName), e); } @@ -63,13 +63,13 @@ public boolean canDecodeSignature(final SignatureStream signatureStream, final S @Override public boolean canDecodeURI(final IOPath ioPath) { ValidationUtils.nonNull(ioPath, "ioPath"); - return FileExtensions.FASTA.stream().anyMatch(ext-> ioPath.hasExtension(ext)); + return FileExtensions.FASTA.stream().anyMatch(ext -> ioPath.hasExtension(ext)); } - @Override + @Override public HaploidReferenceDecoder getDecoder(final Bundle inputBundle, final HaploidReferenceDecoderOptions options) { - ValidationUtils.nonNull(inputBundle, "input bundle"); - ValidationUtils.nonNull(options, "reference encoder options"); + ValidationUtils.nonNull(inputBundle, "input bundle"); + ValidationUtils.nonNull(options, "reference encoder options"); return new FASTADecoderV1_0(inputBundle); } diff --git a/src/main/java/htsjdk/beta/codecs/hapref/fasta/FASTADecoderV1_0.java b/src/main/java/htsjdk/beta/codecs/hapref/fasta/FASTADecoderV1_0.java index 45b50eb35e..7aca489388 100644 --- a/src/main/java/htsjdk/beta/codecs/hapref/fasta/FASTADecoderV1_0.java +++ b/src/main/java/htsjdk/beta/codecs/hapref/fasta/FASTADecoderV1_0.java @@ -1,20 +1,19 @@ package htsjdk.beta.codecs.hapref.fasta; +import htsjdk.annotations.InternalAPI; +import htsjdk.beta.exception.HtsjdkIOException; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResource; import htsjdk.beta.io.bundle.BundleResourceType; import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.hapref.HaploidReferenceDecoder; import htsjdk.beta.plugin.hapref.HaploidReferenceFormats; -import htsjdk.beta.exception.HtsjdkIOException; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.reference.ReferenceSequence; import htsjdk.samtools.reference.ReferenceSequenceFile; import htsjdk.samtools.reference.ReferenceSequenceFileFactory; import 
htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.CloseableIterator; -import htsjdk.annotations.InternalAPI; - import java.io.IOException; /** @@ -25,7 +24,9 @@ public class FASTADecoderV1_0 implements HaploidReferenceDecoder { protected Bundle inputBundle; @Override - public String getDisplayName() { return displayName; } + public String getDisplayName() { + return displayName; + } private final ReferenceSequenceFile referenceSequenceFile; @@ -34,22 +35,23 @@ public FASTADecoderV1_0(final Bundle inputBundle) { this.displayName = inputBundle.getPrimaryResource().getDisplayName(); final BundleResource referenceResource = inputBundle.getOrThrow(BundleResourceType.CT_HAPLOID_REFERENCE); if (referenceResource.getIOPath().isPresent()) { - referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFileFromBundle(inputBundle, true, true); + referenceSequenceFile = + ReferenceSequenceFileFactory.getReferenceSequenceFileFromBundle(inputBundle, true, true); } else { - final SeekableStream seekableStream = referenceResource.getSeekableStream().orElseThrow( - () -> new IllegalArgumentException( - String.format("The reference resource %s is not able to supply the required seekable stream", - referenceResource.getDisplayName()))); + final SeekableStream seekableStream = referenceResource + .getSeekableStream() + .orElseThrow(() -> new IllegalArgumentException(String.format( + "The reference resource %s is not able to supply the required seekable stream", + referenceResource.getDisplayName()))); referenceSequenceFile = ReferenceSequenceFileFactory.getReferenceSequenceFile( - referenceResource.getDisplayName(), - seekableStream, - null - ); + referenceResource.getDisplayName(), seekableStream, null); } } @Override - final public String getFileFormat() { return HaploidReferenceFormats.FASTA; } + public final String getFileFormat() { + return HaploidReferenceFormats.FASTA; + } @Override public SAMSequenceDictionary getHeader() { @@ -83,7 +85,7 
@@ public ReferenceSequence next() { public void close() { try { referenceSequenceFile.close(); - } catch(final IOException e) { + } catch (final IOException e) { throw new HtsjdkIOException(e); } } @@ -100,7 +102,7 @@ public boolean hasIndex() { return bundleContainsIndex(inputBundle) && referenceSequenceFile.isIndexed(); } - //TODO: we need a solution here that doesn't depend on this getter...its necessary because + // TODO: we need a solution here that doesn't depend on this getter...its necessary because // the generic decoder interface exports an iterable, but we need the native // (indexed by contig) interface implemented on ReferenceSequenceFile to create a ReferenceSource, // it might be possible to write a CRAMReferenceSource implementation that uses the HtsQuery @@ -130,5 +132,4 @@ public void close() { private static boolean bundleContainsIndex(final Bundle inputBundle) { return inputBundle.get(BundleResourceType.CT_READS_INDEX).isPresent(); } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/ReadsCodecUtils.java b/src/main/java/htsjdk/beta/codecs/reads/ReadsCodecUtils.java index 9e08c9559d..e1d337a3b8 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/ReadsCodecUtils.java +++ b/src/main/java/htsjdk/beta/codecs/reads/ReadsCodecUtils.java @@ -1,5 +1,6 @@ package htsjdk.beta.codecs.reads; +import htsjdk.annotations.InternalAPI; import htsjdk.beta.codecs.reads.bam.BAMDecoderOptions; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResource; @@ -8,8 +9,6 @@ import htsjdk.samtools.SamInputResource; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; -import htsjdk.annotations.InternalAPI; - import java.util.Optional; /** @@ -18,7 +17,7 @@ * Utilities for use by reads encoder/decoder implementations. 
*/ @InternalAPI -final public class ReadsCodecUtils { +public final class ReadsCodecUtils { /** * InternalAPI @@ -32,17 +31,10 @@ final public class ReadsCodecUtils { */ @InternalAPI public static SamInputResource bundleToSamInputResource( - final Bundle inputBundle, - final ReadsDecoderOptions readsDecoderOptions) { - final SamInputResource samInputResource = readsToSamInputResource( - inputBundle, - BundleResourceType.CT_ALIGNED_READS, - readsDecoderOptions); - indexToSamInputResource( - inputBundle, - BundleResourceType.CT_READS_INDEX, - readsDecoderOptions, - samInputResource); + final Bundle inputBundle, final ReadsDecoderOptions readsDecoderOptions) { + final SamInputResource samInputResource = + readsToSamInputResource(inputBundle, BundleResourceType.CT_ALIGNED_READS, readsDecoderOptions); + indexToSamInputResource(inputBundle, BundleResourceType.CT_READS_INDEX, readsDecoderOptions, samInputResource); return samInputResource; } @@ -56,15 +48,14 @@ public static SamInputResource bundleToSamInputResource( */ @InternalAPI public static void readsDecoderOptionsToSamReaderFactory( - final ReadsDecoderOptions readsDecoderOptions, - final SamReaderFactory samReaderFactory) { + final ReadsDecoderOptions readsDecoderOptions, final SamReaderFactory samReaderFactory) { samReaderFactory.validationStringency(readsDecoderOptions.getValidationStringency()); samReaderFactory.setOption(SamReaderFactory.Option.EAGERLY_DECODE, readsDecoderOptions.isDecodeEagerly()); - samReaderFactory.setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, - readsDecoderOptions.isFileBasedIndexCached()); + samReaderFactory.setOption( + SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, readsDecoderOptions.isFileBasedIndexCached()); // note that this option is the revers polarity from SamReaderFactory, so negate it - samReaderFactory.setOption(SamReaderFactory.Option.DONT_MEMORY_MAP_INDEX, - !readsDecoderOptions.isMemoryMapIndexes()); + samReaderFactory.setOption( + 
SamReaderFactory.Option.DONT_MEMORY_MAP_INDEX, !readsDecoderOptions.isMemoryMapIndexes()); } /** @@ -95,8 +86,7 @@ public static void assertBundleContainsIndex(final Bundle inputBundle) { if (!bundleContainsIndex(inputBundle)) { throw new IllegalArgumentException(String.format( "To make index queries, an index resource must be provided in the resource bundle: %s", - inputBundle - )); + inputBundle)); } } @@ -121,19 +111,16 @@ public static SamReader getSamReader( @InternalAPI public static void bamDecoderOptionsToSamReaderFactory( - final SamReaderFactory samReaderFactory, - final BAMDecoderOptions bamDecoderOptions) { + final SamReaderFactory samReaderFactory, final BAMDecoderOptions bamDecoderOptions) { samReaderFactory.inflaterFactory(bamDecoderOptions.getInflaterFactory()); samReaderFactory.setUseAsyncIo(bamDecoderOptions.isAsyncIO()); - samReaderFactory.setOption(SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS, - bamDecoderOptions.isValidateCRCChecksums()); + samReaderFactory.setOption( + SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS, bamDecoderOptions.isValidateCRCChecksums()); } // convert an input bundle to a SamInputResource private static SamInputResource readsToSamInputResource( - final Bundle inputBundle, - final String contentType, - final ReadsDecoderOptions readsDecoderOptions) { + final Bundle inputBundle, final String contentType, final ReadsDecoderOptions readsDecoderOptions) { final BundleResource readsInput = inputBundle.getOrThrow(contentType); if (!readsInput.hasInputType()) { throw new IllegalArgumentException(String.format( @@ -143,8 +130,9 @@ private static SamInputResource readsToSamInputResource( if (readsInput.hasSeekableStream()) { if (readsInput.getIOPath().isPresent()) { if (readsDecoderOptions.getReadsChannelTransformer().isPresent()) { - //TODO: use a local cloud channel wrapper instead of requiring the user to pass a lambda - return SamInputResource.of(readsInput.getIOPath().get().toPath(), + // TODO: use a local cloud channel 
wrapper instead of requiring the user to pass a lambda + return SamInputResource.of( + readsInput.getIOPath().get().toPath(), readsDecoderOptions.getReadsChannelTransformer().get()); } else { return SamInputResource.of(readsInput.getIOPath().get().toPath()); @@ -168,8 +156,9 @@ private static void indexToSamInputResource( if (indexResource.getIOPath().isPresent()) { if (indexResource.getIOPath().isPresent()) { if (readsDecoderOptions.getIndexChannelTransformer().isPresent()) { - //TODO: use a local cloud channel wrapper instead of requiring the user to pass a lambda - SamInputResource.of(indexResource.getIOPath().get().toPath(), + // TODO: use a local cloud channel wrapper instead of requiring the user to pass a lambda + SamInputResource.of( + indexResource.getIOPath().get().toPath(), readsDecoderOptions.getIndexChannelTransformer().get()); samInputResource.index(indexResource.getIOPath().get().toPath()); } else if (indexResource.getSeekableStream().isPresent()) { @@ -181,5 +170,4 @@ private static void indexToSamInputResource( } } } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/bam/BAMCodec.java b/src/main/java/htsjdk/beta/codecs/reads/bam/BAMCodec.java index f1e2c4b611..f3e306ace3 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/bam/BAMCodec.java +++ b/src/main/java/htsjdk/beta/codecs/reads/bam/BAMCodec.java @@ -1,14 +1,13 @@ package htsjdk.beta.codecs.reads.bam; +import htsjdk.annotations.InternalAPI; import htsjdk.beta.io.bundle.BundleResourceType; -import htsjdk.io.IOPath; import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.reads.ReadsCodec; import htsjdk.beta.plugin.reads.ReadsFormats; +import htsjdk.io.IOPath; import htsjdk.samtools.util.FileExtensions; -import htsjdk.annotations.InternalAPI; import htsjdk.utils.ValidationUtils; - import java.util.Arrays; import java.util.HashSet; import java.util.Set; @@ -20,17 +19,18 @@ */ @InternalAPI public abstract class BAMCodec implements ReadsCodec { - public static final HtsVersion 
BAM_DEFAULT_VERSION = new HtsVersion(1, 0,0); + public static final HtsVersion BAM_DEFAULT_VERSION = new HtsVersion(1, 0, 0); private static final Set extensionMap = new HashSet<>(Arrays.asList(FileExtensions.BAM)); @Override - public String getFileFormat() { return ReadsFormats.BAM; } + public String getFileFormat() { + return ReadsFormats.BAM; + } @Override public boolean canDecodeURI(final IOPath ioPath) { ValidationUtils.nonNull(ioPath, "ioPath"); - return extensionMap.stream().anyMatch(ext-> ioPath.hasExtension(ext)); + return extensionMap.stream().anyMatch(ext -> ioPath.hasExtension(ext)); } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/bam/BAMDecoder.java b/src/main/java/htsjdk/beta/codecs/reads/bam/BAMDecoder.java index 4190c3b8ba..34c0227ac6 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/bam/BAMDecoder.java +++ b/src/main/java/htsjdk/beta/codecs/reads/bam/BAMDecoder.java @@ -1,11 +1,11 @@ package htsjdk.beta.codecs.reads.bam; +import htsjdk.annotations.InternalAPI; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResourceType; +import htsjdk.beta.plugin.reads.ReadsDecoder; import htsjdk.beta.plugin.reads.ReadsDecoderOptions; import htsjdk.beta.plugin.reads.ReadsFormats; -import htsjdk.beta.plugin.reads.ReadsDecoder; -import htsjdk.annotations.InternalAPI; import htsjdk.utils.ValidationUtils; /** @@ -30,19 +30,24 @@ public abstract class BAMDecoder implements ReadsDecoder { */ @InternalAPI public BAMDecoder(final Bundle inputBundle, final ReadsDecoderOptions readsDecoderOptions) { - ValidationUtils.nonNull(inputBundle,"inputBundle"); + ValidationUtils.nonNull(inputBundle, "inputBundle"); ValidationUtils.nonNull(readsDecoderOptions, "readsDecoderOptions"); this.inputBundle = inputBundle; - this.displayName = inputBundle.getOrThrow(BundleResourceType.CT_ALIGNED_READS).getDisplayName(); + this.displayName = + inputBundle.getOrThrow(BundleResourceType.CT_ALIGNED_READS).getDisplayName(); this.readsDecoderOptions = 
readsDecoderOptions; } @Override - final public String getFileFormat() { return ReadsFormats.BAM; } + public final String getFileFormat() { + return ReadsFormats.BAM; + } @Override - final public String getDisplayName() { return displayName; } + public final String getDisplayName() { + return displayName; + } /** * Get the input {@link Bundle} for this decoder. diff --git a/src/main/java/htsjdk/beta/codecs/reads/bam/BAMDecoderOptions.java b/src/main/java/htsjdk/beta/codecs/reads/bam/BAMDecoderOptions.java index 34b6e0307f..df04eb5cbc 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/bam/BAMDecoderOptions.java +++ b/src/main/java/htsjdk/beta/codecs/reads/bam/BAMDecoderOptions.java @@ -9,7 +9,7 @@ * Decoder options specific to BAM decoders. */ public class BAMDecoderOptions { - //SAMRecordFactory isn't carried over from SAMReaderFactory as an option, since it doesn't appear to + // SAMRecordFactory isn't carried over from SAMReaderFactory as an option, since it doesn't appear to // actually be used anywhere anymore (??) // // includeInSource isn't carried over since it populates SAMRecords with a SamReader, which the plugin @@ -19,8 +19,8 @@ public class BAMDecoderOptions { // (all, including splitting) index creation into htsjdk. private InflaterFactory inflaterFactory = BlockGunzipper.getDefaultInflaterFactory(); - private boolean asyncIO = Defaults.USE_ASYNC_IO_READ_FOR_SAMTOOLS; - private boolean validateCRCChecksums = false; + private boolean asyncIO = Defaults.USE_ASYNC_IO_READ_FOR_SAMTOOLS; + private boolean validateCRCChecksums = false; /** * Get the {@link InflaterFactory} used for these options. 
Defaults to @@ -85,5 +85,4 @@ public BAMDecoderOptions setValidateCRCChecksums(final boolean validateCRCChecks this.validateCRCChecksums = validateCRCChecksums; return this; } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/bam/BAMEncoder.java b/src/main/java/htsjdk/beta/codecs/reads/bam/BAMEncoder.java index dc9d5f7010..9b09f1d6d7 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/bam/BAMEncoder.java +++ b/src/main/java/htsjdk/beta/codecs/reads/bam/BAMEncoder.java @@ -2,9 +2,9 @@ import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResourceType; +import htsjdk.beta.plugin.reads.ReadsEncoder; import htsjdk.beta.plugin.reads.ReadsEncoderOptions; import htsjdk.beta.plugin.reads.ReadsFormats; -import htsjdk.beta.plugin.reads.ReadsEncoder; import htsjdk.utils.ValidationUtils; /** @@ -31,19 +31,24 @@ public abstract class BAMEncoder implements ReadsEncoder { * @param readsEncoderOptions {@link ReadsEncoderOptions} to use */ public BAMEncoder(final Bundle outputBundle, final ReadsEncoderOptions readsEncoderOptions) { - ValidationUtils.nonNull(outputBundle,"outputBundle"); + ValidationUtils.nonNull(outputBundle, "outputBundle"); ValidationUtils.nonNull(readsEncoderOptions, "readsEncoderOptions"); this.outputBundle = outputBundle; this.readsEncoderOptions = readsEncoderOptions; - this.displayName = outputBundle.getOrThrow(BundleResourceType.CT_ALIGNED_READS).getDisplayName(); + this.displayName = + outputBundle.getOrThrow(BundleResourceType.CT_ALIGNED_READS).getDisplayName(); } @Override - final public String getFileFormat() { return ReadsFormats.BAM; } + public final String getFileFormat() { + return ReadsFormats.BAM; + } @Override - final public String getDisplayName() { return displayName; } + public final String getDisplayName() { + return displayName; + } /** * Get the output {@link Bundle} for this encoder. 
@@ -62,5 +67,4 @@ public Bundle getOutputBundle() { public ReadsEncoderOptions getReadsEncoderOptions() { return readsEncoderOptions; } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/bam/BAMEncoderOptions.java b/src/main/java/htsjdk/beta/codecs/reads/bam/BAMEncoderOptions.java index 30ae57424f..d967f965d8 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/bam/BAMEncoderOptions.java +++ b/src/main/java/htsjdk/beta/codecs/reads/bam/BAMEncoderOptions.java @@ -15,13 +15,13 @@ public class BAMEncoderOptions { public static final int DEAFULT_MAX_RECORDS_IN_RAM = 500000; - private int outputBufferSize = Defaults.BUFFER_SIZE; - private boolean asyncIO = Defaults.USE_ASYNC_IO_WRITE_FOR_SAMTOOLS; - private int asyncOutputBufferSize = AbstractAsyncWriter.DEFAULT_QUEUE_SIZE; - private IOPath tempDirPath = new HtsPath(IOUtil.getDefaultTmpDirPath().toString()); - private int compressionLevel = BlockCompressedOutputStream.getDefaultCompressionLevel(); - private Integer maxRecordsInRAM = DEAFULT_MAX_RECORDS_IN_RAM; - private DeflaterFactory deflaterFactory = BlockCompressedOutputStream.getDefaultDeflaterFactory(); + private int outputBufferSize = Defaults.BUFFER_SIZE; + private boolean asyncIO = Defaults.USE_ASYNC_IO_WRITE_FOR_SAMTOOLS; + private int asyncOutputBufferSize = AbstractAsyncWriter.DEFAULT_QUEUE_SIZE; + private IOPath tempDirPath = new HtsPath(IOUtil.getDefaultTmpDirPath().toString()); + private int compressionLevel = BlockCompressedOutputStream.getDefaultCompressionLevel(); + private Integer maxRecordsInRAM = DEAFULT_MAX_RECORDS_IN_RAM; + private DeflaterFactory deflaterFactory = BlockCompressedOutputStream.getDefaultDeflaterFactory(); // SAM only ?: private SamFlagField samFlagFieldOutput = SamFlagField.NONE; /** @@ -120,7 +120,7 @@ public BAMEncoderOptions setTemporaryDirectory(final IOPath tempDirPath) { * {@link htsjdk.samtools.util.BlockCompressedStreamConstants#DEFAULT_COMPRESSION_LEVEL}. 
* See {@link htsjdk.samtools.util.BlockCompressedStreamConstants#DEFAULT_COMPRESSION_LEVEL} * - * @return the compression level for these options, 1 <= compressionLevel <= 9 + * @return the compression level for these options, {@code 1 <= compressionLevel <= 9} */ public int getCompressionLevel() { return compressionLevel; @@ -130,7 +130,7 @@ public int getCompressionLevel() { * Set the compression level for these options. Defaults value is * {@link htsjdk.samtools.util.BlockCompressedStreamConstants#DEFAULT_COMPRESSION_LEVEL}. * - * @param compressionLevel the compression level for these options, 1 <= compressionLevel <= 9 + * @param compressionLevel the compression level for these options, {@code 1 <= compressionLevel <= 9} * @return updated options */ public BAMEncoderOptions setCompressionLevel(int compressionLevel) { @@ -194,5 +194,4 @@ public BAMEncoderOptions setDeflaterFactory(DeflaterFactory deflaterFactory) { this.deflaterFactory = deflaterFactory; return this; } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/bam/bamV1_0/BAMCodecV1_0.java b/src/main/java/htsjdk/beta/codecs/reads/bam/bamV1_0/BAMCodecV1_0.java index 1f13c92672..18fac5c1e9 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/bam/bamV1_0/BAMCodecV1_0.java +++ b/src/main/java/htsjdk/beta/codecs/reads/bam/bamV1_0/BAMCodecV1_0.java @@ -3,17 +3,16 @@ import htsjdk.beta.codecs.reads.bam.BAMCodec; import htsjdk.beta.codecs.reads.bam.BAMDecoder; import htsjdk.beta.codecs.reads.bam.BAMEncoder; +import htsjdk.beta.exception.HtsjdkIOException; +import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.SignatureStream; -import htsjdk.beta.exception.HtsjdkIOException; import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.reads.ReadsDecoderOptions; import htsjdk.beta.plugin.reads.ReadsEncoderOptions; -import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; import htsjdk.samtools.SamStreams; import 
htsjdk.samtools.util.BlockCompressedStreamConstants; import htsjdk.utils.ValidationUtils; - import java.io.IOException; /** @@ -28,7 +27,9 @@ public HtsVersion getVersion() { } @Override - public int getSignatureProbeLength() { return BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE; } + public int getSignatureProbeLength() { + return BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE; + } @Override public int getSignatureLength() { @@ -62,5 +63,4 @@ public BAMEncoder getEncoder(final Bundle outputBundle, final ReadsEncoderOption public boolean runVersionUpgrade(final HtsVersion sourceCodecVersion, final HtsVersion targetCodecVersion) { throw new HtsjdkUnsupportedOperationException("Upgrade not yet implemented"); } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/bam/bamV1_0/BAMDecoderV1_0.java b/src/main/java/htsjdk/beta/codecs/reads/bam/bamV1_0/BAMDecoderV1_0.java index f802fc0797..e4a1fb275e 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/bam/bamV1_0/BAMDecoderV1_0.java +++ b/src/main/java/htsjdk/beta/codecs/reads/bam/bamV1_0/BAMDecoderV1_0.java @@ -2,14 +2,13 @@ import htsjdk.beta.codecs.reads.ReadsCodecUtils; import htsjdk.beta.codecs.reads.bam.BAMDecoder; +import htsjdk.beta.exception.HtsjdkIOException; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResourceType; -import htsjdk.beta.plugin.interval.HtsIntervalUtils; -import htsjdk.beta.exception.HtsjdkIOException; import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.interval.HtsInterval; +import htsjdk.beta.plugin.interval.HtsIntervalUtils; import htsjdk.beta.plugin.interval.HtsQueryRule; - import htsjdk.beta.plugin.reads.ReadsDecoderOptions; import htsjdk.samtools.QueryInterval; import htsjdk.samtools.SAMFileHeader; @@ -18,13 +17,12 @@ import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.util.CloseableIterator; import htsjdk.utils.ValidationUtils; - import java.io.IOException; import java.util.List; import java.util.Optional; /** - * BAM 
v1.0 decoder. + * BAM v1.0 decoder. */ public class BAMDecoderV1_0 extends BAMDecoder { private final SamReader samReader; @@ -78,9 +76,8 @@ public CloseableIterator query(final List intervals, fin ValidationUtils.nonNull(queryRule, "queryRule"); ReadsCodecUtils.assertBundleContainsIndex(getInputBundle()); - final QueryInterval[] queryIntervals = HtsIntervalUtils.toQueryIntervalArray( - intervals, - samFileHeader.getSequenceDictionary()); + final QueryInterval[] queryIntervals = + HtsIntervalUtils.toQueryIntervalArray(intervals, samFileHeader.getSequenceDictionary()); return samReader.query(queryIntervals, queryRule == HtsQueryRule.CONTAINED); } @@ -116,5 +113,4 @@ public void close() { throw new HtsjdkIOException(String.format("Exception closing reader for %s", getInputBundle()), e); } } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/bam/bamV1_0/BAMEncoderV1_0.java b/src/main/java/htsjdk/beta/codecs/reads/bam/bamV1_0/BAMEncoderV1_0.java index b0fa567229..fd2230250d 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/bam/bamV1_0/BAMEncoderV1_0.java +++ b/src/main/java/htsjdk/beta/codecs/reads/bam/bamV1_0/BAMEncoderV1_0.java @@ -4,16 +4,15 @@ import htsjdk.beta.codecs.reads.bam.BAMEncoderOptions; import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; import htsjdk.beta.io.bundle.Bundle; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.BundleResource; import htsjdk.beta.io.bundle.BundleResourceType; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.reads.ReadsEncoderOptions; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMFileWriterFactory; import htsjdk.samtools.SAMRecord; import htsjdk.utils.ValidationUtils; - import java.util.Optional; /** @@ -50,8 +49,7 @@ public void write(final SAMRecord record) { ValidationUtils.nonNull(record, "record"); if (samFileWriter == null) { throw new IllegalStateException(String.format( - "A SAMFileHeader must be established 
before records can be written for %s", - getDisplayName())); + "A SAMFileHeader must be established before records can be written for %s", getDisplayName())); } samFileWriter.addAlignment(record); } @@ -67,11 +65,11 @@ public void close() { * Propagate BAMEncoderOptions to a SAMFileWriterFactory. */ private static void bamEncoderOptionsToSamWriterFactory( - final BAMEncoderOptions bamEncoderOptions, - final SAMFileWriterFactory samFileWriterFactory) { + final BAMEncoderOptions bamEncoderOptions, final SAMFileWriterFactory samFileWriterFactory) { samFileWriterFactory.setDeflaterFactory(bamEncoderOptions.getDeflaterFactory()); samFileWriterFactory.setCompressionLevel(bamEncoderOptions.getCompressionLevel()); - samFileWriterFactory.setTempDirectory(bamEncoderOptions.getTemporaryDirectory().toPath().toFile()); + samFileWriterFactory.setTempDirectory( + bamEncoderOptions.getTemporaryDirectory().toPath().toFile()); samFileWriterFactory.setBufferSize(bamEncoderOptions.getOutputBufferSize()); samFileWriterFactory.setUseAsyncIo(bamEncoderOptions.isAsyncIO()); samFileWriterFactory.setAsyncOutputBufferSize(bamEncoderOptions.getAsyncOutputBufferSize()); @@ -79,8 +77,7 @@ private static void bamEncoderOptionsToSamWriterFactory( } private SAMFileWriter getBAMFileWriter( - final ReadsEncoderOptions readsEncoderOptions, - final SAMFileHeader samFileHeader) { + final ReadsEncoderOptions readsEncoderOptions, final SAMFileHeader samFileHeader) { final BAMEncoderOptions bamEncoderOptions = readsEncoderOptions.getBAMEncoderOptions(); final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory(); bamEncoderOptionsToSamWriterFactory(bamEncoderOptions, samFileWriterFactory); @@ -91,7 +88,7 @@ private SAMFileWriter getBAMFileWriter( final Optional optIndexResource = getOutputBundle().get(BundleResourceType.CT_READS_INDEX); final Optional optMD5Resource = getOutputBundle().get(BundleResourceType.CT_MD5); - //TODO: BAMFileWriter currently only supports writing an index to a 
plain file, so for now + // TODO: BAMFileWriter currently only supports writing an index to a plain file, so for now // throw if an index is requested on any other type if (optIndexResource.isPresent()) { final BundleResource indexResource = optIndexResource.get(); @@ -102,25 +99,20 @@ private SAMFileWriter getBAMFileWriter( } } - //TODO: BAMFileWriter currently only supports writing an md5 to a plain file with a name that + // TODO: BAMFileWriter currently only supports writing an md5 to a plain file with a name that // it chooses, so throw if an md5 resource is specified since we can't direct it to the specified // resource if (optMD5Resource.isPresent()) { - throw new HtsjdkUnsupportedOperationException(String.format( - "Specifying an an MD5 resource name not yet implemented on %s", getDisplayName())); + throw new HtsjdkUnsupportedOperationException( + String.format("Specifying an an MD5 resource name not yet implemented on %s", getDisplayName())); } if (readsResource.getIOPath().isPresent()) { return samFileWriterFactory.makeBAMWriter( - samFileHeader, - preSorted, - readsResource.getIOPath().get().toPath()); + samFileHeader, preSorted, readsResource.getIOPath().get().toPath()); } else { return samFileWriterFactory.makeBAMWriter( - samFileHeader, - preSorted, - readsResource.getOutputStream().get()); + samFileHeader, preSorted, readsResource.getOutputStream().get()); } } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMCodec.java b/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMCodec.java index 1e56bf7bc0..2899e69b0e 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMCodec.java +++ b/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMCodec.java @@ -1,23 +1,22 @@ package htsjdk.beta.codecs.reads.cram; +import htsjdk.annotations.InternalAPI; import htsjdk.beta.codecs.hapref.fasta.FASTADecoderV1_0; +import htsjdk.beta.exception.HtsjdkException; import htsjdk.beta.exception.HtsjdkIOException; import 
htsjdk.beta.exception.HtsjdkUnsupportedOperationException; -import htsjdk.beta.exception.HtsjdkException; import htsjdk.beta.io.bundle.BundleResourceType; import htsjdk.beta.io.bundle.SignatureStream; -import htsjdk.io.IOPath; import htsjdk.beta.plugin.HtsVersion; -import htsjdk.beta.plugin.registry.HtsDefaultRegistry; import htsjdk.beta.plugin.reads.ReadsCodec; import htsjdk.beta.plugin.reads.ReadsFormats; +import htsjdk.beta.plugin.registry.HtsDefaultRegistry; +import htsjdk.io.IOPath; import htsjdk.samtools.cram.ref.CRAMReferenceSource; import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.reference.ReferenceSequenceFile; import htsjdk.samtools.util.FileExtensions; -import htsjdk.annotations.InternalAPI; import htsjdk.utils.ValidationUtils; - import java.io.IOException; import java.util.Arrays; import java.util.HashSet; @@ -33,13 +32,15 @@ public abstract class CRAMCodec implements ReadsCodec { protected static final Set extensionMap = new HashSet<>(Arrays.asList(FileExtensions.CRAM)); @Override - public String getFileFormat() { return ReadsFormats.CRAM; } + public String getFileFormat() { + return ReadsFormats.CRAM; + } @Override public boolean canDecodeURI(final IOPath ioPath) { ValidationUtils.nonNull(ioPath, "ioPath"); - return extensionMap.stream().anyMatch(ext-> ioPath.hasExtension(ext)); + return extensionMap.stream().anyMatch(ext -> ioPath.hasExtension(ext)); } @Override @@ -74,7 +75,7 @@ static CRAMReferenceSource getCRAMReferenceSource(final IOPath referencePath) { throw new HtsjdkException(String.format("Unable to get reference codec for %s", referencePath)); } - //TODO: we need a solution here doesn't require access to this getter...its necessary because + // TODO: we need a solution here doesn't require access to this getter...its necessary because // the generic decoder interface is an iterable, but we need the native (indexed // by contig) interface implemented on ReferenceSequenceFile to create a ReferenceSource, so we // need 
to cast the decoder to get access to the ReferenceSequenceFile; it might be possible to @@ -89,5 +90,4 @@ static CRAMReferenceSource getCRAMReferenceSource(final IOPath referencePath) { * @return the signature string for this codec */ protected abstract String getSignatureString(); - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMDecoder.java b/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMDecoder.java index 86136f96f7..c8e2365403 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMDecoder.java +++ b/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMDecoder.java @@ -1,5 +1,6 @@ package htsjdk.beta.codecs.reads.cram; +import htsjdk.annotations.InternalAPI; import htsjdk.beta.codecs.reads.ReadsCodecUtils; import htsjdk.beta.exception.HtsjdkIOException; import htsjdk.beta.io.bundle.Bundle; @@ -7,9 +8,9 @@ import htsjdk.beta.plugin.interval.HtsInterval; import htsjdk.beta.plugin.interval.HtsIntervalUtils; import htsjdk.beta.plugin.interval.HtsQueryRule; +import htsjdk.beta.plugin.reads.ReadsDecoder; import htsjdk.beta.plugin.reads.ReadsDecoderOptions; import htsjdk.beta.plugin.reads.ReadsFormats; -import htsjdk.beta.plugin.reads.ReadsDecoder; import htsjdk.samtools.QueryInterval; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMFormatException; @@ -20,9 +21,7 @@ import htsjdk.samtools.cram.ref.CRAMReferenceSource; import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.util.CloseableIterator; -import htsjdk.annotations.InternalAPI; import htsjdk.utils.ValidationUtils; - import java.io.IOException; import java.util.List; import java.util.Optional; @@ -57,17 +56,22 @@ public CRAMDecoder(final Bundle inputBundle, final ReadsDecoderOptions readsDeco this.inputBundle = inputBundle; this.readsDecoderOptions = readsDecoderOptions; - this.displayName = inputBundle.getOrThrow(BundleResourceType.CT_ALIGNED_READS).getDisplayName(); + this.displayName = + 
inputBundle.getOrThrow(BundleResourceType.CT_ALIGNED_READS).getDisplayName(); samReader = getSamReaderForCRAM(inputBundle, readsDecoderOptions); samFileHeader = samReader.getFileHeader(); } @Override - final public String getFileFormat() { return ReadsFormats.CRAM; } + public final String getFileFormat() { + return ReadsFormats.CRAM; + } @Override - final public String getDisplayName() { return displayName; } + public final String getDisplayName() { + return displayName; + } @Override public SAMFileHeader getHeader() { @@ -84,7 +88,9 @@ public void close() { } @Override - public CloseableIterator iterator() { return getIteratorMonitor(() -> samReader.iterator()); } + public CloseableIterator iterator() { + return getIteratorMonitor(() -> samReader.iterator()); + } @Override public boolean isQueryable() { @@ -101,16 +107,16 @@ public CloseableIterator query(final List intervals, fin ValidationUtils.nonNull(intervals, "intervals"); ValidationUtils.nonNull(queryRule, "queryRule"); - final QueryInterval[] queryIntervals = HtsIntervalUtils.toQueryIntervalArray( - intervals, - samFileHeader.getSequenceDictionary()); + final QueryInterval[] queryIntervals = + HtsIntervalUtils.toQueryIntervalArray(intervals, samFileHeader.getSequenceDictionary()); return getIteratorMonitor(() -> samReader.query(queryIntervals, queryRule == HtsQueryRule.CONTAINED)); } @Override public CloseableIterator queryStart(final String queryName, final long start) { ValidationUtils.nonNull(queryName, "queryName"); - return getIteratorMonitor(() -> samReader.queryAlignmentStart(queryName, HtsIntervalUtils.toIntegerSafe(start))); + return getIteratorMonitor( + () -> samReader.queryAlignmentStart(queryName, HtsIntervalUtils.toIntegerSafe(start))); } @Override @@ -126,27 +132,28 @@ public Optional queryMate(SAMRecord rec) { ValidationUtils.nonNull(rec, "rec"); if (!rec.getReadPairedFlag()) { - throw new IllegalArgumentException(String.format("queryMate called for unpaired read on %s.", 
getDisplayName())); + throw new IllegalArgumentException( + String.format("queryMate called for unpaired read on %s.", getDisplayName())); } if (rec.getFirstOfPairFlag() == rec.getSecondOfPairFlag()) { - throw new IllegalArgumentException(String.format("SAMRecord must be either first and second of pair, but not both (%s).", - getDisplayName())); + throw new IllegalArgumentException(String.format( + "SAMRecord must be either first and second of pair, but not both (%s).", getDisplayName())); } final boolean firstOfPair = rec.getFirstOfPairFlag(); // its important that this method closes the iterators it creates, since otherwise the caller // will never be able to create another iterator after calling this method try (final CloseableIterator it = - rec.getMateReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? - queryUnmapped() : - queryStart(rec.getMateReferenceName(), rec.getMateAlignmentStart())) { + rec.getMateReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX + ? queryUnmapped() + : queryStart(rec.getMateReferenceName(), rec.getMateAlignmentStart())) { SAMRecord mateRec = null; while (it.hasNext()) { final SAMRecord next = it.next(); if (!next.getReadPairedFlag()) { if (rec.getReadName().equals(next.getReadName())) { - throw new SAMFormatException(String.format("Paired and unpaired reads with same name: %s (on %s)", - rec.getReadName(), - getInputBundle())); + throw new SAMFormatException(String.format( + "Paired and unpaired reads with same name: %s (on %s)", + rec.getReadName(), getInputBundle())); } continue; } @@ -157,11 +164,9 @@ public Optional queryMate(SAMRecord rec) { } if (rec.getReadName().equals(next.getReadName())) { if (mateRec != null) { - throw new SAMFormatException( - String.format("Multiple SAMRecord with read name %s for %s end on %s.", - rec.getReadName(), - (firstOfPair ? 
"second" : "first"), - getInputBundle())); + throw new SAMFormatException(String.format( + "Multiple SAMRecord with read name %s for %s end on %s.", + rec.getReadName(), (firstOfPair ? "second" : "first"), getInputBundle())); } mateRec = next; } @@ -190,7 +195,7 @@ public ReadsDecoderOptions getReadsDecoderOptions() { // TODO: If we've been handed a CRAMReferenceSource from the caller, then we don't want to close it // when the decoder is closed, but if we create it, then we need to close it. - //TODO: creation of the source should be separate from the getting of the source, and the result + // TODO: creation of the source should be separate from the getting of the source, and the result // cached, so we don't create multiple reference Sources @InternalAPI public static CRAMReferenceSource getCRAMReferenceSource(final CRAMDecoderOptions cramDecoderOptions) { @@ -199,7 +204,8 @@ public static CRAMReferenceSource getCRAMReferenceSource(final CRAMDecoderOption if (cramDecoderOptions.getReferenceSource().isPresent()) { return cramDecoderOptions.getReferenceSource().get(); } else if (cramDecoderOptions.getReferencePath().isPresent()) { - return CRAMCodec.getCRAMReferenceSource(cramDecoderOptions.getReferencePath().get()); + return CRAMCodec.getCRAMReferenceSource( + cramDecoderOptions.getReferencePath().get()); } // if none is specified, get the default "lazy" reference source that throws when queried, to allow // operations that don't require a reference @@ -220,7 +226,8 @@ private void toggleIteratorExists(final boolean newState) { "The previous iterator must be closed before starting a new iterator on %s", getDisplayName())); } else { // this indicates a problem with this codec - throw new IllegalStateException(String.format("No outstanding iterator exists for %s", getDisplayName())); + throw new IllegalStateException( + String.format("No outstanding iterator exists for %s", getDisplayName())); } } // reset the iterator monitor @@ -244,7 +251,9 @@ public void close() 
{ } @Override - public boolean hasNext() { return wrappedIterator.hasNext(); } + public boolean hasNext() { + return wrappedIterator.hasNext(); + } @Override public T next() { @@ -255,8 +264,7 @@ public T next() { // Propagate all reads decoder options and all bam decoder options to either a SamReaderFactory // or a SamInputResource, and return the resulting SamReader private static SamReader getSamReaderForCRAM( - final Bundle inputBundle, - final ReadsDecoderOptions readsDecoderOptions) { + final Bundle inputBundle, final ReadsDecoderOptions readsDecoderOptions) { // note that some reads decoder options, such as cloud wrapper values, need to be propagated // to the samInputResource, not to the SamReaderFactory final SamInputResource samInputResource = @@ -269,11 +277,9 @@ private static SamReader getSamReaderForCRAM( } private static void cramDecoderOptionsToSamReaderFactory( - final SamReaderFactory samReaderFactory, - final CRAMDecoderOptions cramDecoderOptions) { - //TODO: CRAMFileReader doesn't honor the requested inflater, but it should - //samReaderFactory.inflaterFactory(cramDecoderOptions.getInflaterFactory()); + final SamReaderFactory samReaderFactory, final CRAMDecoderOptions cramDecoderOptions) { + // TODO: CRAMFileReader doesn't honor the requested inflater, but it should + // samReaderFactory.inflaterFactory(cramDecoderOptions.getInflaterFactory()); samReaderFactory.referenceSource(getCRAMReferenceSource(cramDecoderOptions)); } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMDecoderOptions.java b/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMDecoderOptions.java index 798ef70181..b989330a65 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMDecoderOptions.java +++ b/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMDecoderOptions.java @@ -2,7 +2,6 @@ import htsjdk.io.IOPath; import htsjdk.samtools.cram.ref.CRAMReferenceSource; - import java.util.Optional; /** @@ -32,8 +31,8 @@ public Optional getReferenceSource() { public 
CRAMDecoderOptions setReferenceSource(final CRAMReferenceSource referenceSource) { if (referencePath != null) { throw new IllegalStateException(String.format( - "Reference source and reference path are mutually exclusive. Reference path already has value %s.", - referencePath.getRawInputString())); + "Reference source and reference path are mutually exclusive. Reference path already has value %s.", + referencePath.getRawInputString())); } this.referencePath = null; this.referenceSource = referenceSource; @@ -67,5 +66,4 @@ public CRAMDecoderOptions setReferencePath(final IOPath referencePath) { this.referencePath = referencePath; return this; } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMEncoder.java b/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMEncoder.java index 7a4f6caa58..5e7581fdc2 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMEncoder.java +++ b/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMEncoder.java @@ -1,21 +1,20 @@ package htsjdk.beta.codecs.reads.cram; +import htsjdk.annotations.InternalAPI; import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResource; import htsjdk.beta.io.bundle.BundleResourceType; +import htsjdk.beta.plugin.reads.ReadsEncoder; import htsjdk.beta.plugin.reads.ReadsEncoderOptions; import htsjdk.beta.plugin.reads.ReadsFormats; -import htsjdk.beta.plugin.reads.ReadsEncoder; import htsjdk.samtools.CRAMFileWriter; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMFileWriterFactory; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.cram.ref.CRAMReferenceSource; import htsjdk.samtools.cram.ref.ReferenceSource; -import htsjdk.annotations.InternalAPI; import htsjdk.utils.ValidationUtils; - import java.util.Optional; /** @@ -47,14 +46,19 @@ public CRAMEncoder(final Bundle outputBundle, final ReadsEncoderOptions readsEnc this.outputBundle = outputBundle; this.readsEncoderOptions = 
readsEncoderOptions; - this.displayName = outputBundle.getOrThrow(BundleResourceType.CT_ALIGNED_READS).getDisplayName(); + this.displayName = + outputBundle.getOrThrow(BundleResourceType.CT_ALIGNED_READS).getDisplayName(); } @Override - final public String getFileFormat() { return ReadsFormats.CRAM; } + public final String getFileFormat() { + return ReadsFormats.CRAM; + } @Override - final public String getDisplayName() { return displayName; } + public final String getDisplayName() { + return displayName; + } @Override public void setHeader(final SAMFileHeader samFileHeader) { @@ -116,7 +120,8 @@ public static CRAMReferenceSource getCRAMReferenceSource(final CRAMEncoderOption if (cramEncoderOptions.getReferenceSource().isPresent()) { return cramEncoderOptions.getReferenceSource().get(); } else if (cramEncoderOptions.getReferencePath().isPresent()) { - return CRAMCodec.getCRAMReferenceSource(cramEncoderOptions.getReferencePath().get()); + return CRAMCodec.getCRAMReferenceSource( + cramEncoderOptions.getReferencePath().get()); } // if none is specified, get the default "lazy" reference source that throws when queried, to allow @@ -124,7 +129,8 @@ public static CRAMReferenceSource getCRAMReferenceSource(final CRAMEncoderOption return ReferenceSource.getDefaultCRAMReferenceSource(); } - private CRAMFileWriter getCRAMWriter(final SAMFileHeader samFileHeader, final ReadsEncoderOptions readsEncoderOptions) { + private CRAMFileWriter getCRAMWriter( + final SAMFileHeader samFileHeader, final ReadsEncoderOptions readsEncoderOptions) { // the CRAMFileWriter constructors assume presorted; so if we're presorted, use the CRAMFileWriters // directly so we can support writing to a stream if (readsEncoderOptions.isPreSorted()) { @@ -153,18 +159,19 @@ private CRAMFileWriter getCRAMWriter(final SAMFileHeader samFileHeader, final Re final Optional optIndexResource = getOutputBundle().get(BundleResourceType.CT_READS_INDEX); final Optional optMD5Resource = 
getOutputBundle().get(BundleResourceType.CT_MD5); - //TODO: SamFileWriterFactory code paths currently only support writing an index to a plain file, so + // TODO: SamFileWriterFactory code paths currently only support writing an index to a plain file, so // for now throw if an index is requested on any other type if (optIndexResource.isPresent()) { final BundleResource indexResource = optIndexResource.get(); if (indexResource.getIOPath().isPresent()) { samFileWriterFactory.setCreateIndex(true); } else { - throw new HtsjdkUnsupportedOperationException("Writing a CRAM index to a stream is not yet supported"); + throw new HtsjdkUnsupportedOperationException( + "Writing a CRAM index to a stream is not yet supported"); } } - //TODO: CRAMFileWriter currently only supports writing an md5 to a plain file with a name that + // TODO: CRAMFileWriter currently only supports writing an md5 to a plain file with a name that // it chooses, so throw if an md5 resource is specified since we can't direct it to the specified // resource if (optMD5Resource.isPresent()) { @@ -187,5 +194,4 @@ private CRAMFileWriter getCRAMWriter(final SAMFileHeader samFileHeader, final Re } } } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMEncoderOptions.java b/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMEncoderOptions.java index a64bae5c4b..3a7b1d4e30 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMEncoderOptions.java +++ b/src/main/java/htsjdk/beta/codecs/reads/cram/CRAMEncoderOptions.java @@ -2,8 +2,6 @@ import htsjdk.io.IOPath; import htsjdk.samtools.cram.ref.CRAMReferenceSource; -import htsjdk.utils.ValidationUtils; - import java.util.Optional; /** @@ -68,5 +66,4 @@ public CRAMEncoderOptions setReferencePath(final IOPath referencePath) { this.referencePath = referencePath; return this; } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV2_1/CRAMCodecV2_1.java b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV2_1/CRAMCodecV2_1.java index 
a3b4a8c5ef..a53c3faa6a 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV2_1/CRAMCodecV2_1.java +++ b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV2_1/CRAMCodecV2_1.java @@ -3,8 +3,8 @@ import htsjdk.beta.codecs.reads.cram.CRAMCodec; import htsjdk.beta.codecs.reads.cram.CRAMDecoder; import htsjdk.beta.codecs.reads.cram.CRAMEncoder; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.reads.ReadsDecoderOptions; import htsjdk.beta.plugin.reads.ReadsEncoderOptions; import htsjdk.samtools.cram.structure.CramHeader; @@ -44,6 +44,7 @@ public CRAMEncoder getEncoder(final Bundle outputBundle, final ReadsEncoderOptio } @Override - protected String getSignatureString() { return CRAM_MAGIC_2_1; } - + protected String getSignatureString() { + return CRAM_MAGIC_2_1; + } } diff --git a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV2_1/CRAMDecoderV2_1.java b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV2_1/CRAMDecoderV2_1.java index c91d381208..2b29ff3b32 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV2_1/CRAMDecoderV2_1.java +++ b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV2_1/CRAMDecoderV2_1.java @@ -1,9 +1,9 @@ package htsjdk.beta.codecs.reads.cram.cramV2_1; import htsjdk.beta.codecs.reads.cram.CRAMDecoder; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResourceType; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.reads.ReadsDecoderOptions; /** @@ -27,5 +27,4 @@ public CRAMDecoderV2_1(final Bundle bundle, final ReadsDecoderOptions readsDecod public HtsVersion getVersion() { return CRAMCodecV2_1.VERSION_2_1; } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV2_1/CRAMEncoderV2_1.java b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV2_1/CRAMEncoderV2_1.java index bae7c8a87b..57c57c8b2c 100644 --- 
a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV2_1/CRAMEncoderV2_1.java +++ b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV2_1/CRAMEncoderV2_1.java @@ -1,9 +1,9 @@ package htsjdk.beta.codecs.reads.cram.cramV2_1; import htsjdk.beta.codecs.reads.cram.CRAMEncoder; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResourceType; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.reads.ReadsEncoderOptions; /** @@ -27,5 +27,4 @@ public CRAMEncoderV2_1(final Bundle outputBundle, final ReadsEncoderOptions read public HtsVersion getVersion() { return CRAMCodecV2_1.VERSION_2_1; } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_0/CRAMCodecV3_0.java b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_0/CRAMCodecV3_0.java index bcb4fba356..d907cdb668 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_0/CRAMCodecV3_0.java +++ b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_0/CRAMCodecV3_0.java @@ -3,14 +3,13 @@ import htsjdk.beta.codecs.reads.cram.CRAMCodec; import htsjdk.beta.codecs.reads.cram.CRAMDecoder; import htsjdk.beta.codecs.reads.cram.CRAMEncoder; +import htsjdk.beta.exception.HtsjdkIOException; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.SignatureStream; -import htsjdk.beta.exception.HtsjdkIOException; import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.reads.ReadsDecoderOptions; import htsjdk.beta.plugin.reads.ReadsEncoderOptions; import htsjdk.samtools.cram.structure.CramHeader; - import java.io.IOException; import java.util.Arrays; @@ -56,6 +55,7 @@ public CRAMEncoder getEncoder(final Bundle outputBundle, final ReadsEncoderOptio } @Override - protected String getSignatureString() { return CRAM_MAGIC_3_0; } - + protected String getSignatureString() { + return CRAM_MAGIC_3_0; + } } diff --git a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_0/CRAMDecoderV3_0.java 
b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_0/CRAMDecoderV3_0.java index ff86467844..ebc57bce3a 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_0/CRAMDecoderV3_0.java +++ b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_0/CRAMDecoderV3_0.java @@ -27,5 +27,4 @@ public CRAMDecoderV3_0(final Bundle bundle, final ReadsDecoderOptions readsDecod public HtsVersion getVersion() { return CRAMCodecV3_0.VERSION_3_0; } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_0/CRAMEncoderV3_0.java b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_0/CRAMEncoderV3_0.java index 19dfade893..cbb6639ddd 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_0/CRAMEncoderV3_0.java +++ b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_0/CRAMEncoderV3_0.java @@ -27,5 +27,4 @@ public CRAMEncoderV3_0(final Bundle outputBundle, final ReadsEncoderOptions read public HtsVersion getVersion() { return CRAMCodecV3_0.VERSION_3_0; } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_1/CRAMCodecV3_1.java b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_1/CRAMCodecV3_1.java index 5f0606d069..e221188015 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_1/CRAMCodecV3_1.java +++ b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_1/CRAMCodecV3_1.java @@ -10,7 +10,6 @@ import htsjdk.beta.plugin.reads.ReadsDecoderOptions; import htsjdk.beta.plugin.reads.ReadsEncoderOptions; import htsjdk.samtools.cram.structure.CramHeader; - import java.io.IOException; import java.util.Arrays; @@ -56,6 +55,7 @@ public CRAMEncoder getEncoder(final Bundle outputBundle, final ReadsEncoderOptio } @Override - protected String getSignatureString() { return CRAM_MAGIC_3_1; } - + protected String getSignatureString() { + return CRAM_MAGIC_3_1; + } } diff --git a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_1/CRAMDecoderV3_1.java b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_1/CRAMDecoderV3_1.java index ccfd90a141..895589dab5 
100644 --- a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_1/CRAMDecoderV3_1.java +++ b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_1/CRAMDecoderV3_1.java @@ -27,5 +27,4 @@ public CRAMDecoderV3_1(final Bundle bundle, final ReadsDecoderOptions readsDecod public HtsVersion getVersion() { return CRAMCodecV3_1.VERSION_3_1; } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_1/CRAMEncoderV3_1.java b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_1/CRAMEncoderV3_1.java index fdcc53a8cc..0d2618fc2e 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_1/CRAMEncoderV3_1.java +++ b/src/main/java/htsjdk/beta/codecs/reads/cram/cramV3_1/CRAMEncoderV3_1.java @@ -5,7 +5,6 @@ import htsjdk.beta.io.bundle.BundleResourceType; import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.reads.ReadsEncoderOptions; -import htsjdk.samtools.cram.CRAMException; /** * CRAM v3.1 encoder. @@ -22,12 +21,10 @@ public class CRAMEncoderV3_1 extends CRAMEncoder { */ public CRAMEncoderV3_1(final Bundle outputBundle, final ReadsEncoderOptions readsEncoderOptions) { super(outputBundle, readsEncoderOptions); - throw new CRAMException("CRAM v3.1 encoding is not yet supported"); } @Override public HtsVersion getVersion() { return CRAMCodecV3_1.VERSION_3_1; } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/htsget/HtsgetBAMCodec.java b/src/main/java/htsjdk/beta/codecs/reads/htsget/HtsgetBAMCodec.java index c8adccbf7a..f2934f1579 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/htsget/HtsgetBAMCodec.java +++ b/src/main/java/htsjdk/beta/codecs/reads/htsget/HtsgetBAMCodec.java @@ -10,12 +10,10 @@ import htsjdk.samtools.util.FileExtensions; import htsjdk.samtools.util.htsget.HtsgetFormat; import htsjdk.samtools.util.htsget.HtsgetRequest; - import java.util.Arrays; import java.util.HashSet; import java.util.Set; - /** * Base class for concrete implementations of reads codecs that handle * {@link BundleResourceType#FMT_READS_HTSGET_BAM} codecs. 
@@ -31,10 +29,14 @@ public abstract class HtsgetBAMCodec implements ReadsCodec { /** * The HtsGet protocol doesn't seem to have a version number ? */ - public HtsVersion getVersion() { return HTSGET_VERSION; } + public HtsVersion getVersion() { + return HTSGET_VERSION; + } @Override - public String getFileFormat() { return ReadsFormats.HTSGET_BAM; } + public String getFileFormat() { + return ReadsFormats.HTSGET_BAM; + } @Override public int getSignatureLength() { @@ -48,25 +50,26 @@ public boolean ownsURI(final IOPath ioPath) { private boolean matchesScheme(final IOPath ioPath) { final String scheme = ioPath.getScheme(); - return scheme.equals(HtsgetBAMFileReader.HTSGET_SCHEME) || - scheme.equals("https") || - scheme.equals("http"); + return scheme.equals(HtsgetBAMFileReader.HTSGET_SCHEME) || scheme.equals("https") || scheme.equals("http"); } public boolean handlesURI(final IOPath ioPath) { - final boolean hasExtension = extensionMap.stream().anyMatch(ext-> ioPath.hasExtension(ext)); + final boolean hasExtension = extensionMap.stream().anyMatch(ext -> ioPath.hasExtension(ext)); final boolean hasScheme = matchesScheme(ioPath); - //TODO: does this check for "/reads/" at the start of the path ? should it ? + // TODO: does this check for "/reads/" at the start of the path ? should it ? 
final HtsgetRequest htsgetRequest = new HtsgetRequest(ioPath.getURI()); // no format == default == BAM - final boolean matchesRequestType = htsgetRequest.getFormat() == null || htsgetRequest.getFormat() == HtsgetFormat.BAM; + final boolean matchesRequestType = + htsgetRequest.getFormat() == null || htsgetRequest.getFormat() == HtsgetFormat.BAM; return hasExtension && hasScheme && matchesRequestType; } @Override - public boolean canDecodeURI(final IOPath ioPath) { return handlesURI(ioPath); } + public boolean canDecodeURI(final IOPath ioPath) { + return handlesURI(ioPath); + } @Override public boolean canDecodeSignature(final SignatureStream probingInputStream, final String sourceName) { @@ -74,11 +77,13 @@ public boolean canDecodeSignature(final SignatureStream probingInputStream, fina } boolean isQueryable() { - //is this correct ?? + // is this correct ?? return true; } - boolean hasIndex() { return false; } + boolean hasIndex() { + return false; + } @Override public boolean runVersionUpgrade(final HtsVersion sourceCodecVersion, final HtsVersion targetCodecVersion) { diff --git a/src/main/java/htsjdk/beta/codecs/reads/htsget/HtsgetBAMDecoder.java b/src/main/java/htsjdk/beta/codecs/reads/htsget/HtsgetBAMDecoder.java index 59bc35e6a1..ebb71992f9 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/htsget/HtsgetBAMDecoder.java +++ b/src/main/java/htsjdk/beta/codecs/reads/htsget/HtsgetBAMDecoder.java @@ -19,11 +19,14 @@ public abstract class HtsgetBAMDecoder implements ReadsDecoder { public HtsgetBAMDecoder(final Bundle inputBundle, final ReadsDecoderOptions readsDecoderOptions) { this.inputBundle = inputBundle; this.readsDecoderOptions = readsDecoderOptions; - this.displayName = inputBundle.getOrThrow(BundleResourceType.CT_ALIGNED_READS).getDisplayName(); + this.displayName = + inputBundle.getOrThrow(BundleResourceType.CT_ALIGNED_READS).getDisplayName(); } @Override - public String getFileFormat() { return ReadsFormats.HTSGET_BAM; } + public String getFileFormat() { 
+ return ReadsFormats.HTSGET_BAM; + } @Override public HtsVersion getVersion() { @@ -31,7 +34,9 @@ public HtsVersion getVersion() { } @Override - final public String getDisplayName() { return displayName; } + public final String getDisplayName() { + return displayName; + } /** * Get the input {@link Bundle} for this decoder. diff --git a/src/main/java/htsjdk/beta/codecs/reads/htsget/htsgetBAMV1_2/HtsgetBAMCodecV1_2.java b/src/main/java/htsjdk/beta/codecs/reads/htsget/htsgetBAMV1_2/HtsgetBAMCodecV1_2.java index 2a870beab7..4560d9c8ee 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/htsget/htsgetBAMV1_2/HtsgetBAMCodecV1_2.java +++ b/src/main/java/htsjdk/beta/codecs/reads/htsget/htsgetBAMV1_2/HtsgetBAMCodecV1_2.java @@ -2,15 +2,14 @@ import htsjdk.beta.codecs.reads.htsget.HtsgetBAMCodec; import htsjdk.beta.codecs.reads.htsget.HtsgetBAMDecoder; -import htsjdk.beta.plugin.HtsEncoder; -import htsjdk.beta.plugin.HtsRecord; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResource; import htsjdk.beta.io.bundle.BundleResourceType; -import htsjdk.io.IOPath; -import htsjdk.beta.plugin.reads.ReadsEncoderOptions; +import htsjdk.beta.plugin.HtsEncoder; +import htsjdk.beta.plugin.HtsRecord; import htsjdk.beta.plugin.reads.ReadsDecoderOptions; - +import htsjdk.beta.plugin.reads.ReadsEncoderOptions; +import htsjdk.io.IOPath; import java.util.Optional; /** @@ -19,8 +18,7 @@ public class HtsgetBAMCodecV1_2 extends HtsgetBAMCodec { @Override - public HtsgetBAMDecoder getDecoder(final Bundle inputBundle, - final ReadsDecoderOptions decodeOptions) { + public HtsgetBAMDecoder getDecoder(final Bundle inputBundle, final ReadsDecoderOptions decodeOptions) { final BundleResource readsResource = inputBundle.getOrThrow(BundleResourceType.CT_ALIGNED_READS); final Optional inputPath = readsResource.getIOPath(); if (!inputPath.isPresent()) { @@ -33,5 +31,4 @@ public HtsgetBAMDecoder getDecoder(final Bundle inputBundle, public HtsEncoder getEncoder(Bundle outputBundle, 
ReadsEncoderOptions encodeOptions) { throw new IllegalArgumentException("Htsget is read only - no Htsget BAM encoder component is available."); } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/htsget/htsgetBAMV1_2/HtsgetBAMDecoderV1_2.java b/src/main/java/htsjdk/beta/codecs/reads/htsget/htsgetBAMV1_2/HtsgetBAMDecoderV1_2.java index e3593ea5f1..3bb51eff7a 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/htsget/htsgetBAMV1_2/HtsgetBAMDecoderV1_2.java +++ b/src/main/java/htsjdk/beta/codecs/reads/htsget/htsgetBAMV1_2/HtsgetBAMDecoderV1_2.java @@ -1,6 +1,8 @@ package htsjdk.beta.codecs.reads.htsget.htsgetBAMV1_2; import htsjdk.beta.codecs.reads.htsget.HtsgetBAMDecoder; +import htsjdk.beta.exception.HtsjdkIOException; +import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResource; import htsjdk.beta.io.bundle.BundleResourceType; @@ -8,15 +10,12 @@ import htsjdk.beta.plugin.interval.HtsIntervalUtils; import htsjdk.beta.plugin.interval.HtsQueryRule; import htsjdk.beta.plugin.reads.ReadsDecoderOptions; -import htsjdk.beta.exception.HtsjdkIOException; -import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; import htsjdk.samtools.DefaultSAMRecordFactory; import htsjdk.samtools.HtsgetBAMFileReader; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.util.CloseableIterator; - import java.io.IOException; import java.util.List; import java.util.Optional; @@ -45,7 +44,10 @@ public HtsgetBAMDecoderV1_2(final Bundle inputBundle, final ReadsDecoderOptions false); } catch (IOException e) { throw new HtsjdkIOException( - String.format("Failure opening Htsget reader on %s", readsResource.getIOPath().get()), e); + String.format( + "Failure opening Htsget reader on %s", + readsResource.getIOPath().get()), + e); } } @@ -84,8 +86,7 @@ public CloseableIterator query(final String queryString) { 
@Override public CloseableIterator query(final List intervals, final HtsQueryRule queryRule) { return htsgetReader.query( - HtsIntervalUtils.toLocatableList(intervals), - (queryRule == HtsQueryRule.CONTAINED) == true); + HtsIntervalUtils.toLocatableList(intervals), (queryRule == HtsQueryRule.CONTAINED) == true); } @Override @@ -100,7 +101,7 @@ public CloseableIterator queryUnmapped() { @Override public Optional queryMate(final SAMRecord rec) { - //reader doesn't support this + // reader doesn't support this throw new HtsjdkUnsupportedOperationException("queryMate not implemented for htsget BAM reader"); } } diff --git a/src/main/java/htsjdk/beta/codecs/reads/sam/SAMCodec.java b/src/main/java/htsjdk/beta/codecs/reads/sam/SAMCodec.java index 7e88ddac2b..580c5d9a47 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/sam/SAMCodec.java +++ b/src/main/java/htsjdk/beta/codecs/reads/sam/SAMCodec.java @@ -1,14 +1,13 @@ package htsjdk.beta.codecs.reads.sam; +import htsjdk.annotations.InternalAPI; import htsjdk.beta.exception.HtsjdkIOException; import htsjdk.beta.io.bundle.BundleResourceType; import htsjdk.beta.io.bundle.SignatureStream; import htsjdk.beta.plugin.reads.ReadsCodec; import htsjdk.beta.plugin.reads.ReadsFormats; import htsjdk.io.IOPath; -import htsjdk.annotations.InternalAPI; import htsjdk.utils.ValidationUtils; - import java.io.IOException; import java.util.Arrays; @@ -23,7 +22,9 @@ public abstract class SAMCodec implements ReadsCodec { private static String SAM_EXTENSION = ".sam"; @Override - public String getFileFormat() { return ReadsFormats.SAM; } + public String getFileFormat() { + return ReadsFormats.SAM; + } @Override public String getDisplayName() { @@ -31,7 +32,9 @@ public String getDisplayName() { } @Override - public boolean ownsURI(IOPath ioPath) { return false; } + public boolean ownsURI(IOPath ioPath) { + return false; + } @Override public boolean canDecodeURI(IOPath ioPath) { @@ -53,11 +56,12 @@ public boolean canDecodeSignature(final 
SignatureStream probingInputStream, fina } @Override - public int getSignatureProbeLength() { return SAM_HEADER_SENTINEL.length(); } + public int getSignatureProbeLength() { + return SAM_HEADER_SENTINEL.length(); + } @Override public int getSignatureLength() { return SAM_HEADER_SENTINEL.length(); } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/sam/SAMDecoder.java b/src/main/java/htsjdk/beta/codecs/reads/sam/SAMDecoder.java index 32aca0067c..411de1f397 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/sam/SAMDecoder.java +++ b/src/main/java/htsjdk/beta/codecs/reads/sam/SAMDecoder.java @@ -1,11 +1,11 @@ package htsjdk.beta.codecs.reads.sam; +import htsjdk.annotations.InternalAPI; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResourceType; import htsjdk.beta.plugin.reads.ReadsDecoder; import htsjdk.beta.plugin.reads.ReadsDecoderOptions; import htsjdk.beta.plugin.reads.ReadsFormats; -import htsjdk.annotations.InternalAPI; import htsjdk.utils.ValidationUtils; /** @@ -30,18 +30,23 @@ public abstract class SAMDecoder implements ReadsDecoder { */ @InternalAPI public SAMDecoder(final Bundle inputBundle, final ReadsDecoderOptions readsDecoderOptions) { - ValidationUtils.nonNull(inputBundle,"inputBundle"); + ValidationUtils.nonNull(inputBundle, "inputBundle"); ValidationUtils.nonNull(readsDecoderOptions, "readsDecoderOptions"); this.inputBundle = inputBundle; - this.displayName = inputBundle.getOrThrow(BundleResourceType.CT_ALIGNED_READS).getDisplayName(); + this.displayName = + inputBundle.getOrThrow(BundleResourceType.CT_ALIGNED_READS).getDisplayName(); this.readsDecoderOptions = readsDecoderOptions; } @Override - final public String getFileFormat() { return ReadsFormats.SAM; } + public final String getFileFormat() { + return ReadsFormats.SAM; + } @Override - final public String getDisplayName() { return displayName; } + public final String getDisplayName() { + return displayName; + } /** * Get the input {@link Bundle} for this decoder. 
diff --git a/src/main/java/htsjdk/beta/codecs/reads/sam/SAMEncoder.java b/src/main/java/htsjdk/beta/codecs/reads/sam/SAMEncoder.java index 2eda3fcf69..d0bea8bc59 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/sam/SAMEncoder.java +++ b/src/main/java/htsjdk/beta/codecs/reads/sam/SAMEncoder.java @@ -32,14 +32,19 @@ public abstract class SAMEncoder implements ReadsEncoder { public SAMEncoder(final Bundle outputBundle, final ReadsEncoderOptions readsEncoderOptions) { this.outputBundle = outputBundle; this.readsEncoderOptions = readsEncoderOptions; - this.displayName = outputBundle.getOrThrow(BundleResourceType.CT_ALIGNED_READS).getDisplayName(); + this.displayName = + outputBundle.getOrThrow(BundleResourceType.CT_ALIGNED_READS).getDisplayName(); } @Override - final public String getFileFormat() { return ReadsFormats.SAM; } + public final String getFileFormat() { + return ReadsFormats.SAM; + } @Override - final public String getDisplayName() { return displayName; } + public final String getDisplayName() { + return displayName; + } /** * Get the output {@link Bundle} for this encoder. 
@@ -58,5 +63,4 @@ public Bundle getOutputBundle() { public ReadsEncoderOptions getReadsEncoderOptions() { return readsEncoderOptions; } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/sam/samV1_0/SAMCodecV1_0.java b/src/main/java/htsjdk/beta/codecs/reads/sam/samV1_0/SAMCodecV1_0.java index e858734f66..5ef9849858 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/sam/samV1_0/SAMCodecV1_0.java +++ b/src/main/java/htsjdk/beta/codecs/reads/sam/samV1_0/SAMCodecV1_0.java @@ -4,8 +4,8 @@ import htsjdk.beta.codecs.reads.sam.SAMDecoder; import htsjdk.beta.codecs.reads.sam.SAMEncoder; import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.reads.ReadsDecoderOptions; import htsjdk.beta.plugin.reads.ReadsEncoderOptions; @@ -16,7 +16,9 @@ public class SAMCodecV1_0 extends SAMCodec { public static final HtsVersion VERSION_1 = new HtsVersion(1, 0, 0); @Override - public HtsVersion getVersion() { return VERSION_1; } + public HtsVersion getVersion() { + return VERSION_1; + } @Override public SAMDecoder getDecoder(final Bundle inputBundle, final ReadsDecoderOptions decoderOptions) { @@ -32,5 +34,4 @@ public SAMEncoder getEncoder(final Bundle outputBundle, final ReadsEncoderOption public boolean runVersionUpgrade(final HtsVersion sourceCodecVersion, final HtsVersion targetCodecVersion) { throw new HtsjdkUnsupportedOperationException("Version upgrade not yet implemented"); } - } diff --git a/src/main/java/htsjdk/beta/codecs/reads/sam/samV1_0/SAMDecoderV1_0.java b/src/main/java/htsjdk/beta/codecs/reads/sam/samV1_0/SAMDecoderV1_0.java index 9fad68240b..e87e3e9d74 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/sam/samV1_0/SAMDecoderV1_0.java +++ b/src/main/java/htsjdk/beta/codecs/reads/sam/samV1_0/SAMDecoderV1_0.java @@ -3,16 +3,15 @@ import htsjdk.beta.codecs.reads.ReadsCodecUtils; import 
htsjdk.beta.codecs.reads.sam.SAMDecoder; import htsjdk.beta.exception.HtsjdkIOException; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResourceType; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.reads.ReadsDecoderOptions; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.util.CloseableIterator; - import java.io.IOException; import java.util.Optional; @@ -56,7 +55,9 @@ public CloseableIterator iterator() { } @Override - public boolean isQueryable() { return false; } + public boolean isQueryable() { + return false; + } @Override public boolean hasIndex() { diff --git a/src/main/java/htsjdk/beta/codecs/reads/sam/samV1_0/SAMEncoderV1_0.java b/src/main/java/htsjdk/beta/codecs/reads/sam/samV1_0/SAMEncoderV1_0.java index 0274a49d54..4b1f4e231e 100644 --- a/src/main/java/htsjdk/beta/codecs/reads/sam/samV1_0/SAMEncoderV1_0.java +++ b/src/main/java/htsjdk/beta/codecs/reads/sam/samV1_0/SAMEncoderV1_0.java @@ -1,18 +1,17 @@ package htsjdk.beta.codecs.reads.sam.samV1_0; +import htsjdk.annotations.InternalAPI; import htsjdk.beta.codecs.reads.sam.SAMEncoder; import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResource; import htsjdk.beta.io.bundle.BundleResourceType; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.reads.ReadsEncoderOptions; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMFileWriter; import htsjdk.samtools.SAMFileWriterFactory; import htsjdk.samtools.SAMRecord; -import htsjdk.annotations.InternalAPI; - import java.util.Optional; /** @@ -48,8 +47,7 @@ public void setHeader(final SAMFileHeader samFileHeader) { public void write(final SAMRecord record) { if (samFileWriter == null) { throw new 
IllegalStateException(String.format( - "A SAMFileHeader must be established before a SAM writer can be established %s", - getDisplayName())); + "A SAMFileHeader must be established before a SAM writer can be established %s", getDisplayName())); } samFileWriter.addAlignment(record); } @@ -62,8 +60,7 @@ public void close() { } private SAMFileWriter getSAMFileWriter( - final ReadsEncoderOptions readsEncoderOptions, - final SAMFileHeader samFileHeader) { + final ReadsEncoderOptions readsEncoderOptions, final SAMFileHeader samFileHeader) { final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory(); final boolean preSorted = readsEncoderOptions.isPreSorted(); @@ -72,7 +69,7 @@ private SAMFileWriter getSAMFileWriter( final Optional optIndexResource = getOutputBundle().get(BundleResourceType.CT_READS_INDEX); final Optional optMD5Resource = getOutputBundle().get(BundleResourceType.CT_MD5); - //TODO: BAMFileWriter currently only supports writing an index to a plain file, so for now + // TODO: BAMFileWriter currently only supports writing an index to a plain file, so for now // throw if an index is requested on any other type if (optIndexResource.isPresent()) { final BundleResource indexResource = optIndexResource.get(); @@ -83,24 +80,20 @@ private SAMFileWriter getSAMFileWriter( } } - //TODO: BAMFileWriter currently only supports writing an md5 to a plain file with a name that + // TODO: BAMFileWriter currently only supports writing an md5 to a plain file with a name that // it chooses, so throw if an md5 resource is specified since we can't direct it to the specified // resource if (optMD5Resource.isPresent()) { - throw new HtsjdkUnsupportedOperationException(String.format( - "Specifying an an MD5 resource name not yet implemented on %s", getDisplayName())); + throw new HtsjdkUnsupportedOperationException( + String.format("Specifying an an MD5 resource name not yet implemented on %s", getDisplayName())); } if (readsResource.getIOPath().isPresent()) { 
return samFileWriterFactory.makeSAMWriter( - samFileHeader, - preSorted, - readsResource.getIOPath().get().toPath()); + samFileHeader, preSorted, readsResource.getIOPath().get().toPath()); } else { return samFileWriterFactory.makeSAMWriter( - samFileHeader, - preSorted, - readsResource.getOutputStream().get()); + samFileHeader, preSorted, readsResource.getOutputStream().get()); } } } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/VCFCodec.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/VCFCodec.java index c18a60b34e..adc17f8689 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/VCFCodec.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/VCFCodec.java @@ -1,17 +1,16 @@ package htsjdk.beta.codecs.variants.vcf; +import htsjdk.annotations.InternalAPI; import htsjdk.beta.exception.HtsjdkIOException; -import htsjdk.beta.plugin.HtsContentType; import htsjdk.beta.io.bundle.SignatureStream; -import htsjdk.io.IOPath; +import htsjdk.beta.plugin.HtsContentType; import htsjdk.beta.plugin.variants.VariantsCodec; import htsjdk.beta.plugin.variants.VariantsFormats; +import htsjdk.io.IOPath; import htsjdk.samtools.util.BlockCompressedStreamConstants; import htsjdk.samtools.util.FileExtensions; import htsjdk.samtools.util.IOUtil; -import htsjdk.annotations.InternalAPI; import htsjdk.utils.ValidationUtils; - import java.io.IOException; import java.io.InputStream; import java.util.Arrays; @@ -29,6 +28,7 @@ public abstract class VCFCodec implements VariantsCodec { // FileExtensions.VCF_LIST includes BCF, which we don't want included here private static final Set extensionMap = new HashSet() { private static final long serialVersionUID = 1L; + { add(FileExtensions.VCF); add(FileExtensions.COMPRESSED_VCF); @@ -37,13 +37,15 @@ public abstract class VCFCodec implements VariantsCodec { }; @Override - public String getFileFormat() { return VariantsFormats.VCF; } + public String getFileFormat() { + return VariantsFormats.VCF; + } @Override public boolean 
canDecodeURI(final IOPath ioPath) { ValidationUtils.nonNull(ioPath, "ioPath"); - return extensionMap.stream().anyMatch(ext-> ioPath.hasExtension(ext)); + return extensionMap.stream().anyMatch(ext -> ioPath.hasExtension(ext)); } @Override @@ -52,7 +54,9 @@ public int getSignatureLength() { } @Override - public int getSignatureProbeLength() { return BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE; } + public int getSignatureProbeLength() { + return BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE; + } @Override public boolean canDecodeSignature(final SignatureStream probingInputStream, final String sourceName) { @@ -61,9 +65,9 @@ public boolean canDecodeSignature(final SignatureStream probingInputStream, fina final byte[] signatureBytes = new byte[getSignatureLength()]; try { - final InputStream wrappedInputStream = IOUtil.isGZIPInputStream(probingInputStream) ? - new GZIPInputStream(probingInputStream) : - probingInputStream; + final InputStream wrappedInputStream = IOUtil.isGZIPInputStream(probingInputStream) + ? 
new GZIPInputStream(probingInputStream) + : probingInputStream; final int numRead = wrappedInputStream.read(signatureBytes); if (numRead < 0) { throw new HtsjdkIOException(String.format("0 bytes read from input stream for %s", sourceName)); @@ -80,5 +84,4 @@ public boolean canDecodeSignature(final SignatureStream probingInputStream, fina * @return the signature string for this codec */ protected abstract String getSignatureString(); - } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/VCFDecoder.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/VCFDecoder.java index 31674fa064..82e80e28a5 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/VCFDecoder.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/VCFDecoder.java @@ -1,12 +1,13 @@ package htsjdk.beta.codecs.variants.vcf; +import htsjdk.annotations.InternalAPI; import htsjdk.beta.exception.HtsjdkException; import htsjdk.beta.exception.HtsjdkIOException; import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; -import htsjdk.beta.plugin.HtsContentType; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResource; import htsjdk.beta.io.bundle.BundleResourceType; +import htsjdk.beta.plugin.HtsContentType; import htsjdk.beta.plugin.interval.HtsInterval; import htsjdk.beta.plugin.interval.HtsIntervalUtils; import htsjdk.beta.plugin.interval.HtsQueryRule; @@ -19,17 +20,13 @@ import htsjdk.samtools.util.CloseableIterator; import htsjdk.tribble.AbstractFeatureReader; import htsjdk.tribble.FeatureReader; -import htsjdk.annotations.InternalAPI; import htsjdk.utils.ValidationUtils; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.AbstractVCFCodec; import htsjdk.variant.vcf.VCFHeader; - import java.io.IOException; -import java.nio.channels.SeekableByteChannel; import java.util.List; import java.util.Optional; -import java.util.function.Function; /** * InternalAPII @@ -65,16 +62,21 @@ public VCFDecoder( this.inputBundle = inputBundle; 
this.variantsDecoderOptions = variantsDecoderOptions; - this.displayName = inputBundle.getOrThrow(BundleResourceType.CT_VARIANT_CONTEXTS).getDisplayName(); + this.displayName = + inputBundle.getOrThrow(BundleResourceType.CT_VARIANT_CONTEXTS).getDisplayName(); vcfReader = getVCFReader(inputBundle, vcfCodec, variantsDecoderOptions); vcfHeader = (VCFHeader) vcfReader.getHeader(); } @Override - final public String getFileFormat() { return VariantsFormats.VCF; } + public final String getFileFormat() { + return VariantsFormats.VCF; + } @Override - final public String getDisplayName() { return displayName; } + public final String getDisplayName() { + return displayName; + } @Override public VCFHeader getHeader() { @@ -86,7 +88,8 @@ public CloseableIterator iterator() { try { return vcfReader.iterator(); } catch (IOException e) { - throw new HtsjdkIOException(String.format("Exception creating variant context iterator for %s", displayName), e); + throw new HtsjdkIOException( + String.format("Exception creating variant context iterator for %s", displayName), e); } } @@ -108,13 +111,15 @@ public CloseableIterator query(final List intervals VariantsCodecUtils.assertBundleContainsIndex(getInputBundle()); if (intervals.size() > 1) { - //TODO: implement lists, sorting, merging, and ensuring that features that overlap more than one interval + // TODO: implement lists, sorting, merging, and ensuring that features that overlap more than one interval // are only returned once - throw new HtsjdkUnsupportedOperationException(String.format("query for lists not yet implemented for decoder %s", displayName)); + throw new HtsjdkUnsupportedOperationException( + String.format("query for lists not yet implemented for decoder %s", displayName)); } if (queryRule != HtsQueryRule.OVERLAPPING) { - //TODO: implement overlapping - throw new HtsjdkUnsupportedOperationException(String.format("query for contained intervals not implemented for this decoder %s", displayName)); + // TODO: implement 
overlapping + throw new HtsjdkUnsupportedOperationException( + String.format("query for contained intervals not implemented for this decoder %s", displayName)); } try { @@ -140,19 +145,18 @@ public CloseableIterator queryStart(final String queryName, fina if (vcfHeader == null) { throw new HtsjdkException(String.format( - "A valid VCF header is required to execute a query, but is not present: %s.", - displayName)); + "A valid VCF header is required to execute a query, but is not present: %s.", displayName)); } final SAMSequenceDictionary seqDict = vcfHeader.getSequenceDictionary(); if (seqDict == null) { - throw new HtsjdkException(String.format("No sequence dictionary is present in the input: %s.", displayName)); + throw new HtsjdkException( + String.format("No sequence dictionary is present in the input: %s.", displayName)); } final SAMSequenceRecord samSequenceRecord = seqDict.getSequence(queryName); if (samSequenceRecord == null) { throw new HtsjdkException(String.format( "The query name %s is not present in the dictionary provided in the input: %s.", - queryName, - displayName)); + queryName, displayName)); } final int length = samSequenceRecord.getSequenceLength(); try { @@ -190,22 +194,19 @@ public VariantsDecoderOptions getReadsDecoderOptions() { } private static FeatureReader getVCFReader( - final Bundle inputBundle, - final AbstractVCFCodec vcfCodec, - final VariantsDecoderOptions decoderOptions) { + final Bundle inputBundle, final AbstractVCFCodec vcfCodec, final VariantsDecoderOptions decoderOptions) { final BundleResource variantsResource = inputBundle.getOrThrow(BundleResourceType.CT_VARIANT_CONTEXTS); if (!variantsResource.hasInputType()) { throw new IllegalArgumentException(String.format( "The provided %s resource (%s) must be a readable/input resource", - BundleResourceType.CT_VARIANT_CONTEXTS, - variantsResource)); + BundleResourceType.CT_VARIANT_CONTEXTS, variantsResource)); } else if (variantsResource.getIOPath().isEmpty()) { throw new 
HtsjdkUnsupportedOperationException("VCF reader from stream not implemented"); } final IOPath variantsIOPath = variantsResource.getIOPath().get(); final Optional indexIOPath = getIndexIOPath(inputBundle); - //TODO: this resolves the index automatically. it should check to make sure the provided index + // TODO: this resolves the index automatically. it should check to make sure the provided index // matches the one that is automatically resolved, otherwise throw since the request will not be honored return AbstractFeatureReader.getFeatureReader( variantsIOPath.getURIString(), @@ -213,8 +214,7 @@ private static FeatureReader getVCFReader( vcfCodec, indexIOPath.isPresent(), decoderOptions.getVariantsChannelTransformer().orElse(null), - decoderOptions.getIndexChannelTransformer().orElse(null) - ); + decoderOptions.getIndexChannelTransformer().orElse(null)); } // the underlying readers can't handle index streams, so for now we can only handle IOPaths @@ -226,14 +226,12 @@ private static Optional getIndexIOPath(final Bundle inputBundle) { final BundleResource indexResource = optIndexResource.get(); if (!indexResource.hasInputType()) { throw new IllegalArgumentException(String.format( - "The provided %s index resource (%s) must be a readable/input resource", - BundleResourceType.CT_VARIANTS_INDEX, - indexResource)); + "The provided %s index resource (%s) must be a readable/input resource", + BundleResourceType.CT_VARIANTS_INDEX, indexResource)); } if (indexResource.getIOPath().isEmpty()) { throw new HtsjdkUnsupportedOperationException("Reading a VCF index from a stream not implemented"); } return indexResource.getIOPath(); } - } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/VCFEncoder.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/VCFEncoder.java index ab17269024..a30a40c29e 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/VCFEncoder.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/VCFEncoder.java @@ -1,26 +1,24 @@ package 
htsjdk.beta.codecs.variants.vcf; +import htsjdk.annotations.InternalAPI; import htsjdk.beta.exception.HtsjdkPluginException; import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; -import htsjdk.beta.plugin.HtsContentType; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResource; import htsjdk.beta.io.bundle.BundleResourceType; +import htsjdk.beta.plugin.HtsContentType; import htsjdk.beta.plugin.variants.VariantsEncoder; import htsjdk.beta.plugin.variants.VariantsEncoderOptions; import htsjdk.beta.plugin.variants.VariantsFormats; import htsjdk.io.IOPath; -import htsjdk.annotations.InternalAPI; import htsjdk.utils.ValidationUtils; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.writer.Options; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder; import htsjdk.variant.vcf.VCFHeader; - import java.util.Optional; - /** * InternalAPI * @@ -51,14 +49,19 @@ public VCFEncoder(final Bundle outputBundle, final VariantsEncoderOptions varian this.outputBundle = outputBundle; this.variantsEncoderOptions = variantsEncoderOptions; - this.displayName = outputBundle.getOrThrow(BundleResourceType.CT_VARIANT_CONTEXTS).getDisplayName(); + this.displayName = + outputBundle.getOrThrow(BundleResourceType.CT_VARIANT_CONTEXTS).getDisplayName(); } - @Override - final public String getFileFormat() { return VariantsFormats.VCF; } + @Override + public final String getFileFormat() { + return VariantsFormats.VCF; + } @Override - final public String getDisplayName() { return displayName; } + public final String getDisplayName() { + return displayName; + } @Override public void setHeader(final VCFHeader vcfHeader) { @@ -104,21 +107,17 @@ public VariantsEncoderOptions getVariantsEncoderOptions() { } private static VariantContextWriter getVCFWriter( - final Bundle outputBundle, - final VariantsEncoderOptions variantsEncoderOptions) { - 
final VariantContextWriterBuilder writerBuilder = - variantsEncoderOptionsToVariantContextWriterBuilder( - variantsEncoderOptions, - outputBundle.get(BundleResourceType.CT_VARIANTS_INDEX).isPresent() - ); + final Bundle outputBundle, final VariantsEncoderOptions variantsEncoderOptions) { + final VariantContextWriterBuilder writerBuilder = variantsEncoderOptionsToVariantContextWriterBuilder( + variantsEncoderOptions, + outputBundle.get(BundleResourceType.CT_VARIANTS_INDEX).isPresent()); setWriterBuilderOutputs(writerBuilder, outputBundle); return writerBuilder.build(); } // propagate VariantsEncoderOptions -> VariantContextWriterBuilder private static VariantContextWriterBuilder variantsEncoderOptionsToVariantContextWriterBuilder( - final VariantsEncoderOptions variantsEncoderOptions, - final boolean createIndex) { + final VariantsEncoderOptions variantsEncoderOptions, final boolean createIndex) { final VariantContextWriterBuilder vcWriterBuilder = new VariantContextWriterBuilder(); vcWriterBuilder.clearOptions(); @@ -152,21 +151,19 @@ private static VariantContextWriterBuilder variantsEncoderOptionsToVariantContex } private static void setWriterBuilderOutputs( - final VariantContextWriterBuilder writerBuilder, - final Bundle outputBundle) { + final VariantContextWriterBuilder writerBuilder, final Bundle outputBundle) { final BundleResource variantsResource = outputBundle.getOrThrow(BundleResourceType.CT_VARIANT_CONTEXTS); if (!variantsResource.hasOutputType()) { throw new IllegalArgumentException(String.format( "The provided %s resource (%s) must be a writeable/output resource", - BundleResourceType.CT_VARIANT_CONTEXTS, - variantsResource)); + BundleResourceType.CT_VARIANT_CONTEXTS, variantsResource)); } final Optional optIndexIOPath = getIndexIOPath(outputBundle); if (variantsResource.getIOPath().isPresent()) { final IOPath variantsIOPath = variantsResource.getIOPath().get(); if (optIndexIOPath.isPresent()) { - //TODO: this resolves the index automatically. 
it should check to make sure the provided index + // TODO: this resolves the index automatically. it should check to make sure the provided index // matches the one that is automatically resolved, otherwise throw since the request will not be honored } writerBuilder.setOutputPath(variantsIOPath.toPath()); @@ -175,8 +172,7 @@ private static void setWriterBuilderOutputs( if (optIndexIOPath.isPresent()) { throw new HtsjdkUnsupportedOperationException(String.format( "Can't write a VCF index to file %s when output is written to a stream %s", - optIndexIOPath.get(), - variantsResource)); + optIndexIOPath.get(), variantsResource)); } // VariantContextWriterBuilder doesn't provide any buffering, but if we were to wrap the provided // stream in a buffered stream here, we wouldn't be able to properly control the flushing or lifetime @@ -197,13 +193,11 @@ private static void setWriterBuilderOutputs( private static void validateImputedOutputType(final IOPath variantsIOPath) { final VariantContextWriterBuilder.OutputType imputedOutputType = VariantContextWriterBuilder.determineOutputTypeFromFile(variantsIOPath.toPath()); - if (imputedOutputType != VariantContextWriterBuilder.OutputType.VCF && - imputedOutputType != VariantContextWriterBuilder.OutputType.BLOCK_COMPRESSED_VCF) { + if (imputedOutputType != VariantContextWriterBuilder.OutputType.VCF + && imputedOutputType != VariantContextWriterBuilder.OutputType.BLOCK_COMPRESSED_VCF) { throw new HtsjdkPluginException(String.format( "An unsupported output type %s was derived for the resource %s ", - imputedOutputType, - variantsIOPath.getRawInputString() - )); + imputedOutputType, variantsIOPath.getRawInputString())); } } @@ -216,13 +210,11 @@ private static Optional getIndexIOPath(final Bundle outputBundle) { if (!indexResource.hasOutputType()) { throw new IllegalArgumentException(String.format( "The provided %s index resource (%s) must be a writeable/output resource", - BundleResourceType.CT_VARIANTS_INDEX, - indexResource)); + 
BundleResourceType.CT_VARIANTS_INDEX, indexResource)); } if (!indexResource.getIOPath().isPresent()) { throw new HtsjdkUnsupportedOperationException("Writing a VCF index to a stream not implemented"); } return indexResource.getIOPath(); } - } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/VariantsCodecUtils.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/VariantsCodecUtils.java index ecf5375f42..82410b37f9 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/VariantsCodecUtils.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/VariantsCodecUtils.java @@ -1,8 +1,8 @@ package htsjdk.beta.codecs.variants.vcf; +import htsjdk.annotations.InternalAPI; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResourceType; -import htsjdk.annotations.InternalAPI; /** * Utilities for VCF codec implementations. @@ -38,8 +38,7 @@ public static void assertBundleContainsIndex(final Bundle inputBundle) { if (!bundleContainsIndex(inputBundle)) { throw new IllegalArgumentException(String.format( "To make index queries, an index resource must be provided in the resource bundle: %s", - inputBundle - )); + inputBundle)); } } } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_2/VCFCodecV3_2.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_2/VCFCodecV3_2.java index 5956aedc0a..711e32c0cc 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_2/VCFCodecV3_2.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_2/VCFCodecV3_2.java @@ -4,8 +4,8 @@ import htsjdk.beta.codecs.variants.vcf.VCFDecoder; import htsjdk.beta.codecs.variants.vcf.VCFEncoder; import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.variants.VariantsDecoderOptions; import htsjdk.beta.plugin.variants.VariantsEncoderOptions; import htsjdk.utils.ValidationUtils; @@ -14,12 +14,14 @@ * VCF 
V3.2 codec. */ public class VCFCodecV3_2 extends VCFCodec { - public static final HtsVersion VCF_V32_VERSION = new HtsVersion(3,2,0); + public static final HtsVersion VCF_V32_VERSION = new HtsVersion(3, 2, 0); private static final String VCF_V32_MAGIC = "##format=VCRv3.2"; @Override - public HtsVersion getVersion() { return VCF_V32_VERSION; } + public HtsVersion getVersion() { + return VCF_V32_VERSION; + } @Override public VCFDecoder getDecoder(final Bundle inputBundle, final VariantsDecoderOptions decoderOptions) { @@ -43,6 +45,7 @@ public boolean runVersionUpgrade(final HtsVersion sourceCodecVersion, final HtsV } @Override - protected String getSignatureString() { return VCF_V32_MAGIC; } - + protected String getSignatureString() { + return VCF_V32_MAGIC; + } } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_2/VCFDecoderV3_2.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_2/VCFDecoderV3_2.java index 8b692266fd..b4547b9cfd 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_2/VCFDecoderV3_2.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_2/VCFDecoderV3_2.java @@ -1,8 +1,8 @@ package htsjdk.beta.codecs.variants.vcf.vcfv3_2; import htsjdk.beta.codecs.variants.vcf.VCFDecoder; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.variants.VariantsDecoderOptions; import htsjdk.variant.vcf.VCF3Codec; @@ -25,5 +25,4 @@ public VCFDecoderV3_2(final Bundle inputBundle, final VariantsDecoderOptions var public HtsVersion getVersion() { return VCFCodecV3_2.VCF_V32_VERSION; } - } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_2/VCFEncoderV3_2.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_2/VCFEncoderV3_2.java index 3367de3f17..967e618cd6 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_2/VCFEncoderV3_2.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_2/VCFEncoderV3_2.java @@ 
-1,8 +1,8 @@ package htsjdk.beta.codecs.variants.vcf.vcfv3_2; import htsjdk.beta.codecs.variants.vcf.VCFEncoder; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.variants.VariantsEncoderOptions; /** @@ -17,12 +17,11 @@ public class VCFEncoderV3_2 extends VCFEncoder { * @param variantsEncoderOptions the {@link VariantsEncoderOptions} to use */ public VCFEncoderV3_2(final Bundle outputBundle, final VariantsEncoderOptions variantsEncoderOptions) { - super(outputBundle,variantsEncoderOptions); + super(outputBundle, variantsEncoderOptions); } @Override public HtsVersion getVersion() { return VCFCodecV3_2.VCF_V32_VERSION; } - } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_3/VCFCodecV3_3.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_3/VCFCodecV3_3.java index df865c0208..64072b7af0 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_3/VCFCodecV3_3.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_3/VCFCodecV3_3.java @@ -4,8 +4,8 @@ import htsjdk.beta.codecs.variants.vcf.VCFDecoder; import htsjdk.beta.codecs.variants.vcf.VCFEncoder; import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.variants.VariantsDecoderOptions; import htsjdk.beta.plugin.variants.VariantsEncoderOptions; import htsjdk.utils.ValidationUtils; @@ -14,12 +14,14 @@ * VCF V3.3 codec. 
*/ public class VCFCodecV3_3 extends VCFCodec { - public static final HtsVersion VCF_V33_VERSION = new HtsVersion(3,3,0); + public static final HtsVersion VCF_V33_VERSION = new HtsVersion(3, 3, 0); private static final String VCF_V33_MAGIC = "##fileformat=VCFv3.3"; @Override - public HtsVersion getVersion() { return VCF_V33_VERSION; } + public HtsVersion getVersion() { + return VCF_V33_VERSION; + } @Override public VCFDecoder getDecoder(final Bundle inputBundle, final VariantsDecoderOptions decoderOptions) { @@ -43,6 +45,7 @@ public boolean runVersionUpgrade(final HtsVersion sourceCodecVersion, final HtsV } @Override - protected String getSignatureString() { return VCF_V33_MAGIC; } - + protected String getSignatureString() { + return VCF_V33_MAGIC; + } } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_3/VCFDecoderV3_3.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_3/VCFDecoderV3_3.java index 69a1485009..43a1700f9b 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_3/VCFDecoderV3_3.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_3/VCFDecoderV3_3.java @@ -1,8 +1,8 @@ package htsjdk.beta.codecs.variants.vcf.vcfv3_3; import htsjdk.beta.codecs.variants.vcf.VCFDecoder; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.variants.VariantsDecoderOptions; import htsjdk.variant.vcf.VCF3Codec; @@ -25,5 +25,4 @@ public VCFDecoderV3_3(final Bundle inputBundle, final VariantsDecoderOptions var public HtsVersion getVersion() { return VCFCodecV3_3.VCF_V33_VERSION; } - } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_3/VCFEncoderV3_3.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_3/VCFEncoderV3_3.java index 2e848ab4fd..82e57f63cc 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_3/VCFEncoderV3_3.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv3_3/VCFEncoderV3_3.java @@ -1,8 +1,8 
@@ package htsjdk.beta.codecs.variants.vcf.vcfv3_3; import htsjdk.beta.codecs.variants.vcf.VCFEncoder; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.variants.VariantsEncoderOptions; /** @@ -17,12 +17,11 @@ public class VCFEncoderV3_3 extends VCFEncoder { * @param variantsEncoderOptions the {@link VariantsEncoderOptions} to use */ public VCFEncoderV3_3(final Bundle outputBundle, final VariantsEncoderOptions variantsEncoderOptions) { - super(outputBundle,variantsEncoderOptions); + super(outputBundle, variantsEncoderOptions); } @Override public HtsVersion getVersion() { return VCFCodecV3_3.VCF_V33_VERSION; } - } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_0/VCFCodecV4_0.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_0/VCFCodecV4_0.java index 88d1453f1f..b7ccc4bab3 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_0/VCFCodecV4_0.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_0/VCFCodecV4_0.java @@ -4,8 +4,8 @@ import htsjdk.beta.codecs.variants.vcf.VCFDecoder; import htsjdk.beta.codecs.variants.vcf.VCFEncoder; import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.variants.VariantsDecoderOptions; import htsjdk.beta.plugin.variants.VariantsEncoderOptions; import htsjdk.utils.ValidationUtils; @@ -14,12 +14,14 @@ * VCF V4.0 codec. 
*/ public class VCFCodecV4_0 extends VCFCodec { - public static final HtsVersion VCF_V40_VERSION = new HtsVersion(4,0,0); + public static final HtsVersion VCF_V40_VERSION = new HtsVersion(4, 0, 0); private static final String VCF_V40_MAGIC = "##fileformat=VCFv4.0"; @Override - public HtsVersion getVersion() { return VCF_V40_VERSION; } + public HtsVersion getVersion() { + return VCF_V40_VERSION; + } @Override public VCFDecoder getDecoder(final Bundle inputBundle, final VariantsDecoderOptions decoderOptions) { @@ -43,6 +45,7 @@ public boolean runVersionUpgrade(final HtsVersion sourceCodecVersion, final HtsV } @Override - protected String getSignatureString() { return VCF_V40_MAGIC; } - + protected String getSignatureString() { + return VCF_V40_MAGIC; + } } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_0/VCFDecoderV4_0.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_0/VCFDecoderV4_0.java index fb0e57facc..459fc249cb 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_0/VCFDecoderV4_0.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_0/VCFDecoderV4_0.java @@ -1,8 +1,8 @@ package htsjdk.beta.codecs.variants.vcf.vcfv4_0; import htsjdk.beta.codecs.variants.vcf.VCFDecoder; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.variants.VariantsDecoderOptions; /** @@ -24,5 +24,4 @@ public VCFDecoderV4_0(final Bundle inputBundle, final VariantsDecoderOptions var public HtsVersion getVersion() { return VCFCodecV4_0.VCF_V40_VERSION; } - } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_0/VCFEncoderV4_0.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_0/VCFEncoderV4_0.java index 77b42a5318..a68b0b7ad4 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_0/VCFEncoderV4_0.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_0/VCFEncoderV4_0.java @@ -1,8 +1,8 @@ package 
htsjdk.beta.codecs.variants.vcf.vcfv4_0; import htsjdk.beta.codecs.variants.vcf.VCFEncoder; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.variants.VariantsEncoderOptions; /** @@ -17,12 +17,11 @@ public class VCFEncoderV4_0 extends VCFEncoder { * @param variantsEncoderOptions the {@link VariantsEncoderOptions} to use */ public VCFEncoderV4_0(final Bundle outputBundle, final VariantsEncoderOptions variantsEncoderOptions) { - super(outputBundle,variantsEncoderOptions); + super(outputBundle, variantsEncoderOptions); } @Override public HtsVersion getVersion() { return VCFCodecV4_0.VCF_V40_VERSION; } - } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_1/VCFCodecV4_1.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_1/VCFCodecV4_1.java index 6b114fad34..adbc44d000 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_1/VCFCodecV4_1.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_1/VCFCodecV4_1.java @@ -4,8 +4,8 @@ import htsjdk.beta.codecs.variants.vcf.VCFDecoder; import htsjdk.beta.codecs.variants.vcf.VCFEncoder; import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.variants.VariantsDecoderOptions; import htsjdk.beta.plugin.variants.VariantsEncoderOptions; import htsjdk.utils.ValidationUtils; @@ -14,12 +14,14 @@ * VCF V4.1 codec. 
*/ public class VCFCodecV4_1 extends VCFCodec { - public static final HtsVersion VCF_V41_VERSION = new HtsVersion(4,1,0); + public static final HtsVersion VCF_V41_VERSION = new HtsVersion(4, 1, 0); private static final String VCF_V41_MAGIC = "##fileformat=VCFv4.1"; @Override - public HtsVersion getVersion() { return VCF_V41_VERSION; } + public HtsVersion getVersion() { + return VCF_V41_VERSION; + } @Override public VCFDecoder getDecoder(final Bundle inputBundle, final VariantsDecoderOptions decoderOptions) { @@ -43,6 +45,7 @@ public boolean runVersionUpgrade(final HtsVersion sourceCodecVersion, final HtsV } @Override - protected String getSignatureString() { return VCF_V41_MAGIC; } - + protected String getSignatureString() { + return VCF_V41_MAGIC; + } } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_1/VCFDecoderV4_1.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_1/VCFDecoderV4_1.java index 9651ba11e0..8c85525f8e 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_1/VCFDecoderV4_1.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_1/VCFDecoderV4_1.java @@ -1,8 +1,8 @@ package htsjdk.beta.codecs.variants.vcf.vcfv4_1; import htsjdk.beta.codecs.variants.vcf.VCFDecoder; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.variants.VariantsDecoderOptions; /** @@ -24,5 +24,4 @@ public VCFDecoderV4_1(final Bundle inputBundle, final VariantsDecoderOptions var public HtsVersion getVersion() { return VCFCodecV4_1.VCF_V41_VERSION; } - } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_1/VCFEncoderV4_1.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_1/VCFEncoderV4_1.java index 0418befb3e..b7db01c2fe 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_1/VCFEncoderV4_1.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_1/VCFEncoderV4_1.java @@ -1,8 +1,8 @@ package 
htsjdk.beta.codecs.variants.vcf.vcfv4_1; import htsjdk.beta.codecs.variants.vcf.VCFEncoder; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.variants.VariantsEncoderOptions; /** @@ -17,12 +17,11 @@ public class VCFEncoderV4_1 extends VCFEncoder { * @param variantsEncoderOptions the {@link VariantsEncoderOptions} to use */ public VCFEncoderV4_1(final Bundle outputBundle, final VariantsEncoderOptions variantsEncoderOptions) { - super(outputBundle,variantsEncoderOptions); + super(outputBundle, variantsEncoderOptions); } @Override public HtsVersion getVersion() { return VCFCodecV4_1.VCF_V41_VERSION; } - } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_2/VCFCodecV4_2.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_2/VCFCodecV4_2.java index 30214c4dc1..3eaf1d26a7 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_2/VCFCodecV4_2.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_2/VCFCodecV4_2.java @@ -14,12 +14,14 @@ * VCF V4.2 codec. 
*/ public class VCFCodecV4_2 extends VCFCodec { - public static final HtsVersion VCF_V42_VERSION = new HtsVersion(4,2,0); + public static final HtsVersion VCF_V42_VERSION = new HtsVersion(4, 2, 0); private static final String VCF_V42_MAGIC = "##fileformat=VCFv4.2"; @Override - public HtsVersion getVersion() { return VCF_V42_VERSION; } + public HtsVersion getVersion() { + return VCF_V42_VERSION; + } @Override public VCFDecoder getDecoder(final Bundle inputBundle, final VariantsDecoderOptions decoderOptions) { @@ -43,6 +45,7 @@ public boolean runVersionUpgrade(final HtsVersion sourceCodecVersion, final HtsV } @Override - protected String getSignatureString() { return VCF_V42_MAGIC; } - + protected String getSignatureString() { + return VCF_V42_MAGIC; + } } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_2/VCFDecoderV4_2.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_2/VCFDecoderV4_2.java index 097a7a3d74..e30e376df3 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_2/VCFDecoderV4_2.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_2/VCFDecoderV4_2.java @@ -24,5 +24,4 @@ public VCFDecoderV4_2(final Bundle inputBundle, final VariantsDecoderOptions var public HtsVersion getVersion() { return VCFCodecV4_2.VCF_V42_VERSION; } - } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_2/VCFEncoderV4_2.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_2/VCFEncoderV4_2.java index e23d976aaf..622acb37e1 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_2/VCFEncoderV4_2.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_2/VCFEncoderV4_2.java @@ -17,12 +17,11 @@ public class VCFEncoderV4_2 extends VCFEncoder { * @param variantsEncoderOptions the {@link VariantsEncoderOptions} to use */ public VCFEncoderV4_2(final Bundle outputBundle, final VariantsEncoderOptions variantsEncoderOptions) { - super(outputBundle,variantsEncoderOptions); + super(outputBundle, 
variantsEncoderOptions); } @Override public HtsVersion getVersion() { return VCFCodecV4_2.VCF_V42_VERSION; } - } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_3/VCFCodecV4_3.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_3/VCFCodecV4_3.java index 44516b4a72..7593928df0 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_3/VCFCodecV4_3.java +++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_3/VCFCodecV4_3.java @@ -4,8 +4,8 @@ import htsjdk.beta.codecs.variants.vcf.VCFDecoder; import htsjdk.beta.codecs.variants.vcf.VCFEncoder; import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.variants.VariantsDecoderOptions; import htsjdk.beta.plugin.variants.VariantsEncoderOptions; import htsjdk.utils.ValidationUtils; @@ -14,12 +14,14 @@ * VCF V4.3 codec. */ public class VCFCodecV4_3 extends VCFCodec { - public static final HtsVersion VCF_V43_VERSION = new HtsVersion(4,3,0); + public static final HtsVersion VCF_V43_VERSION = new HtsVersion(4, 3, 0); private static final String VCF_V43_MAGIC = "##fileformat=VCFv4.3"; @Override - public HtsVersion getVersion() { return VCF_V43_VERSION; } + public HtsVersion getVersion() { + return VCF_V43_VERSION; + } @Override public VCFDecoder getDecoder(final Bundle inputBundle, final VariantsDecoderOptions decoderOptions) { @@ -40,6 +42,7 @@ public boolean runVersionUpgrade(final HtsVersion sourceCodecVersion, final HtsV } @Override - protected String getSignatureString() { return VCF_V43_MAGIC; } - + protected String getSignatureString() { + return VCF_V43_MAGIC; + } } diff --git a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_3/VCFDecoderV4_3.java b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_3/VCFDecoderV4_3.java index 32de5539fa..92a9051a3d 100644 --- a/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_3/VCFDecoderV4_3.java 
+++ b/src/main/java/htsjdk/beta/codecs/variants/vcf/vcfv4_3/VCFDecoderV4_3.java @@ -1,8 +1,8 @@ package htsjdk.beta.codecs.variants.vcf.vcfv4_3; import htsjdk.beta.codecs.variants.vcf.VCFDecoder; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.variants.VariantsDecoderOptions; /** @@ -24,5 +24,4 @@ public VCFDecoderV4_3(final Bundle inputBundle, final VariantsDecoderOptions var public HtsVersion getVersion() { return VCFCodecV4_3.VCF_V43_VERSION; } - } diff --git a/src/main/java/htsjdk/beta/exception/HtsjdkPluginException.java b/src/main/java/htsjdk/beta/exception/HtsjdkPluginException.java index 470b0eb96c..1adebb012b 100644 --- a/src/main/java/htsjdk/beta/exception/HtsjdkPluginException.java +++ b/src/main/java/htsjdk/beta/exception/HtsjdkPluginException.java @@ -14,5 +14,4 @@ public class HtsjdkPluginException extends HtsjdkException { public HtsjdkPluginException(String message) { super(message); } - } diff --git a/src/main/java/htsjdk/beta/io/IOPathUtils.java b/src/main/java/htsjdk/beta/io/IOPathUtils.java index 965c850a59..fe5476af6a 100644 --- a/src/main/java/htsjdk/beta/io/IOPathUtils.java +++ b/src/main/java/htsjdk/beta/io/IOPathUtils.java @@ -3,8 +3,6 @@ import htsjdk.beta.exception.HtsjdkIOException; import htsjdk.io.HtsPath; import htsjdk.io.IOPath; -import htsjdk.utils.ValidationUtils; - import java.io.BufferedOutputStream; import java.io.File; import java.io.IOException; @@ -42,17 +40,14 @@ public static IOPath createTempPath(final String prefix, final String suffix) { public static String getStringFromPath(final IOPath ioPath) { try { final StringWriter stringWriter = new StringWriter(); - //TODO: the UTF-8 encoding of these should be codified somewhere else... 
- Files.lines(ioPath.toPath(), StandardCharsets.UTF_8).forEach( - line -> { - stringWriter.write(line); - stringWriter.append("\n"); - }); + // TODO: the UTF-8 encoding of these should be codified somewhere else... + Files.lines(ioPath.toPath(), StandardCharsets.UTF_8).forEach(line -> { + stringWriter.write(line); + stringWriter.append("\n"); + }); return stringWriter.toString(); } catch (final IOException e) { - throw new HtsjdkIOException( - String.format("Failed to read from: %s", ioPath.getRawInputString()), - e); + throw new HtsjdkIOException(String.format("Failed to read from: %s", ioPath.getRawInputString()), e); } } @@ -66,9 +61,7 @@ public static void writeStringToPath(final IOPath ioPath, final String contents) try (final BufferedOutputStream bos = new BufferedOutputStream(ioPath.getOutputStream())) { bos.write(contents.getBytes()); } catch (final IOException e) { - throw new HtsjdkIOException( - String.format("Failed to write to: %s", ioPath.getRawInputString()), - e); + throw new HtsjdkIOException(String.format("Failed to write to: %s", ioPath.getRawInputString()), e); } } @@ -91,19 +84,16 @@ public static void writeStringToPath(final IOPath ioPath, final String contents) * @return A new IOPath object with the new extension */ public static T replaceExtension( - final IOPath path, - final String newExtension, - final Function ioPathConstructor){ - final String extensionToUse = newExtension.startsWith(".") ? - newExtension : - "." + newExtension; + final IOPath path, final String newExtension, final Function ioPathConstructor) { + final String extensionToUse = newExtension.startsWith(".") ? newExtension : "." 
+ newExtension; final Optional oldExtension = path.getExtension(); - if (oldExtension.isEmpty()){ + if (oldExtension.isEmpty()) { throw new RuntimeException("The original path has no extension to replace" + path.getURIString()); } final String oldFileName = path.toPath().getFileName().toString(); final String newFileName = oldFileName.replaceAll(oldExtension.get() + "$", extensionToUse); - return ioPathConstructor.apply(path.toPath().resolveSibling(newFileName).toUri().toString()); + return ioPathConstructor.apply( + path.toPath().resolveSibling(newFileName).toUri().toString()); } /** @@ -123,13 +113,10 @@ public static T replaceExtension( * @return A new IOPath object with the new extension */ public static T appendExtension( - final IOPath path, - final String extension, - final Function ioPathConstructor){ + final IOPath path, final String extension, final Function ioPathConstructor) { final String oldFileName = path.toPath().getFileName().toString(); - final String newExtension = extension.startsWith(".") ? - extension : - "." + extension; - return ioPathConstructor.apply(path.toPath().resolveSibling(oldFileName + newExtension).toUri().toString()); + final String newExtension = extension.startsWith(".") ? extension : "." 
+ extension; + return ioPathConstructor.apply( + path.toPath().resolveSibling(oldFileName + newExtension).toUri().toString()); } } diff --git a/src/main/java/htsjdk/beta/io/bundle/Bundle.java b/src/main/java/htsjdk/beta/io/bundle/Bundle.java index 06ef7d12dd..9b7187401c 100644 --- a/src/main/java/htsjdk/beta/io/bundle/Bundle.java +++ b/src/main/java/htsjdk/beta/io/bundle/Bundle.java @@ -2,7 +2,6 @@ import htsjdk.io.IOPath; import htsjdk.utils.ValidationUtils; - import java.io.Serializable; import java.util.*; @@ -50,8 +49,8 @@ public class Bundle implements Iterable, Serializable { */ public Bundle(final String primaryContentType, final Collection resources) { ValidationUtils.nonNull(primaryContentType, "primary content type"); - ValidationUtils.validateArg(primaryContentType.length() > 0, - "A non-zero length primary resource content type must be provided"); + ValidationUtils.validateArg( + primaryContentType.length() > 0, "A non-zero length primary resource content type must be provided"); ValidationUtils.nonNull(resources, "resource collection"); if (resources.isEmpty()) { throw new IllegalArgumentException("A bundle must contain at least one resource"); @@ -67,9 +66,8 @@ public Bundle(final String primaryContentType, final Collection // validate that the primary resource actually exists in the resources if (!this.resources.containsKey(primaryContentType)) { - throw new IllegalArgumentException( - String.format("Primary resource content type %s is not present in the bundle's resources", - primaryContentType)); + throw new IllegalArgumentException(String.format( + "Primary resource content type %s is not present in the bundle's resources", primaryContentType)); } } @@ -94,12 +92,9 @@ public Optional get(final String targetContentType) { */ public BundleResource getOrThrow(final String requiredContentType) { ValidationUtils.nonNull(requiredContentType, "target content string"); - return get(requiredContentType).orElseThrow( - () -> new IllegalArgumentException( - 
String.format("No resource found in bundle %s with content type %s", - this, - requiredContentType - ))); + return get(requiredContentType) + .orElseThrow(() -> new IllegalArgumentException(String.format( + "No resource found in bundle %s with content type %s", this, requiredContentType))); } /** @@ -107,7 +102,9 @@ public BundleResource getOrThrow(final String requiredContentType) { * * @return the primary content type for this bundle */ - public String getPrimaryContentType() { return primaryContentType; } + public String getPrimaryContentType() { + return primaryContentType; + } /** * Get the primary {@link BundleResource} for this bundle. @@ -131,7 +128,9 @@ public Collection getResources() { * @return iterator of BundleResources for this bundle. */ @Override - public Iterator iterator() { return resources.values().iterator(); } + public Iterator iterator() { + return resources.values().iterator(); + } @Override public boolean equals(Object o) { diff --git a/src/main/java/htsjdk/beta/io/bundle/BundleBuilder.java b/src/main/java/htsjdk/beta/io/bundle/BundleBuilder.java index 4affc3dbfb..139eba5958 100644 --- a/src/main/java/htsjdk/beta/io/bundle/BundleBuilder.java +++ b/src/main/java/htsjdk/beta/io/bundle/BundleBuilder.java @@ -1,7 +1,6 @@ package htsjdk.beta.io.bundle; import htsjdk.utils.ValidationUtils; - import java.util.ArrayList; import java.util.List; @@ -16,7 +15,7 @@ public final class BundleBuilder { /** * Start a new bundle builder. */ - public BundleBuilder() { } + public BundleBuilder() {} /** * Add the primary resource to the bundle. The content type of resource will be the bundle's primary key. 
@@ -27,10 +26,9 @@ public BundleBuilder() { } public BundleBuilder addPrimary(final BundleResource resource) { ValidationUtils.nonNull(resource, "resource"); if (primaryResource != null) { - throw new IllegalStateException(String.format( - "Can't add primary resource %s to a bundle that already has primary resource %s", - resource.getContentType(), - primaryResource)); + throw new IllegalStateException(String.format( + "Can't add primary resource %s to a bundle that already has primary resource %s", + resource.getContentType(), primaryResource)); } primaryResource = resource.getContentType(); addSecondary(resource); @@ -66,5 +64,3 @@ public Bundle build() { return bundle; } } - - diff --git a/src/main/java/htsjdk/beta/io/bundle/BundleJSON.java b/src/main/java/htsjdk/beta/io/bundle/BundleJSON.java index 95d4d7aaac..f367e9516e 100644 --- a/src/main/java/htsjdk/beta/io/bundle/BundleJSON.java +++ b/src/main/java/htsjdk/beta/io/bundle/BundleJSON.java @@ -4,10 +4,6 @@ import htsjdk.io.IOPath; import htsjdk.samtools.util.Log; import htsjdk.utils.ValidationUtils; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - import java.util.ArrayList; import java.util.Collection; import java.util.List; @@ -15,8 +11,11 @@ import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; -//TODO: Once the schema is finalized, we need to bump the version # to 1.0, and publish it. +// TODO: Once the schema is finalized, we need to bump the version # to 1.0, and publish it. /** * Methods for serializing and deserializing Bundles to and from JSON strings. 
@@ -25,17 +24,17 @@ public class BundleJSON { public static final String BUNDLE_EXTENSION = ".json"; private static final Log LOG = Log.getInstance(BundleJSON.class); - public static final String JSON_PROPERTY_SCHEMA = "schema"; - public static final String JSON_PROPERTY_SCHEMA_NAME = "schemaName"; - public static final String JSON_PROPERTY_SCHEMA_VERSION = "schemaVersion"; - public static final String JSON_PROPERTY_PRIMARY = "primary"; - public static final String JSON_PROPERTY_PATH = "path"; - public static final String JSON_PROPERTY_FORMAT = "format"; + public static final String JSON_PROPERTY_SCHEMA = "schema"; + public static final String JSON_PROPERTY_SCHEMA_NAME = "schemaName"; + public static final String JSON_PROPERTY_SCHEMA_VERSION = "schemaVersion"; + public static final String JSON_PROPERTY_PRIMARY = "primary"; + public static final String JSON_PROPERTY_PATH = "path"; + public static final String JSON_PROPERTY_FORMAT = "format"; - public static final String JSON_SCHEMA_NAME = "htsbundle"; - public static final String JSON_SCHEMA_VERSION = "0.1.0"; // TODO: bump this to 1.0.0 + public static final String JSON_SCHEMA_NAME = "htsbundle"; + public static final String JSON_SCHEMA_VERSION = "0.1.0"; // TODO: bump this to 1.0.0 - final private static Set TOP_LEVEL_PROPERTIES = Set.of(JSON_PROPERTY_SCHEMA, JSON_PROPERTY_PRIMARY); + private static final Set TOP_LEVEL_PROPERTIES = Set.of(JSON_PROPERTY_SCHEMA, JSON_PROPERTY_PRIMARY); /** * Serialize a bundle to a JSON string representation. 
All resources in the bundle must @@ -53,8 +52,8 @@ public static String toJSON(final Bundle bundle) { .put(JSON_PROPERTY_SCHEMA_NAME, JSON_SCHEMA_NAME) .put(JSON_PROPERTY_SCHEMA_VERSION, JSON_SCHEMA_VERSION); final JSONObject outerJSON = new JSONObject() - .put(JSON_PROPERTY_SCHEMA, schemaMap) - .put(JSON_PROPERTY_PRIMARY, bundle.getPrimaryContentType()); + .put(JSON_PROPERTY_SCHEMA, schemaMap) + .put(JSON_PROPERTY_PRIMARY, bundle.getPrimaryContentType()); bundle.forEach(bundleResource -> { final Optional resourcePath = bundleResource.getIOPath(); @@ -63,9 +62,11 @@ public static String toJSON(final Bundle bundle) { } // generate JSON for each bundle resource - final JSONObject resourceJSON = new JSONObject().put(JSON_PROPERTY_PATH, resourcePath.get().getURIString()); + final JSONObject resourceJSON = + new JSONObject().put(JSON_PROPERTY_PATH, resourcePath.get().getURIString()); if (bundleResource.getFileFormat().isPresent()) { - resourceJSON.put(JSON_PROPERTY_FORMAT, bundleResource.getFileFormat().get()); + resourceJSON.put( + JSON_PROPERTY_FORMAT, bundleResource.getFileFormat().get()); } outerJSON.put(bundleResource.getContentType(), resourceJSON); }); @@ -83,21 +84,19 @@ public static String toJSON(final List bundles) { if (bundles.isEmpty()) { throw new IllegalArgumentException("A bundle list must contain at least one bundle"); } - return bundles.stream() - .map(BundleJSON::toJSON) - .collect(Collectors.joining(",\n", "[", "]")); + return bundles.stream().map(BundleJSON::toJSON).collect(Collectors.joining(",\n", "[", "]")); } - /** - * Create a Bundle from a jsonString. - * - * @param jsonString a valid JSON string conforming to the bundle schema (for compatibility, a bundle list is also - * accepted, as long as it only contains a single bundle) - * @return a {@link Bundle} created from jsonString - */ - public static Bundle toBundle(final String jsonString) { + /** + * Create a Bundle from a jsonString. 
+ * + * @param jsonString a valid JSON string conforming to the bundle schema (for compatibility, a bundle list is also + * accepted, as long as it only contains a single bundle) + * @return a {@link Bundle} created from jsonString + */ + public static Bundle toBundle(final String jsonString) { return toBundle(ValidationUtils.nonEmpty(jsonString, "resource list"), HtsPath::new); - } + } /** * Create a Bundle from jsonString using a custom class that implements {@link IOPath} for all resources. @@ -109,8 +108,7 @@ public static Bundle toBundle(final String jsonString) { * @return a newly created {@link Bundle} */ public static Bundle toBundle( - final String jsonString, - final Function ioPathConstructor) { + final String jsonString, final Function ioPathConstructor) { ValidationUtils.nonEmpty(jsonString, "JSON string"); ValidationUtils.nonNull(ioPathConstructor, "IOPath-derived class constructor"); try { @@ -121,16 +119,16 @@ public static Bundle toBundle( try { final List bundles = toBundleList(jsonString, ioPathConstructor); if (bundles.size() > 1) { - throw new IllegalArgumentException( - String.format("A JSON string with more than one bundle was provided but only a single bundle is allowed in this context (%s)", - e.getMessage())); + throw new IllegalArgumentException(String.format( + "A JSON string with more than one bundle was provided but only a single bundle is allowed in this context (%s)", + e.getMessage())); } return bundles.get(0); } catch (JSONException | UnsupportedOperationException e2) { throw new IllegalArgumentException( - String.format("The JSON can be interpreted neither as an individual bundle (%s) nor as a bundle collection (%s)", - e.getMessage(), - e2.getMessage()), + String.format( + "The JSON can be interpreted neither as an individual bundle (%s) nor as a bundle collection (%s)", + e.getMessage(), e2.getMessage()), e); } } @@ -145,8 +143,7 @@ public static Bundle toBundle( * @param IOPath-derived class to use for IOPathResources */ public 
static List toBundleList( - final String jsonString, - final Function ioPathConstructor) { + final String jsonString, final Function ioPathConstructor) { ValidationUtils.nonEmpty(jsonString, "json bundle string"); ValidationUtils.nonNull(ioPathConstructor, "IOPath-derived class constructor"); @@ -154,10 +151,9 @@ public static List toBundleList( try { final JSONArray jsonArray = new JSONArray(jsonString); jsonArray.forEach(element -> { - if (! (element instanceof JSONObject jsonObject)) { - throw new IllegalArgumentException( - String.format("Bundle collections may contain only Bundle objects, found %s", - element.toString())); + if (!(element instanceof JSONObject jsonObject)) { + throw new IllegalArgumentException(String.format( + "Bundle collections may contain only Bundle objects, found %s", element.toString())); } bundles.add(toBundle(jsonObject, ioPathConstructor)); }); @@ -167,9 +163,9 @@ public static List toBundleList( bundles.add(toBundle(new JSONObject(jsonString), ioPathConstructor)); } catch (JSONException | UnsupportedOperationException e2) { throw new IllegalArgumentException( - String.format("JSON can be interpreted neither as an individual bundle (%s) nor as a bundle collection (%s)", - e2.getMessage(), - e.getMessage()), + String.format( + "JSON can be interpreted neither as an individual bundle (%s) nor as a bundle collection (%s)", + e2.getMessage(), e.getMessage()), e); } } @@ -205,8 +201,8 @@ private static Bundle toBundle( } final String schemaVersion = getRequiredPropertyAsString(schemaMap, JSON_PROPERTY_SCHEMA_VERSION); if (!schemaVersion.equals(JSON_SCHEMA_VERSION)) { - throw new IllegalArgumentException(String.format("Expected bundle schema version %s but found %s", - JSON_SCHEMA_VERSION, schemaVersion)); + throw new IllegalArgumentException(String.format( + "Expected bundle schema version %s but found %s", JSON_SCHEMA_VERSION, schemaVersion)); } final String primaryContentType = getRequiredPropertyAsString(jsonObject, 
JSON_PROPERTY_PRIMARY); @@ -216,19 +212,18 @@ private static Bundle toBundle( throw new IllegalArgumentException(e); } } + private static IOPathResource toBundleResource( - final String contentType, - final JSONObject jsonObject, - final Function ioPathConstructor) { + final String contentType, final JSONObject jsonObject, final Function ioPathConstructor) { final String format = jsonObject.optString(JSON_PROPERTY_FORMAT, null); return new IOPathResource( ioPathConstructor.apply(getRequiredPropertyAsString(jsonObject, JSON_PROPERTY_PATH)), contentType, format); } + private static Collection toBundleResources( - final JSONObject jsonResources, - final Function ioPathConstructor) { + final JSONObject jsonResources, final Function ioPathConstructor) { final List bundleResources = new ArrayList<>(); // default capacity of 10 seems right jsonResources.keySet().forEach(key -> { @@ -248,9 +243,7 @@ private static String getRequiredPropertyAsString(final JSONObject jsonDocument, final String propertyValue = jsonDocument.optString(propertyName, null); if (propertyValue == null) { throw new IllegalArgumentException( - String.format("JSON bundle is missing the required property %s (%s)", - propertyName, - jsonDocument)); + String.format("JSON bundle is missing the required property %s (%s)", propertyName, jsonDocument)); } return propertyValue; diff --git a/src/main/java/htsjdk/beta/io/bundle/BundleResource.java b/src/main/java/htsjdk/beta/io/bundle/BundleResource.java index 2eb610cb0e..b281b4fe56 100644 --- a/src/main/java/htsjdk/beta/io/bundle/BundleResource.java +++ b/src/main/java/htsjdk/beta/io/bundle/BundleResource.java @@ -2,7 +2,6 @@ import htsjdk.io.IOPath; import htsjdk.samtools.seekablestream.SeekableStream; - import java.io.InputStream; import java.io.OutputStream; import java.util.Optional; @@ -130,5 +129,4 @@ public interface BundleResource { * @return true if this resource can be rendered as a {@link SeekableStream} (see {@link #getSeekableStream}) */ boolean 
hasSeekableStream(); - } diff --git a/src/main/java/htsjdk/beta/io/bundle/BundleResourceBase.java b/src/main/java/htsjdk/beta/io/bundle/BundleResourceBase.java index dacf766f5b..500aab3d34 100644 --- a/src/main/java/htsjdk/beta/io/bundle/BundleResourceBase.java +++ b/src/main/java/htsjdk/beta/io/bundle/BundleResourceBase.java @@ -3,7 +3,6 @@ import htsjdk.io.IOPath; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.utils.ValidationUtils; - import java.io.InputStream; import java.io.OutputStream; import java.io.Serializable; @@ -29,10 +28,7 @@ public abstract class BundleResourceBase implements BundleResource, Serializable * a resource with content type "READS". Predefined format strings are defined * in {@link BundleResourceType}. */ - public BundleResourceBase( - final String displayName, - final String contentType, - final String format) { + public BundleResourceBase(final String displayName, final String contentType, final String format) { ValidationUtils.nonEmpty(displayName, "display name"); ValidationUtils.nonEmpty(contentType, "content type"); this.displayName = displayName; @@ -41,7 +37,9 @@ public BundleResourceBase( } @Override - public String getDisplayName() { return displayName; } + public String getDisplayName() { + return displayName; + } @Override public String getContentType() { @@ -54,25 +52,39 @@ public Optional getFileFormat() { } @Override - public Optional getIOPath() { return Optional.empty(); } + public Optional getIOPath() { + return Optional.empty(); + } @Override - public Optional getInputStream() { return Optional.empty(); } + public Optional getInputStream() { + return Optional.empty(); + } @Override - public Optional getOutputStream() { return Optional.empty(); } + public Optional getOutputStream() { + return Optional.empty(); + } @Override - public Optional getSeekableStream() { return Optional.empty(); } + public Optional getSeekableStream() { + return Optional.empty(); + } @Override - public boolean 
hasSeekableStream() { return false; } + public boolean hasSeekableStream() { + return false; + } @Override - public boolean hasInputType() { return false; } + public boolean hasInputType() { + return false; + } @Override - public boolean hasOutputType() { return false; } + public boolean hasOutputType() { + return false; + } @Override public String toString() { diff --git a/src/main/java/htsjdk/beta/io/bundle/BundleResourceType.java b/src/main/java/htsjdk/beta/io/bundle/BundleResourceType.java index d083b06107..83f0d07a5e 100644 --- a/src/main/java/htsjdk/beta/io/bundle/BundleResourceType.java +++ b/src/main/java/htsjdk/beta/io/bundle/BundleResourceType.java @@ -1,6 +1,5 @@ package htsjdk.beta.io.bundle; -import htsjdk.beta.plugin.HtsContentType; import htsjdk.beta.plugin.reads.ReadsFormats; import htsjdk.beta.plugin.variants.VariantsFormats; @@ -26,6 +25,7 @@ public class BundleResourceType { /**************************************** Common primary content types ******************************************/ public static final String CT_ALIGNED_READS = "ALIGNED_READS"; + public static final String CT_VARIANT_CONTEXTS = "VARIANT_CONTEXTS"; public static final String CT_HAPLOID_REFERENCE = "HAPLOID_REFERENCE"; public static final String CT_FEATURES = "FEATURES"; @@ -33,6 +33,7 @@ public class BundleResourceType { /****************************************** Resource types for READS ********************************************/ /** Formats for primary content type {@link BundleResourceType#CT_ALIGNED_READS} */ public static final String FMT_READS_SAM = ReadsFormats.SAM; + public static final String FMT_READS_BAM = ReadsFormats.BAM; public static final String FMT_READS_CRAM = ReadsFormats.CRAM; public static final String FMT_READS_HTSGET_BAM = ReadsFormats.HTSGET_BAM; @@ -41,12 +42,14 @@ public class BundleResourceType { public static final String CT_READS_INDEX = "READS_INDEX"; /** Formats for secondary content type {@link BundleResourceType#CT_READS_INDEX} resources */ 
public static final String FMT_READS_INDEX_BAI = "BAI"; + public static final String FMT_READS_INDEX_CRAI = "CRAI"; public static final String FMT_READS_INDEX_CSI = "CSI"; /****************************************** Resource types for VARIANTS ******************************************/ /** Format names for content type {@link BundleResourceType#CT_VARIANT_CONTEXTS} */ public static final String FMT_VARIANTS_VCF = VariantsFormats.VCF; + public static final String FMT_VARIANTS_BCF = VariantsFormats.BCF; /** Secondary content types for primary content type {@link #CT_VARIANT_CONTEXTS} resources */ @@ -55,14 +58,12 @@ public class BundleResourceType { /****************************************** Resource types for HAPLOID REFERENCES ********************************/ /** Secondary content types for {@link BundleResourceType#CT_HAPLOID_REFERENCE} resources*/ public static final String CT_REFERENCE_DICTIONARY = "REFERENCE_DICTIONARY"; + public static final String CT_REFERENCE_INDEX = "REFERENCE_INDEX"; public static final String CT_REFERENCE_INDEX_GZI = "REFERENCE_INDEX_GZI"; - /****************************************** Resource types for FEATURES ********************************/ - /****************************************** MISCELLANEOUS Resource types ********************************/ public static final String CT_MD5 = "MD5"; - } diff --git a/src/main/java/htsjdk/beta/io/bundle/IOPathResource.java b/src/main/java/htsjdk/beta/io/bundle/IOPathResource.java index be200942a4..580f8bbde4 100644 --- a/src/main/java/htsjdk/beta/io/bundle/IOPathResource.java +++ b/src/main/java/htsjdk/beta/io/bundle/IOPathResource.java @@ -5,7 +5,6 @@ import htsjdk.samtools.seekablestream.SeekablePathStream; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.utils.ValidationUtils; - import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -31,20 +30,20 @@ public IOPathResource(final IOPath ioPath, final String contentType) { /** * Create a 
{@link BundleResource} backed by an IOPath, specifying a content type string and format string. - + * * @param ioPath The IOPath for this resource. May not be null. * @param contentType The content type for this resource. May not be mull or 0-length. * @param format The format for this resource. May not be null or 0-length. */ public IOPathResource(final IOPath ioPath, final String contentType, final String format) { - super(ValidationUtils.nonNull(ioPath, "ioPath").getRawInputString(), - contentType, - format); + super(ValidationUtils.nonNull(ioPath, "ioPath").getRawInputString(), contentType, format); this.ioPath = ioPath; } @Override - public Optional getIOPath() { return Optional.of(ioPath); } + public Optional getIOPath() { + return Optional.of(ioPath); + } /** * {@inheritDoc} @@ -59,13 +58,19 @@ public Optional getInputStream() { } @Override - public Optional getOutputStream() { return Optional.of(ioPath.getOutputStream()); } + public Optional getOutputStream() { + return Optional.of(ioPath.getOutputStream()); + } @Override - public boolean hasInputType() { return true; } + public boolean hasInputType() { + return true; + } @Override - public boolean hasOutputType() { return true; } + public boolean hasOutputType() { + return true; + } @Override public boolean hasSeekableStream() { @@ -119,5 +124,4 @@ public int hashCode() { result = 31 * result + ioPath.hashCode(); return result; } - } diff --git a/src/main/java/htsjdk/beta/io/bundle/InputStreamResource.java b/src/main/java/htsjdk/beta/io/bundle/InputStreamResource.java index 2bde148b22..b86158c2c7 100644 --- a/src/main/java/htsjdk/beta/io/bundle/InputStreamResource.java +++ b/src/main/java/htsjdk/beta/io/bundle/InputStreamResource.java @@ -2,7 +2,6 @@ import htsjdk.beta.exception.HtsjdkIOException; import htsjdk.utils.ValidationUtils; - import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; @@ -13,9 +12,9 @@ */ public class InputStreamResource extends BundleResourceBase 
{ private static final long serialVersionUID = 1L; - private static final int MINIMUM_STREAM_BUFFER_SIZE = 64*1024; - private final InputStream rawInputStream; // the stream as provided by the caller - private BufferedInputStream bufferedInputStream; // buffered stream wrapper to compensate for signature probing + private static final int MINIMUM_STREAM_BUFFER_SIZE = 64 * 1024; + private final InputStream rawInputStream; // the stream as provided by the caller + private BufferedInputStream bufferedInputStream; // buffered stream wrapper to compensate for signature probing /** * Create a {@link BundleResource} backed by an InputStream. @@ -44,10 +43,7 @@ public InputStreamResource(final InputStream inputStream, final String displayNa * @param format The format for this resource. May not be null or 0-length. */ public InputStreamResource( - final InputStream inputStream, - final String displayName, - final String contentType, - final String format) { + final InputStream inputStream, final String displayName, final String contentType, final String format) { super(displayName, contentType, format); ValidationUtils.nonNull(inputStream, "input stream"); this.rawInputStream = inputStream; @@ -76,14 +72,14 @@ public SignatureStream getSignatureStream(final int signatureProbeLength) { // is ultimately consumed once signature probing is complete, it will be consumed from the // beginning. 
bufferedInputStream = new BufferedInputStream( - rawInputStream, - Integer.max(signatureProbeLength, MINIMUM_STREAM_BUFFER_SIZE)); + rawInputStream, Integer.max(signatureProbeLength, MINIMUM_STREAM_BUFFER_SIZE)); bufferedInputStream.mark(signatureProbeLength); bufferedInputStream.read(signaturePrefix); bufferedInputStream.reset(); } catch (final IOException e) { throw new HtsjdkIOException( - String.format("Error during signature probing on %s with prefix size %d", + String.format( + "Error during signature probing on %s with prefix size %d", this.getDisplayName(), signatureProbeLength), e); } @@ -91,7 +87,9 @@ public SignatureStream getSignatureStream(final int signatureProbeLength) { } @Override - public boolean hasInputType() { return true; } + public boolean hasInputType() { + return true; + } @Override public boolean equals(Object o) { diff --git a/src/main/java/htsjdk/beta/io/bundle/OutputStreamResource.java b/src/main/java/htsjdk/beta/io/bundle/OutputStreamResource.java index 56fa46ca22..d176f74ddd 100644 --- a/src/main/java/htsjdk/beta/io/bundle/OutputStreamResource.java +++ b/src/main/java/htsjdk/beta/io/bundle/OutputStreamResource.java @@ -2,7 +2,6 @@ import htsjdk.beta.exception.HtsjdkException; import htsjdk.utils.ValidationUtils; - import java.io.OutputStream; import java.util.Optional; @@ -33,10 +32,7 @@ public OutputStreamResource(final OutputStream outputStream, final String displa * @param format The format for this resource. May not be null or 0-length. 
*/ public OutputStreamResource( - final OutputStream outputStream, - final String displayName, - final String contentType, - final String format) { + final OutputStream outputStream, final String displayName, final String contentType, final String format) { super(displayName, contentType, format); ValidationUtils.nonNull(outputStream, "output stream"); this.outputStream = outputStream; @@ -54,7 +50,9 @@ public SignatureStream getSignatureStream(int signatureProbeLength) { } @Override - public boolean hasOutputType() { return true; } + public boolean hasOutputType() { + return true; + } @Override public boolean equals(Object o) { diff --git a/src/main/java/htsjdk/beta/io/bundle/SeekableStreamResource.java b/src/main/java/htsjdk/beta/io/bundle/SeekableStreamResource.java index 642f53f572..5c9fe39748 100644 --- a/src/main/java/htsjdk/beta/io/bundle/SeekableStreamResource.java +++ b/src/main/java/htsjdk/beta/io/bundle/SeekableStreamResource.java @@ -3,7 +3,6 @@ import htsjdk.beta.exception.HtsjdkIOException; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.utils.ValidationUtils; - import java.io.IOException; import java.io.InputStream; import java.util.Optional; @@ -23,7 +22,8 @@ public class SeekableStreamResource extends InputStreamResource { * @param displayName The display name for this resource. May not be null or 0-length. * @param contentType The content type for this resource. May not be null or 0-length. 
*/ - public SeekableStreamResource(final SeekableStream seekableStream, final String displayName, final String contentType) { + public SeekableStreamResource( + final SeekableStream seekableStream, final String displayName, final String contentType) { this(seekableStream, displayName, contentType, null); } @@ -59,7 +59,9 @@ public Optional getInputStream() { } @Override - public Optional getSeekableStream() { return Optional.of(seekableStream); } + public Optional getSeekableStream() { + return Optional.of(seekableStream); + } /** * {@inheritDoc} @@ -69,7 +71,7 @@ public Optional getInputStream() { */ @Override public SignatureStream getSignatureStream(final int signatureProbeLength) { - //we don't want to call the super class' implementation here + // we don't want to call the super class' implementation here final byte[] signaturePrefix = new byte[signatureProbeLength]; try { // for a SeekableStreamResource, we don't want this code to close the actual SeekableStream that @@ -81,19 +83,23 @@ public SignatureStream getSignatureStream(final int signatureProbeLength) { seekableStream.seek(0); } catch (final IOException e) { throw new HtsjdkIOException( - String.format("Error creating signature probe for seekable stream resource with prefix size %d", + String.format( + "Error creating signature probe for seekable stream resource with prefix size %d", signatureProbeLength), e); } return new SignatureStream(signatureProbeLength, signaturePrefix); - } @Override - public boolean hasInputType() { return true; } + public boolean hasInputType() { + return true; + } @Override - public boolean hasSeekableStream() { return true; } + public boolean hasSeekableStream() { + return true; + } @Override public boolean equals(Object o) { diff --git a/src/main/java/htsjdk/beta/io/bundle/SignatureStream.java b/src/main/java/htsjdk/beta/io/bundle/SignatureStream.java index 4feaa88433..32a631a7fd 100644 --- a/src/main/java/htsjdk/beta/io/bundle/SignatureStream.java +++ 
b/src/main/java/htsjdk/beta/io/bundle/SignatureStream.java @@ -32,6 +32,7 @@ public SignatureStream(final int signaturePrefixLength, final byte[] signaturePr * * @return the maximum number of bytes that can be consumed from this stream. */ - public final int getSignaturePrefixLength() { return signaturePrefixLength;} - + public final int getSignaturePrefixLength() { + return signaturePrefixLength; + } } diff --git a/src/main/java/htsjdk/beta/package-info.java b/src/main/java/htsjdk/beta/package-info.java index 181e0e1c1b..a039f8ef0d 100644 --- a/src/main/java/htsjdk/beta/package-info.java +++ b/src/main/java/htsjdk/beta/package-info.java @@ -9,5 +9,3 @@ * {@link htsjdk.annotations.BetaAPI}. Subpackages may be subsequently released as part of the public API, * at which time they will be moved out of the {@link htsjdk.beta} package. */ - - diff --git a/src/main/java/htsjdk/beta/plugin/HtsCodec.java b/src/main/java/htsjdk/beta/plugin/HtsCodec.java index a128fc19b8..ceb6b2800c 100644 --- a/src/main/java/htsjdk/beta/plugin/HtsCodec.java +++ b/src/main/java/htsjdk/beta/plugin/HtsCodec.java @@ -1,12 +1,12 @@ package htsjdk.beta.plugin; +import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResource; import htsjdk.beta.io.bundle.BundleResourceType; import htsjdk.beta.io.bundle.SignatureStream; import htsjdk.beta.plugin.reads.ReadsFormats; import htsjdk.beta.plugin.registry.HtsCodecRegistry; import htsjdk.io.IOPath; -import htsjdk.beta.io.bundle.Bundle; /** * Base interface implemented by all {@link htsjdk.beta.plugin} codecs. @@ -76,7 +76,7 @@ *
  • For {@link HtsContentType#ALIGNED_READS} codecs, see the {@link htsjdk.beta.plugin.reads} package
  • *
  • For {@link HtsContentType#HAPLOID_REFERENCE} codecs, see the {@link htsjdk.beta.plugin.hapref} package
  • *
  • For {@link HtsContentType#VARIANT_CONTEXTS} codecs, see the {@link htsjdk.beta.plugin.variants} package
  • - *
  • For {@link HtsContentType#FEATURES} codecs, see the {@link htsjdk.beta.plugin.features} package
  • + *
  • For {@link HtsContentType#FEATURES} codecs, see the {@code htsjdk.beta.plugin.features} package
  • * *

    *

    Example Content Type: Reads

    @@ -221,7 +221,8 @@ public interface HtsCodecioPath (obtained via {@link IOPath#getURI()}) @@ -362,7 +365,9 @@ default String getDisplayName() { * may throw exceptions. *

    */ - default int getSignatureProbeLength() { return getSignatureLength(); } + default int getSignatureProbeLength() { + return getSignatureLength(); + } /** * Get an {@link HtsDecoder} to decode the provided inputs. The input bundle must contain @@ -397,5 +402,4 @@ default String getDisplayName() { * @return an {@link HtsEncoder} suitable for writing to the provided outputs */ HtsEncoder getEncoder(final Bundle outputBundle, final E encoderOptions); - } diff --git a/src/main/java/htsjdk/beta/plugin/HtsContentType.java b/src/main/java/htsjdk/beta/plugin/HtsContentType.java index abe4312fd7..427f6bcd82 100644 --- a/src/main/java/htsjdk/beta/plugin/HtsContentType.java +++ b/src/main/java/htsjdk/beta/plugin/HtsContentType.java @@ -15,7 +15,7 @@ *
  • For {@link HtsContentType#HAPLOID_REFERENCE} codecs, see the {@link htsjdk.beta.plugin.hapref} package
  • *
  • For {@link HtsContentType#ALIGNED_READS} codecs, see the {@link htsjdk.beta.plugin.reads} package
  • *
  • For {@link HtsContentType#VARIANT_CONTEXTS} codecs, see the {@link htsjdk.beta.plugin.variants} package
  • - *
  • For {@link HtsContentType#FEATURES} codecs, see the {@link htsjdk.beta.plugin.features} package
  • + *
  • For {@link HtsContentType#FEATURES} codecs, see the {@code htsjdk.beta.plugin.features} package
  • * *

    * There can be many codecs for a given content type, each representing a different version of an @@ -26,7 +26,7 @@ */ public enum HtsContentType { - //where would a FASTQ codec fit ? in the same category (which implies the same interfaces) ? + // where would a FASTQ codec fit ? in the same category (which implies the same interfaces) ? /** * Haploid reference content type (see {@link HaploidReferenceFormats} for related formats) */ @@ -43,7 +43,7 @@ public enum HtsContentType { VARIANT_CONTEXTS, /** - * Features content type (see {@link htsjdk.beta.plugin.features} for related formats) + * Features content type (see {@code htsjdk.beta.plugin.features} for related formats) */ FEATURES, } diff --git a/src/main/java/htsjdk/beta/plugin/HtsDecoder.java b/src/main/java/htsjdk/beta/plugin/HtsDecoder.java index 4040f1ce3a..b5d4693a66 100644 --- a/src/main/java/htsjdk/beta/plugin/HtsDecoder.java +++ b/src/main/java/htsjdk/beta/plugin/HtsDecoder.java @@ -15,8 +15,7 @@ * @param type param for the header for this format (i.e. SAMFileHeader) * @param type param for the record for this format (i.e. SAMRecord) */ -public interface HtsDecoder - extends HtsQuery, Closeable { +public interface HtsDecoder extends HtsQuery, Closeable { /** * Get the name of the file format supported by this decoder.The format name defines the underlying @@ -52,5 +51,4 @@ public interface HtsDecoder */ @Override void close(); - } diff --git a/src/main/java/htsjdk/beta/plugin/HtsDecoderOptions.java b/src/main/java/htsjdk/beta/plugin/HtsDecoderOptions.java index cb4ae04d53..ab4f686dad 100644 --- a/src/main/java/htsjdk/beta/plugin/HtsDecoderOptions.java +++ b/src/main/java/htsjdk/beta/plugin/HtsDecoderOptions.java @@ -3,5 +3,4 @@ /** * Base tag interface for options for {@link HtsDecoder}s. 
*/ -public interface HtsDecoderOptions { -} +public interface HtsDecoderOptions {} diff --git a/src/main/java/htsjdk/beta/plugin/HtsEncoder.java b/src/main/java/htsjdk/beta/plugin/HtsEncoder.java index 824dfcfc79..c8cb9d17cb 100644 --- a/src/main/java/htsjdk/beta/plugin/HtsEncoder.java +++ b/src/main/java/htsjdk/beta/plugin/HtsEncoder.java @@ -2,7 +2,6 @@ import htsjdk.beta.io.bundle.BundleResource; import htsjdk.beta.io.bundle.BundleResourceType; - import java.io.Closeable; /** @@ -56,5 +55,4 @@ public interface HtsEncoder extends Cl */ @Override void close(); - } diff --git a/src/main/java/htsjdk/beta/plugin/HtsEncoderOptions.java b/src/main/java/htsjdk/beta/plugin/HtsEncoderOptions.java index 6067aaaafd..093ab3fb92 100644 --- a/src/main/java/htsjdk/beta/plugin/HtsEncoderOptions.java +++ b/src/main/java/htsjdk/beta/plugin/HtsEncoderOptions.java @@ -3,5 +3,4 @@ /** * Base tag interface for options for {@link HtsEncoder}s. */ -public interface HtsEncoderOptions { -} +public interface HtsEncoderOptions {} diff --git a/src/main/java/htsjdk/beta/plugin/HtsHeader.java b/src/main/java/htsjdk/beta/plugin/HtsHeader.java index 5f93d1153d..c2f6e5a583 100644 --- a/src/main/java/htsjdk/beta/plugin/HtsHeader.java +++ b/src/main/java/htsjdk/beta/plugin/HtsHeader.java @@ -1,10 +1,9 @@ package htsjdk.beta.plugin; -//Note: This is a just a tagging interface used to tag SAMFileHeader, VCFHeader, and SAMSequenceDictionary. +// Note: This is a just a tagging interface used to tag SAMFileHeader, VCFHeader, and SAMSequenceDictionary. // It will be more useful once version upgrading is implemented. /** * Tagging interface used as a type-bound for codec/encoder/decoder header type params. 
*/ -public interface HtsHeader { -} +public interface HtsHeader {} diff --git a/src/main/java/htsjdk/beta/plugin/HtsRecord.java b/src/main/java/htsjdk/beta/plugin/HtsRecord.java index fe5cc0c31c..5b2d7ae094 100644 --- a/src/main/java/htsjdk/beta/plugin/HtsRecord.java +++ b/src/main/java/htsjdk/beta/plugin/HtsRecord.java @@ -1,11 +1,10 @@ package htsjdk.beta.plugin; -//Note: This is a tagging interface currently used as a type-bound for codec/encoder/decoder +// Note: This is a tagging interface currently used as a type-bound for codec/encoder/decoder // record type params, and used to tag SAMRecord, BAMRecord, VariantContext, and ReferenceSequence. // It will be more useful once version upgrading is implemented. /** * Tagging interface used as a type-bound for codec/encoder/decoder record type params. */ -public interface HtsRecord { -} +public interface HtsRecord {} diff --git a/src/main/java/htsjdk/beta/plugin/HtsVersion.java b/src/main/java/htsjdk/beta/plugin/HtsVersion.java index e93e325bb9..31e200ee94 100644 --- a/src/main/java/htsjdk/beta/plugin/HtsVersion.java +++ b/src/main/java/htsjdk/beta/plugin/HtsVersion.java @@ -40,7 +40,8 @@ public HtsVersion(final String versionString) { ValidationUtils.nonNull(versionString); final String[] parts = versionString.split("\\."); if (parts.length != 3) { - throw new IllegalArgumentException(String.format("Unable parse version string as major.minor.patch: '%s'", versionString)); + throw new IllegalArgumentException( + String.format("Unable parse version string as major.minor.patch: '%s'", versionString)); } try { majorVersion = Integer.parseInt(parts[0]); @@ -108,7 +109,7 @@ public int compareTo(HtsVersion o) { ValidationUtils.nonNull(o); if (this.majorVersion == o.majorVersion) { if (this.minorVersion == o.minorVersion) { - if (this.patchVersion == o.patchVersion){ + if (this.patchVersion == o.patchVersion) { return 0; } return this.patchVersion - o.patchVersion; diff --git 
a/src/main/java/htsjdk/beta/plugin/IOUtils.java b/src/main/java/htsjdk/beta/plugin/IOUtils.java index d2512cac93..e6b052b6c8 100644 --- a/src/main/java/htsjdk/beta/plugin/IOUtils.java +++ b/src/main/java/htsjdk/beta/plugin/IOUtils.java @@ -4,7 +4,6 @@ import htsjdk.io.HtsPath; import htsjdk.io.IOPath; import htsjdk.samtools.util.BlockCompressedOutputStream; - import java.io.BufferedOutputStream; import java.io.File; import java.io.IOException; @@ -42,7 +41,7 @@ public static IOPath createTempPath(final String prefix, final String suffix) { * @param toConvert Path to convert to GATKPath * @return a Path, or null if the input was null. */ - public static HtsPath toHtsPath(Path toConvert){ + public static HtsPath toHtsPath(Path toConvert) { return null == toConvert ? null : new HtsPath(toConvert.toUri().toString()); } @@ -55,17 +54,15 @@ public static HtsPath toHtsPath(Path toConvert){ public static String getStringFromPath(final IOPath ioPath) { try { final StringWriter stringWriter = new StringWriter(); - //TODO: the UTF-8 encoding of these should be codified somewhere else... - Files.lines(ioPath.toPath(), StandardCharsets.UTF_8).forEach( - line -> { - stringWriter.write(line); - stringWriter.append("\n"); - }); + // TODO: the UTF-8 encoding of these should be codified somewhere else... 
+ Files.lines(ioPath.toPath(), StandardCharsets.UTF_8).forEach(line -> { + stringWriter.write(line); + stringWriter.append("\n"); + }); return stringWriter.toString(); } catch (final IOException e) { throw new HtsjdkIOException( - String.format("Failed to load reads bundle json from: %s", ioPath.getRawInputString()), - e); + String.format("Failed to load reads bundle json from: %s", ioPath.getRawInputString()), e); } } @@ -77,21 +74,21 @@ public static String getStringFromPath(final IOPath ioPath) { * @param gzipOutput if true, gzip output */ public static void writeStringToPath(final IOPath ioPath, final String contents, final boolean gzipOutput) { - if (gzipOutput) { - try (final BufferedOutputStream bos = new BufferedOutputStream(ioPath.getOutputStream()); - final BlockCompressedOutputStream bcos = new BlockCompressedOutputStream(bos, ioPath.toPath())) { - bcos.write(contents.getBytes()); - } catch (final IOException e) { - throw new HtsjdkIOException( - String.format("Failed to load reads bundle json from: %s", ioPath.getRawInputString()), e); - } - } else { - try (final BufferedOutputStream bos = new BufferedOutputStream(ioPath.getOutputStream())) { - bos.write(contents.getBytes()); - } catch (final IOException e) { - throw new HtsjdkIOException( - String.format("Failed to load reads bundle json from: %s", ioPath.getRawInputString()), e); - } - } + if (gzipOutput) { + try (final BufferedOutputStream bos = new BufferedOutputStream(ioPath.getOutputStream()); + final BlockCompressedOutputStream bcos = new BlockCompressedOutputStream(bos, ioPath.toPath())) { + bcos.write(contents.getBytes()); + } catch (final IOException e) { + throw new HtsjdkIOException( + String.format("Failed to load reads bundle json from: %s", ioPath.getRawInputString()), e); + } + } else { + try (final BufferedOutputStream bos = new BufferedOutputStream(ioPath.getOutputStream())) { + bos.write(contents.getBytes()); + } catch (final IOException e) { + throw new HtsjdkIOException( + 
String.format("Failed to load reads bundle json from: %s", ioPath.getRawInputString()), e); + } + } } } diff --git a/src/main/java/htsjdk/beta/plugin/hapref/HapRefDecoderOptions.java b/src/main/java/htsjdk/beta/plugin/hapref/HapRefDecoderOptions.java index 363264f067..0b2294654f 100644 --- a/src/main/java/htsjdk/beta/plugin/hapref/HapRefDecoderOptions.java +++ b/src/main/java/htsjdk/beta/plugin/hapref/HapRefDecoderOptions.java @@ -5,5 +5,4 @@ /** * Class for haploid reference decoder options. */ -public class HapRefDecoderOptions implements HtsDecoderOptions { -} +public class HapRefDecoderOptions implements HtsDecoderOptions {} diff --git a/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceCodec.java b/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceCodec.java index 25b49a3ac0..7bac69149b 100644 --- a/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceCodec.java +++ b/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceCodec.java @@ -6,11 +6,11 @@ /** * Base class for all {@link HtsContentType#HAPLOID_REFERENCE} codecs. */ -public interface HaploidReferenceCodec extends HtsCodec< - HaploidReferenceDecoderOptions, - HaploidReferenceEncoderOptions> { +public interface HaploidReferenceCodec + extends HtsCodec { @Override - default HtsContentType getContentType() { return HtsContentType.HAPLOID_REFERENCE; } - + default HtsContentType getContentType() { + return HtsContentType.HAPLOID_REFERENCE; + } } diff --git a/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceDecoder.java b/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceDecoder.java index 0836ca8c06..9fcf79c411 100644 --- a/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceDecoder.java +++ b/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceDecoder.java @@ -8,4 +8,4 @@ /** * Base class for all {@link HtsContentType#HAPLOID_REFERENCE} decoders. 
*/ -public interface HaploidReferenceDecoder extends HtsDecoder { } +public interface HaploidReferenceDecoder extends HtsDecoder {} diff --git a/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceDecoderOptions.java b/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceDecoderOptions.java index 6c402f22a2..c2c5170047 100644 --- a/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceDecoderOptions.java +++ b/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceDecoderOptions.java @@ -5,5 +5,4 @@ /** * Class for haploid reference decoder options. */ -public class HaploidReferenceDecoderOptions implements HtsDecoderOptions { -} +public class HaploidReferenceDecoderOptions implements HtsDecoderOptions {} diff --git a/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceEncoder.java b/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceEncoder.java index e7eab395cf..fda62cd457 100644 --- a/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceEncoder.java +++ b/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceEncoder.java @@ -8,4 +8,4 @@ /** * Base class for all {@link HtsContentType#HAPLOID_REFERENCE} encoders. */ -public interface HaploidReferenceEncoder extends HtsEncoder { } +public interface HaploidReferenceEncoder extends HtsEncoder {} diff --git a/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceEncoderOptions.java b/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceEncoderOptions.java index cb2ea05f91..bc2a9115e0 100644 --- a/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceEncoderOptions.java +++ b/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceEncoderOptions.java @@ -5,5 +5,4 @@ /** * Class for haploid reference encoder options. 
*/ -public class HaploidReferenceEncoderOptions implements HtsEncoderOptions { -} +public class HaploidReferenceEncoderOptions implements HtsEncoderOptions {} diff --git a/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceFormats.java b/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceFormats.java index 607189b914..aa25ba996a 100644 --- a/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceFormats.java +++ b/src/main/java/htsjdk/beta/plugin/hapref/HaploidReferenceFormats.java @@ -9,5 +9,4 @@ public class HaploidReferenceFormats { * Fasta format */ public static final String FASTA = "FASTA"; - } diff --git a/src/main/java/htsjdk/beta/plugin/interval/HtsIntervalUtils.java b/src/main/java/htsjdk/beta/plugin/interval/HtsIntervalUtils.java index 06555055d3..570ae422bb 100644 --- a/src/main/java/htsjdk/beta/plugin/interval/HtsIntervalUtils.java +++ b/src/main/java/htsjdk/beta/plugin/interval/HtsIntervalUtils.java @@ -1,16 +1,14 @@ package htsjdk.beta.plugin.interval; +import htsjdk.annotations.InternalAPI; import htsjdk.samtools.QueryInterval; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.util.Locatable; -import htsjdk.annotations.InternalAPI; import htsjdk.utils.ValidationUtils; - import java.util.Arrays; import java.util.List; import java.util.stream.Collectors; - /** * Methods for interconverting between HtsQueryInterval and existing htsjdk types such as Locatable/QueryInterval */ @@ -57,9 +55,7 @@ public int getEnd() { @Override public String toString() { - return String.format("%s:%s-%s", - interval.getQueryName(), - interval.getStart(), interval.getEnd()); + return String.format("%s:%s-%s", interval.getQueryName(), interval.getStart(), interval.getEnd()); } }; } @@ -73,10 +69,7 @@ public String toString() { @InternalAPI public static List toLocatableList(final List intervals) { ValidationUtils.nonNull(intervals, "interval list"); - return intervals - .stream() - .map(si -> toLocatable(si)) - .collect(Collectors.toList()); + 
return intervals.stream().map(si -> toLocatable(si)).collect(Collectors.toList()); } /** @@ -88,14 +81,13 @@ public static List toLocatableList(final List intervals) */ @InternalAPI public static QueryInterval[] toQueryIntervalArray( - final List intervals, - final SAMSequenceDictionary dictionary) { + final List intervals, final SAMSequenceDictionary dictionary) { ValidationUtils.nonNull(intervals, "interval list"); ValidationUtils.nonNull(dictionary, "SAMSequenceDictionary"); - return intervals - .stream() + return intervals.stream() .map(si -> toQueryInterval(si, dictionary)) - .collect(Collectors.toList()).toArray(new QueryInterval[intervals.size()]); + .collect(Collectors.toList()) + .toArray(new QueryInterval[intervals.size()]); } /** @@ -106,8 +98,7 @@ public static QueryInterval[] toQueryIntervalArray( */ @InternalAPI public static List fromQueryIntervalArray( - final QueryInterval[] queryIntervals, - final SAMSequenceDictionary dictionary) { + final QueryInterval[] queryIntervals, final SAMSequenceDictionary dictionary) { return Arrays.stream(queryIntervals) .map(si -> new HtsQueryInterval(si, dictionary)) .collect(Collectors.toList()); @@ -126,7 +117,8 @@ public static int toIntegerSafe(final long coord) { try { return Math.toIntExact(coord); } catch (ArithmeticException e) { - throw new IllegalArgumentException(String.format("long to int conversion of %ld results in integer overflow", coord), e); + throw new IllegalArgumentException( + String.format("long to int conversion of %ld results in integer overflow", coord), e); } } } diff --git a/src/main/java/htsjdk/beta/plugin/interval/HtsQuery.java b/src/main/java/htsjdk/beta/plugin/interval/HtsQuery.java index b541c19786..c5e5b79df5 100644 --- a/src/main/java/htsjdk/beta/plugin/interval/HtsQuery.java +++ b/src/main/java/htsjdk/beta/plugin/interval/HtsQuery.java @@ -3,7 +3,6 @@ import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; import htsjdk.samtools.util.CloseableIterator; import 
htsjdk.utils.ValidationUtils; - import java.util.Collections; import java.util.List; @@ -22,7 +21,7 @@ public interface HtsQuery extends Iterable { @Override CloseableIterator iterator(); - //******************************************* + // ******************************************* // Start temporary common query interface default implementations. /** @@ -63,10 +62,7 @@ default CloseableIterator query(final String queryString) { * @return an iterator over all records from the underlying resource that match the query arguments */ default CloseableIterator query( - final String queryName, - final long start, - final long end, - final HtsQueryRule queryRule) { + final String queryName, final long start, final long end, final HtsQueryRule queryRule) { return query(new HtsQueryInterval(queryName, start, end), queryRule); } @@ -160,7 +156,7 @@ default CloseableIterator queryContained(final List interva return query(intervals, HtsQueryRule.CONTAINED); } - //TODO: match reads that have this start; we *could* just use an HtsInterval with span==1 ? do we need this ? + // TODO: match reads that have this start; we *could* just use an HtsInterval with span==1 ? do we need this ? 
/** * Get an iterator over all records from the underlying resource that overlap the start position * @@ -172,5 +168,4 @@ default CloseableIterator queryStart(final String queryName, final long ValidationUtils.validateArg(isQueryable(), "Decoder is not queryable"); throw new HtsjdkUnsupportedOperationException("queryStart not implemented for this decoder"); } - } diff --git a/src/main/java/htsjdk/beta/plugin/interval/HtsQueryInterval.java b/src/main/java/htsjdk/beta/plugin/interval/HtsQueryInterval.java index 0cfc202b66..88e2ea030f 100644 --- a/src/main/java/htsjdk/beta/plugin/interval/HtsQueryInterval.java +++ b/src/main/java/htsjdk/beta/plugin/interval/HtsQueryInterval.java @@ -4,7 +4,7 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.utils.ValidationUtils; -//TODO: wild cards 0, +, end of reference/contig +// TODO: wild cards 0, +, end of reference/contig /** * An concrete query interval implementation of {@link HtsInterval} used for random access queries on @@ -22,8 +22,8 @@ public class HtsQueryInterval implements HtsInterval { * @param start the integer start position * @param end the end position */ - public HtsQueryInterval(final String queryName, final long start, final long end){ - //validatePositions(contig, start, end); + public HtsQueryInterval(final String queryName, final long start, final long end) { + // validatePositions(contig, start, end); this.queryName = queryName; this.start = start; this.end = end; @@ -37,22 +37,32 @@ public HtsQueryInterval(final String queryName, final long start, final long end */ public HtsQueryInterval(final QueryInterval queryInterval, final SAMSequenceDictionary dictionary) { ValidationUtils.nonNull(dictionary, "a valid sequence dictionary is required"); - ValidationUtils.nonNull(dictionary.getSequence(queryInterval.referenceIndex), - String.format("query index %d is not present in the provided dictionary", queryInterval.referenceIndex)); - 
ValidationUtils.nonNull(dictionary.getSequence(queryInterval.referenceIndex).getContig(), - String.format("contig name for index %d is not present in the provided dictionary", queryInterval.referenceIndex)); + ValidationUtils.nonNull( + dictionary.getSequence(queryInterval.referenceIndex), + String.format( + "query index %d is not present in the provided dictionary", queryInterval.referenceIndex)); + ValidationUtils.nonNull( + dictionary.getSequence(queryInterval.referenceIndex).getContig(), + String.format( + "contig name for index %d is not present in the provided dictionary", + queryInterval.referenceIndex)); this.queryName = dictionary.getSequence(queryInterval.referenceIndex).getContig(); this.start = queryInterval.start; this.end = queryInterval.end; } @Override - public String getQueryName() { return queryName; } + public String getQueryName() { + return queryName; + } @Override - public long getStart() { return start; } + public long getStart() { + return start; + } @Override - public long getEnd() { return end; } - + public long getEnd() { + return end; + } } diff --git a/src/main/java/htsjdk/beta/plugin/reads/ReadsBundle.java b/src/main/java/htsjdk/beta/plugin/reads/ReadsBundle.java index 3d2f244f14..4941ddb5af 100644 --- a/src/main/java/htsjdk/beta/plugin/reads/ReadsBundle.java +++ b/src/main/java/htsjdk/beta/plugin/reads/ReadsBundle.java @@ -1,20 +1,19 @@ package htsjdk.beta.plugin.reads; import htsjdk.beta.io.IOPathUtils; -import htsjdk.beta.io.bundle.BundleJSON; -import htsjdk.io.HtsPath; -import htsjdk.io.IOPath; -import htsjdk.beta.io.bundle.BundleResourceType; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleBuilder; -import htsjdk.beta.io.bundle.IOPathResource; +import htsjdk.beta.io.bundle.BundleJSON; import htsjdk.beta.io.bundle.BundleResource; +import htsjdk.beta.io.bundle.BundleResourceType; +import htsjdk.beta.io.bundle.IOPathResource; +import htsjdk.io.HtsPath; +import htsjdk.io.IOPath; import htsjdk.samtools.SamFiles; 
import htsjdk.samtools.util.FileExtensions; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.Tuple; import htsjdk.utils.ValidationUtils; - import java.io.Serializable; import java.nio.file.Path; import java.util.Arrays; @@ -77,11 +76,11 @@ protected ReadsBundle(final Collection resources) { super(BundleResourceType.CT_ALIGNED_READS, resources); } - /** - * return the {@link BundleResourceType#CT_ALIGNED_READS} {@link BundleResource} for this {@link ReadsBundle} - * - * @return the {@link BundleResourceType#CT_ALIGNED_READS} {@link BundleResource} for this {@link ReadsBundle} - */ + /** + * return the {@link BundleResourceType#CT_ALIGNED_READS} {@link BundleResource} for this {@link ReadsBundle} + * + * @return the {@link BundleResourceType#CT_ALIGNED_READS} {@link BundleResource} for this {@link ReadsBundle} + */ public BundleResource getReads() { return getOrThrow(BundleResourceType.CT_ALIGNED_READS); } @@ -127,9 +126,9 @@ public static ReadsBundle getReadsBundleFromString(final String jsonStri * @return a newly created {@link ReadsBundle} */ public static ReadsBundle getReadsBundleFromString( - final String jsonString, - final Function ioPathConstructor) { - return new ReadsBundle<>(BundleJSON.toBundle(jsonString, ioPathConstructor).getResources()); + final String jsonString, final Function ioPathConstructor) { + return new ReadsBundle<>( + BundleJSON.toBundle(jsonString, ioPathConstructor).getResources()); } /** @@ -157,14 +156,14 @@ public static ReadsBundle resolveIndex(final IOPath reads) { * @return a {@link ReadsBundle} containing reads and companion index, if it can be found */ public static ReadsBundle resolveIndex( - final T reads, - final Function ioPathConstructor) { + final T reads, final Function ioPathConstructor) { if (reads.hasFileSystemProvider()) { final Path index = SamFiles.findIndex(reads.toPath()); if (index == null) { return new ReadsBundle<>(reads); } else { - return new ReadsBundle(reads, 
ioPathConstructor.apply(index.toUri().toString())); + return new ReadsBundle( + reads, ioPathConstructor.apply(index.toUri().toString())); } } return new ReadsBundle<>(reads); @@ -177,9 +176,7 @@ private static IOPathResource toInputResource(final String pr if (providedContentType != null && !typePair.get().a.equals(providedContentType)) { LOG.warn(String.format( "Provided content type \"%s\" for \"%s\" doesn't match derived content type \"%s\"", - providedContentType, - ioPath.getRawInputString(), - typePair.get().a)); + providedContentType, ioPath.getRawInputString(), typePair.get().a)); } } return new IOPathResource(ioPath, providedContentType); @@ -204,9 +201,8 @@ private static Optional> getInferredCon } else if (ext.equals((FileExtensions.SAM))) { return Optional.of(new Tuple<>(BundleResourceType.CT_ALIGNED_READS, BundleResourceType.FMT_READS_SAM)); } - //TODO: finish this, else SRA, htsget,... + // TODO: finish this, else htsget,... } return Optional.empty(); } - } diff --git a/src/main/java/htsjdk/beta/plugin/reads/ReadsCodec.java b/src/main/java/htsjdk/beta/plugin/reads/ReadsCodec.java index 2958983e64..3c2b05ba1e 100644 --- a/src/main/java/htsjdk/beta/plugin/reads/ReadsCodec.java +++ b/src/main/java/htsjdk/beta/plugin/reads/ReadsCodec.java @@ -9,6 +9,7 @@ public interface ReadsCodec extends HtsCodec { @Override - default HtsContentType getContentType() { return HtsContentType.ALIGNED_READS; } - + default HtsContentType getContentType() { + return HtsContentType.ALIGNED_READS; + } } diff --git a/src/main/java/htsjdk/beta/plugin/reads/ReadsDecoder.java b/src/main/java/htsjdk/beta/plugin/reads/ReadsDecoder.java index 681a1b47b9..013f6e47a5 100644 --- a/src/main/java/htsjdk/beta/plugin/reads/ReadsDecoder.java +++ b/src/main/java/htsjdk/beta/plugin/reads/ReadsDecoder.java @@ -1,12 +1,11 @@ package htsjdk.beta.plugin.reads; +import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.plugin.HtsContentType; import htsjdk.beta.plugin.HtsDecoder; -import 
htsjdk.beta.io.bundle.Bundle; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.util.CloseableIterator; - import java.util.Optional; /** @@ -31,4 +30,3 @@ public interface ReadsDecoder extends HtsDecoder, Read @Override Optional queryMate(SAMRecord rec); } - diff --git a/src/main/java/htsjdk/beta/plugin/reads/ReadsDecoderOptions.java b/src/main/java/htsjdk/beta/plugin/reads/ReadsDecoderOptions.java index f1ae8a6536..585d567996 100644 --- a/src/main/java/htsjdk/beta/plugin/reads/ReadsDecoderOptions.java +++ b/src/main/java/htsjdk/beta/plugin/reads/ReadsDecoderOptions.java @@ -1,12 +1,11 @@ package htsjdk.beta.plugin.reads; +import htsjdk.annotations.InternalAPI; import htsjdk.beta.codecs.reads.bam.BAMDecoderOptions; import htsjdk.beta.codecs.reads.cram.CRAMDecoderOptions; import htsjdk.beta.plugin.HtsDecoderOptions; import htsjdk.samtools.ValidationStringency; -import htsjdk.annotations.InternalAPI; import htsjdk.utils.ValidationUtils; - import java.nio.channels.SeekableByteChannel; import java.util.Optional; import java.util.function.Function; @@ -15,15 +14,15 @@ * Reads decoder options (shared/common). 
*/ public class ReadsDecoderOptions implements HtsDecoderOptions { - private ValidationStringency validationStringency = ValidationStringency.STRICT; - private boolean eagerlyDecode = false; // honored by BAM and HtsGet - private boolean fileBasedIndexCached = false; // honored by BAM and CRAM - private boolean memoryMapIndexes = true; // honored by BAM and CRAM - //TODO: replace these with a prefetch size args, and use a local channel wrapper implementation + private ValidationStringency validationStringency = ValidationStringency.STRICT; + private boolean eagerlyDecode = false; // honored by BAM and HtsGet + private boolean fileBasedIndexCached = false; // honored by BAM and CRAM + private boolean memoryMapIndexes = true; // honored by BAM and CRAM + // TODO: replace these with a prefetch size args, and use a local channel wrapper implementation private Function readsChannelTransformer; private Function indexChannelTransformer; - private BAMDecoderOptions bamDecoderOptions = new BAMDecoderOptions(); - private CRAMDecoderOptions cramDecoderOptions = new CRAMDecoderOptions(); + private BAMDecoderOptions bamDecoderOptions = new BAMDecoderOptions(); + private CRAMDecoderOptions cramDecoderOptions = new CRAMDecoderOptions(); /** * Get the {@link ValidationStringency} used for these options. Defaults to {@link ValidationStringency#STRICT}. @@ -112,7 +111,9 @@ public ReadsDecoderOptions setMemoryMapIndexes(final boolean memoryMapIndexes) { * * @return the {@link BAMDecoderOptions} for these options */ - public BAMDecoderOptions getBAMDecoderOptions() { return bamDecoderOptions; } + public BAMDecoderOptions getBAMDecoderOptions() { + return bamDecoderOptions; + } /** * Set the {@link BAMDecoderOptions} used for these options. 
@@ -131,7 +132,9 @@ public ReadsDecoderOptions setBAMDecoderOptions(final BAMDecoderOptions bamDecod * * @return the {@link CRAMDecoderOptions} for these options */ - public CRAMDecoderOptions getCRAMDecoderOptions() { return cramDecoderOptions; } + public CRAMDecoderOptions getCRAMDecoderOptions() { + return cramDecoderOptions; + } /** * Set the {@link CRAMDecoderOptions} for these ReadsDecoderOptions. @@ -196,5 +199,4 @@ public ReadsDecoderOptions setIndexChannelTransformer( this.indexChannelTransformer = indexChannelTransformer; return this; } - } diff --git a/src/main/java/htsjdk/beta/plugin/reads/ReadsEncoder.java b/src/main/java/htsjdk/beta/plugin/reads/ReadsEncoder.java index 7c16fc2527..042a19bec6 100644 --- a/src/main/java/htsjdk/beta/plugin/reads/ReadsEncoder.java +++ b/src/main/java/htsjdk/beta/plugin/reads/ReadsEncoder.java @@ -8,4 +8,4 @@ /** * Base interface for {@link HtsContentType#ALIGNED_READS} encoders. */ -public interface ReadsEncoder extends HtsEncoder { } +public interface ReadsEncoder extends HtsEncoder {} diff --git a/src/main/java/htsjdk/beta/plugin/reads/ReadsEncoderOptions.java b/src/main/java/htsjdk/beta/plugin/reads/ReadsEncoderOptions.java index 711522f8c4..7e4114317c 100644 --- a/src/main/java/htsjdk/beta/plugin/reads/ReadsEncoderOptions.java +++ b/src/main/java/htsjdk/beta/plugin/reads/ReadsEncoderOptions.java @@ -40,7 +40,9 @@ public ReadsEncoderOptions setPreSorted(boolean preSorted) { * * @return the {@link BAMEncoderOptions} for these ReadsEncoderOptions */ - public BAMEncoderOptions getBAMEncoderOptions() { return bamEncoderOptions; } + public BAMEncoderOptions getBAMEncoderOptions() { + return bamEncoderOptions; + } /** * Set the {@link BAMEncoderOptions} for these ReadsEncoderOptions. 
Defaults values are default @@ -76,5 +78,4 @@ public ReadsEncoderOptions setCRAMEncoderOptions(final CRAMEncoderOptions cramEn this.cramEncoderOptions = cramEncoderOptions; return this; } - } diff --git a/src/main/java/htsjdk/beta/plugin/reads/ReadsFormats.java b/src/main/java/htsjdk/beta/plugin/reads/ReadsFormats.java index d406da3734..f6a3791182 100644 --- a/src/main/java/htsjdk/beta/plugin/reads/ReadsFormats.java +++ b/src/main/java/htsjdk/beta/plugin/reads/ReadsFormats.java @@ -24,5 +24,4 @@ public class ReadsFormats { * GA4GH htsget BAM format. */ public static final String HTSGET_BAM = "HTSGET_BAM"; - } diff --git a/src/main/java/htsjdk/beta/plugin/reads/ReadsQuery.java b/src/main/java/htsjdk/beta/plugin/reads/ReadsQuery.java index 0ef6821b82..707579e310 100644 --- a/src/main/java/htsjdk/beta/plugin/reads/ReadsQuery.java +++ b/src/main/java/htsjdk/beta/plugin/reads/ReadsQuery.java @@ -2,7 +2,6 @@ import htsjdk.beta.plugin.HtsRecord; import htsjdk.samtools.util.CloseableIterator; - import java.util.Optional; /** diff --git a/src/main/java/htsjdk/beta/plugin/registry/HaploidReferenceResolver.java b/src/main/java/htsjdk/beta/plugin/registry/HaploidReferenceResolver.java index 676a5ffe3c..6b59d45f64 100644 --- a/src/main/java/htsjdk/beta/plugin/registry/HaploidReferenceResolver.java +++ b/src/main/java/htsjdk/beta/plugin/registry/HaploidReferenceResolver.java @@ -15,7 +15,6 @@ import htsjdk.samtools.util.GZIIndex; import htsjdk.samtools.util.IOUtil; import htsjdk.utils.ValidationUtils; - import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; @@ -70,8 +69,7 @@ public HaploidReferenceDecoder getHaploidReferenceDecoder(final IOPath inputPath * that the registry contains an incorrectly written codec. 
*/ public HaploidReferenceDecoder getHaploidReferenceDecoder( - final IOPath inputPath, - final HaploidReferenceDecoderOptions HaploidReferenceDecoderOptions) { + final IOPath inputPath, final HaploidReferenceDecoderOptions HaploidReferenceDecoderOptions) { ValidationUtils.nonNull(inputPath, "Input path"); ValidationUtils.nonNull(HaploidReferenceDecoderOptions, "Decoder options"); @@ -109,12 +107,12 @@ public HaploidReferenceDecoder getHaploidReferenceDecoder(final Bundle inputBund */ @SuppressWarnings("unchecked") public HaploidReferenceDecoder getHaploidReferenceDecoder( - final Bundle inputBundle, - final HaploidReferenceDecoderOptions HaploidReferenceDecoderOptions) { + final Bundle inputBundle, final HaploidReferenceDecoderOptions HaploidReferenceDecoderOptions) { ValidationUtils.nonNull(inputBundle, "Input bundle"); ValidationUtils.nonNull(HaploidReferenceDecoderOptions, "Decoder options"); - return (HaploidReferenceDecoder) resolveForDecoding(inputBundle).getDecoder(inputBundle, HaploidReferenceDecoderOptions); + return (HaploidReferenceDecoder) + resolveForDecoding(inputBundle).getDecoder(inputBundle, HaploidReferenceDecoderOptions); } /** @@ -126,38 +124,33 @@ public HaploidReferenceDecoder getHaploidReferenceDecoder( * @return a reference Bundle * @param */ - public static Bundle referenceBundleFromFastaPath(final IOPath fastaPath, final Function ioPathConstructor) { + public static Bundle referenceBundleFromFastaPath( + final IOPath fastaPath, final Function ioPathConstructor) { final BundleBuilder referenceBundleBuilder = new BundleBuilder(); referenceBundleBuilder.addPrimary(new IOPathResource(fastaPath, BundleResourceType.CT_HAPLOID_REFERENCE)); final Path dictPath = ReferenceSequenceFileFactory.getDefaultDictionaryForReferenceSequence(fastaPath.toPath()); if (Files.exists(dictPath)) { - referenceBundleBuilder.addSecondary( - new IOPathResource( - ioPathConstructor.apply(dictPath.toUri().toString()), - BundleResourceType.CT_REFERENCE_DICTIONARY)); + 
referenceBundleBuilder.addSecondary(new IOPathResource( + ioPathConstructor.apply(dictPath.toUri().toString()), BundleResourceType.CT_REFERENCE_DICTIONARY)); } final Path idxPath = ReferenceSequenceFileFactory.getFastaIndexFileName(fastaPath.toPath()); if (Files.exists(idxPath)) { - referenceBundleBuilder.addSecondary( - new IOPathResource( - ioPathConstructor.apply(idxPath.toUri().toString()), - BundleResourceType.CT_REFERENCE_INDEX)); + referenceBundleBuilder.addSecondary(new IOPathResource( + ioPathConstructor.apply(idxPath.toUri().toString()), BundleResourceType.CT_REFERENCE_INDEX)); } try { if (IOUtil.isBlockCompressed(fastaPath.toPath(), true)) { final Path gziPath = GZIIndex.resolveIndexNameForBgzipFile(fastaPath.toPath()); - referenceBundleBuilder.addSecondary( - new IOPathResource( - ioPathConstructor.apply(gziPath.toUri().toString()), - BundleResourceType.CT_REFERENCE_INDEX_GZI)); + referenceBundleBuilder.addSecondary(new IOPathResource( + ioPathConstructor.apply(gziPath.toUri().toString()), + BundleResourceType.CT_REFERENCE_INDEX_GZI)); } } catch (IOException e) { throw new HtsjdkException("Error while checking for block compression", e); } return referenceBundleBuilder.build(); } - } diff --git a/src/main/java/htsjdk/beta/plugin/registry/HtsCodecRegistry.java b/src/main/java/htsjdk/beta/plugin/registry/HtsCodecRegistry.java index 9ee13a3c68..ad226c9f17 100644 --- a/src/main/java/htsjdk/beta/plugin/registry/HtsCodecRegistry.java +++ b/src/main/java/htsjdk/beta/plugin/registry/HtsCodecRegistry.java @@ -1,11 +1,12 @@ package htsjdk.beta.plugin.registry; +import htsjdk.beta.exception.HtsjdkPluginException; +import htsjdk.beta.exception.HtsjdkUnsupportedOperationException; import htsjdk.beta.plugin.HtsCodec; import htsjdk.beta.plugin.hapref.HaploidReferenceCodec; import htsjdk.beta.plugin.reads.ReadsCodec; import htsjdk.beta.plugin.variants.VariantsCodec; -import htsjdk.beta.exception.HtsjdkPluginException; -import 
htsjdk.beta.exception.HtsjdkUnsupportedOperationException; + /** * A registry for tracking {@link HtsCodec} instances. *

    @@ -26,7 +27,7 @@ public class HtsCodecRegistry { * Create a registry. Protected to prevent use outside of the registry package. To create * a private registry from outside the registry package, use {@link #createPrivateRegistry}. */ - protected HtsCodecRegistry() { } + protected HtsCodecRegistry() {} /** * Add a codec to the registry. If a codec that supports the same (format, version) (determined @@ -66,15 +67,21 @@ protected HtsCodecRegistry() { } * * @return a mutable registry instance for private use */ - public synchronized static HtsCodecRegistry createPrivateRegistry() { + public static synchronized HtsCodecRegistry createPrivateRegistry() { final HtsCodecRegistry privateRegistry = new HtsCodecRegistry(); // propagate the codecs from the sourceRegistry to the new registry - HtsDefaultRegistry.htsDefaultCodecRegistry.getHaploidReferenceResolver().getCodecs() + HtsDefaultRegistry.htsDefaultCodecRegistry + .getHaploidReferenceResolver() + .getCodecs() + .forEach(c -> privateRegistry.registerCodec(c)); + HtsDefaultRegistry.htsDefaultCodecRegistry + .getReadsResolver() + .getCodecs() .forEach(c -> privateRegistry.registerCodec(c)); - HtsDefaultRegistry.htsDefaultCodecRegistry.getReadsResolver().getCodecs(). - forEach(c -> privateRegistry.registerCodec(c)); - HtsDefaultRegistry.htsDefaultCodecRegistry.getVariantsResolver().getCodecs() + HtsDefaultRegistry.htsDefaultCodecRegistry + .getVariantsResolver() + .getCodecs() .forEach(c -> privateRegistry.registerCodec(c)); return privateRegistry; } @@ -84,21 +91,25 @@ public synchronized static HtsCodecRegistry createPrivateRegistry() { * * @return the {@link HaploidReferenceResolver} for this registry */ - public synchronized HaploidReferenceResolver getHaploidReferenceResolver() { return htsHaploidReferenceResolver; } + public synchronized HaploidReferenceResolver getHaploidReferenceResolver() { + return htsHaploidReferenceResolver; + } /** * Get the {@link ReadsResolver} for this registry. 
* * @return the {@link ReadsResolver} for this registry */ - public synchronized ReadsResolver getReadsResolver() { return htsReadsResolver; } + public synchronized ReadsResolver getReadsResolver() { + return htsReadsResolver; + } /** * Get the {@link VariantsResolver} for this registry. * * @return the {@link VariantsResolver} for this registry */ - public synchronized VariantsResolver getVariantsResolver() { return htsVariantsResolver; } - + public synchronized VariantsResolver getVariantsResolver() { + return htsVariantsResolver; + } } - diff --git a/src/main/java/htsjdk/beta/plugin/registry/HtsCodecResolver.java b/src/main/java/htsjdk/beta/plugin/registry/HtsCodecResolver.java index dbfc7d7ed2..cd83db6b4e 100644 --- a/src/main/java/htsjdk/beta/plugin/registry/HtsCodecResolver.java +++ b/src/main/java/htsjdk/beta/plugin/registry/HtsCodecResolver.java @@ -1,18 +1,17 @@ package htsjdk.beta.plugin.registry; +import htsjdk.annotations.InternalAPI; +import htsjdk.beta.exception.HtsjdkException; import htsjdk.beta.exception.HtsjdkIOException; -import htsjdk.beta.plugin.HtsCodec; -import htsjdk.beta.plugin.HtsVersion; +import htsjdk.beta.exception.HtsjdkPluginException; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResource; import htsjdk.beta.io.bundle.SignatureStream; -import htsjdk.beta.exception.HtsjdkException; -import htsjdk.beta.exception.HtsjdkPluginException; +import htsjdk.beta.plugin.HtsCodec; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.io.IOPath; import htsjdk.samtools.util.Log; -import htsjdk.annotations.InternalAPI; import htsjdk.utils.ValidationUtils; - import java.io.IOException; import java.util.Collections; import java.util.HashMap; @@ -37,8 +36,8 @@ public class HtsCodecResolver> { private static final Log LOG = Log.getInstance(HtsCodecResolver.class); - final static String NO_SUPPORTING_CODEC_ERROR = "No registered codec accepts the provided resource"; - final static String MULTIPLE_SUPPORTING_CODECS_ERROR = 
"Multiple codecs accept the provided resource"; + static final String NO_SUPPORTING_CODEC_ERROR = "No registered codec accepts the provided resource"; + static final String MULTIPLE_SUPPORTING_CODECS_ERROR = "Multiple codecs accept the provided resource"; private final String requiredContentType; private final Map> codecs = new HashMap<>(); @@ -75,9 +74,9 @@ public C registerCodec(final C codec) { // update the version map for this codec final C oldCodec = versionMap.put(codec.getVersion(), codec); if (oldCodec != null) { - LOG.warn(String.format("A previously registered HTS codec (%s) was replaced with the (%s) codec ", - oldCodec.getDisplayName(), - codec.getDisplayName())); + LOG.warn(String.format( + "A previously registered HTS codec (%s) was replaced with the (%s) codec ", + oldCodec.getDisplayName(), codec.getDisplayName())); } return oldCodec; } @@ -149,15 +148,14 @@ public C resolveForDecoding(final Bundle bundle) { final Optional optFormatString = bundleResource.getFileFormat(); final List candidateCodecs = resolveForFormat(optFormatString); - final List resolvedCodecs = bundleResource.getIOPath().isPresent() ? - resolveForDecodingIOPath(bundleResource, candidateCodecs) : - resolveForDecodingStream(bundleResource, candidateCodecs); + final List resolvedCodecs = bundleResource.getIOPath().isPresent() + ? resolveForDecodingIOPath(bundleResource, candidateCodecs) + : resolveForDecodingStream(bundleResource, candidateCodecs); return getOneOrThrow( resolvedCodecs, - () -> String.format("%s/%s", - optFormatString.isPresent () ? optFormatString.get() : "(NONE)", - bundleResource)); + () -> String.format( + "%s/%s", optFormatString.isPresent() ? optFormatString.get() : "(NONE)", bundleResource)); } /** @@ -180,7 +178,9 @@ public C resolveForDecoding(final Bundle bundle) { * @throws HtsjdkPluginException if more than one codec claims to handle the resource. this usually indicates * that the registry contains an incorrectly written codec. 
*/ - public C resolveForEncoding(final Bundle bundle) { return resolveForEncoding(bundle, HtsVersion.NEWEST_VERSION); } + public C resolveForEncoding(final Bundle bundle) { + return resolveForEncoding(bundle, HtsVersion.NEWEST_VERSION); + } /** * Inspect a bundle and find a codec that can encode to the primary resource using the format version @@ -208,16 +208,15 @@ public C resolveForEncoding(final Bundle bundle, final HtsVersion htsVersion) { final List candidateCodecs = resolveForFormat(optFormatString); final Optional ioPath = bundleResource.getIOPath(); - final List filteredCodecs = bundleResource.getIOPath().isPresent() ? - resolveForEncodingIOPath(ioPath.get(), candidateCodecs) : - candidateCodecs; // there isn't anything else to probe when the output is to a stream + final List filteredCodecs = bundleResource.getIOPath().isPresent() + ? resolveForEncodingIOPath(ioPath.get(), candidateCodecs) + : candidateCodecs; // there isn't anything else to probe when the output is to a stream final List resolvedCodecs = filterByVersion(filteredCodecs, htsVersion); return getOneOrThrow( resolvedCodecs, - () -> String.format("%s/%s", - optFormatString.isPresent () ? optFormatString.get() : "(NONE)", - bundleResource)); + () -> String.format( + "%s/%s", optFormatString.isPresent() ? 
optFormatString.get() : "(NONE)", bundleResource)); } /** @@ -245,9 +244,9 @@ public List resolveForFormat(final String format) { * @throws HtsjdkException if no registered codecs can handle the resource */ public C resolveFormatAndVersion(final String format, final HtsVersion formatVersion) { - final List matchingCodecs = resolveForFormat(format) - .stream() - .filter(codec -> codec.getFileFormat().equals(format) && codec.getVersion().equals(formatVersion)) + final List matchingCodecs = resolveForFormat(format).stream() + .filter(codec -> codec.getFileFormat().equals(format) + && codec.getVersion().equals(formatVersion)) .collect(Collectors.toList()); return getOneOrThrow(matchingCodecs, () -> String.format("%s/%s", format, formatVersion)); } @@ -259,11 +258,8 @@ public C resolveFormatAndVersion(final String format, final HtsVersion formatVer */ public List getCodecs() { // flatten out the codecs into a single list - final List codecList = codecs - .values() - .stream() - .flatMap(map -> map.values().stream()) - .collect(Collectors.toList()); + final List codecList = + codecs.values().stream().flatMap(map -> map.values().stream()).collect(Collectors.toList()); return codecList; } @@ -296,35 +292,28 @@ private List resolveForDecodingStream(final BundleResource bundleResource, fi final byte[] signatureBuffer = getSignatureProbeBuffer(bundleResource, candidateCodecs); return candidateCodecs.stream() .filter(codec -> codec.canDecodeSignature( - new SignatureStream(signatureBuffer.length, signatureBuffer), - bundleResource.getDisplayName())) + new SignatureStream(signatureBuffer.length, signatureBuffer), bundleResource.getDisplayName())) .collect(Collectors.toList()); } - private final byte[] getSignatureProbeBuffer( - final BundleResource bundleResource, - final List candidateCodecs) { + private final byte[] getSignatureProbeBuffer(final BundleResource bundleResource, final List candidateCodecs) { final int maxSignatureProbeLength = 
getMaxSignatureProbeLength(candidateCodecs); - try (final SignatureStream probingStream = - bundleResource.getIOPath().isPresent() ? - getIOPathSignatureProbingStream(bundleResource, maxSignatureProbeLength) : - bundleResource.getSignatureStream(maxSignatureProbeLength)) { + try (final SignatureStream probingStream = bundleResource.getIOPath().isPresent() + ? getIOPathSignatureProbingStream(bundleResource, maxSignatureProbeLength) + : bundleResource.getSignatureStream(maxSignatureProbeLength)) { // we need to recreate a stream over the underlying signature for each codec, // since some implementations may use their own mark/reset pairs final byte[] signatureBytes = new byte[probingStream.getSignaturePrefixLength()]; final int readSize = probingStream.read(signatureBytes); if (readSize != maxSignatureProbeLength) { - throw new HtsjdkPluginException( - String.format("Failure to read %d bytes from signature stream for %s (only read %d)", - maxSignatureProbeLength, - bundleResource, - readSize)); + throw new HtsjdkPluginException(String.format( + "Failure to read %d bytes from signature stream for %s (only read %d)", + maxSignatureProbeLength, bundleResource, readSize)); } return signatureBytes; } catch (IOException e) { throw new HtsjdkIOException( - String.format("error closing signature stream for %s", bundleResource.getDisplayName()), - e); + String.format("error closing signature stream for %s", bundleResource.getDisplayName()), e); } } @@ -339,21 +328,19 @@ private List resolveForEncodingIOPath(final IOPath ioPath, final List cand final List filteredCodecs = uriHandlers.isEmpty() ? 
candidateCodecs : uriHandlers; // reduce our candidates based on uri and IOPath - return filteredCodecs.stream() - .filter(c -> c.canDecodeURI(ioPath)) - .collect(Collectors.toList()); + return filteredCodecs.stream().filter(c -> c.canDecodeURI(ioPath)).collect(Collectors.toList()); } private int getMaxSignatureProbeLength(final List candidateCodecs) { // find the longest signature probe length of any candidate return candidateCodecs.stream() .map(codec -> codec.getSignatureProbeLength()) - .max(Integer::compare).orElse(0); + .max(Integer::compare) + .orElse(0); } private SignatureStream getIOPathSignatureProbingStream( - final BundleResource bundleResource, - final int streamPrefixSize) { + final BundleResource bundleResource, final int streamPrefixSize) { ValidationUtils.validateArg(bundleResource.getIOPath().isPresent(), "an IOPath resource is required"); final IOPath inputPath = bundleResource.getIOPath().get(); if (!inputPath.hasFileSystemProvider()) { @@ -364,11 +351,10 @@ private SignatureStream getIOPathSignatureProbingStream( // "claimURI" implementations, or else it would be a known protocol such as "gs://" for // which the user expected a file system to be present. It likely represents user error // (a user entered "hdf://" instead of "hdfs://"), and it will fail anyway, so throw. 
- throw new IllegalArgumentException( - String.format("The resource (%s) specifies a custom protocol (%s) " + - "which no registered codec claims, and for which no NIO file system provider is available", - bundleResource, - inputPath.getURI().getScheme())); + throw new IllegalArgumentException(String.format( + "The resource (%s) specifies a custom protocol (%s) " + + "which no registered codec claims, and for which no NIO file system provider is available", + bundleResource, inputPath.getURI().getScheme())); } return bundleResource.getSignatureStream(streamPrefixSize); } @@ -387,10 +373,12 @@ protected List filterByVersion(final List candidateCodecs, final HtsVersio // version (since there still can be more than one) final HtsVersion newestVersion = candidateCodecs.stream() .map(c -> c.getVersion()) - .reduce(candidateCodecs.get(0).getVersion(), + .reduce( + candidateCodecs.get(0).getVersion(), (HtsVersion a, HtsVersion b) -> a.compareTo(b) > 0 ? a : b); - return candidateCodecs.stream().filter( - c -> c.getVersion().equals(newestVersion)).collect(Collectors.toList()); + return candidateCodecs.stream() + .filter(c -> c.getVersion().equals(newestVersion)) + .collect(Collectors.toList()); } else { return candidateCodecs.stream() .filter(c -> c.getVersion().equals(htsVersion)) @@ -402,17 +390,13 @@ protected List filterByVersion(final List candidateCodecs, final HtsVersio // or otherwise all registered codecs for this codec format. private List resolveForFormat(final Optional optFormatString) { final List candidateCodecs = - optFormatString.isPresent() ? - resolveForFormat(optFormatString.get()) : - getCodecs(); + optFormatString.isPresent() ? resolveForFormat(optFormatString.get()) : getCodecs(); if (optFormatString.isPresent() && candidateCodecs.isEmpty()) { // warn if the resource format string is present, but doesn't map to any codec registered // with this resolver (/content type). 
LOG.warn(String.format( "The specified format string (%s) does not correspond to any registered codec for content type (%s)", - optFormatString.get(), - requiredContentType)); - + optFormatString.get(), requiredContentType)); } return candidateCodecs; } @@ -426,14 +410,13 @@ private List getURIOwners(final List candidateCodecs, final IOPath ioPath) if (isCustomURI) { // ensure that all codecs that claim to own this URI honor the contract that says if canDecodeURI // returns true, ownsURI must also return true for the same IOPath - uriHandlers.stream().forEach( - codec -> { - if (!codec.canDecodeURI(ioPath)) { - throw new HtsjdkPluginException( - String.format("The %s codec returned true for ownsURI but false for canDecodeURI for path: %s", - codec, - ioPath.getURI())); - }}); + uriHandlers.stream().forEach(codec -> { + if (!codec.canDecodeURI(ioPath)) { + throw new HtsjdkPluginException(String.format( + "The %s codec returned true for ownsURI but false for canDecodeURI for path: %s", + codec, ioPath.getURI())); + } + }); } return uriHandlers; } @@ -446,21 +429,18 @@ private final BundleResource getPrimaryResource(final Bundle bundle, final boole if (!requiredContentType.equals(bundlePrimaryContentType)) { throw new IllegalArgumentException(String.format( "The primary content type (%s) for the resource does not match the requested content type (%s).", - bundlePrimaryContentType, - requiredContentType)); + bundlePrimaryContentType, requiredContentType)); } // Make sure the resource type is appropriate for encoding or decoding, as requested by the caller if (forEncoding && !bundleResource.hasInputType()) { - throw new IllegalArgumentException( - String.format("The %s resource found (%s) cannot be used as an input resource", - requiredContentType, - bundleResource)); + throw new IllegalArgumentException(String.format( + "The %s resource found (%s) cannot be used as an input resource", + requiredContentType, bundleResource)); } else if (!forEncoding && 
!bundleResource.hasOutputType()) { // for decoding - throw new IllegalArgumentException( - String.format("The %s resource found (%s) cannot be used as an output resource", - requiredContentType, - bundleResource)); + throw new IllegalArgumentException(String.format( + "The %s resource found (%s) cannot be used as an output resource", + requiredContentType, bundleResource)); } return bundleResource; @@ -468,13 +448,9 @@ private final BundleResource getPrimaryResource(final Bundle bundle, final boole @InternalAPI static > C getOneOrThrow( - final List resolvedCodecs, - final Supplier contextMessage) { + final List resolvedCodecs, final Supplier contextMessage) { if (resolvedCodecs.size() == 0) { - throw new HtsjdkException(String.format( - "%s %s", - NO_SUPPORTING_CODEC_ERROR, - contextMessage.get())); + throw new HtsjdkException(String.format("%s %s", NO_SUPPORTING_CODEC_ERROR, contextMessage.get())); } else if (resolvedCodecs.size() > 1) { final String multipleCodecsMessage = String.format( "%s (%s)\n%s\nThis indicates an internal error in one or more of the codecs:", @@ -486,5 +462,4 @@ private final BundleResource getPrimaryResource(final Bundle bundle, final boole return resolvedCodecs.get(0); } } - } diff --git a/src/main/java/htsjdk/beta/plugin/registry/HtsDefaultRegistry.java b/src/main/java/htsjdk/beta/plugin/registry/HtsDefaultRegistry.java index 6a14758668..a241221399 100644 --- a/src/main/java/htsjdk/beta/plugin/registry/HtsDefaultRegistry.java +++ b/src/main/java/htsjdk/beta/plugin/registry/HtsDefaultRegistry.java @@ -17,7 +17,9 @@ public class HtsDefaultRegistry { /** * statically populate the default registry with any codecs on the classpath */ - static {ServiceLoader.load(HtsCodec.class).forEach(htsDefaultCodecRegistry::registerCodec);} + static { + ServiceLoader.load(HtsCodec.class).forEach(htsDefaultCodecRegistry::registerCodec); + } /** * Grt the {@link HaploidReferenceResolver} resolver for this registry. 
@@ -25,7 +27,8 @@ public class HtsDefaultRegistry { * @return the {@link HaploidReferenceResolver} resolver for this registry */ public static synchronized HaploidReferenceResolver getHaploidReferenceResolver() { - return htsDefaultCodecRegistry.getHaploidReferenceResolver(); } + return htsDefaultCodecRegistry.getHaploidReferenceResolver(); + } /** * Gt the {@link ReadsResolver} resolver for this registry. @@ -33,7 +36,8 @@ public static synchronized HaploidReferenceResolver getHaploidReferenceResolver( * @return the {@link ReadsResolver} resolver for this registry */ public static synchronized ReadsResolver getReadsResolver() { - return htsDefaultCodecRegistry.getReadsResolver(); } + return htsDefaultCodecRegistry.getReadsResolver(); + } /** * Get the {@link VariantsResolver} resolver for this registry. @@ -41,6 +45,6 @@ public static synchronized ReadsResolver getReadsResolver() { * @return the {@link VariantsResolver} resolver for this registry */ public static synchronized VariantsResolver getVariantsResolver() { - return htsDefaultCodecRegistry.getVariantsResolver(); } - + return htsDefaultCodecRegistry.getVariantsResolver(); + } } diff --git a/src/main/java/htsjdk/beta/plugin/registry/ReadsResolver.java b/src/main/java/htsjdk/beta/plugin/registry/ReadsResolver.java index 4e67a4dffd..90fb0796c2 100644 --- a/src/main/java/htsjdk/beta/plugin/registry/ReadsResolver.java +++ b/src/main/java/htsjdk/beta/plugin/registry/ReadsResolver.java @@ -1,24 +1,19 @@ package htsjdk.beta.plugin.registry; -import htsjdk.beta.codecs.reads.cram.cramV3_1.CRAMCodecV3_1; import htsjdk.beta.exception.HtsjdkException; import htsjdk.beta.exception.HtsjdkPluginException; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleResourceType; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.reads.ReadsBundle; import htsjdk.beta.plugin.reads.ReadsCodec; import htsjdk.beta.plugin.reads.ReadsDecoder; import 
htsjdk.beta.plugin.reads.ReadsDecoderOptions; import htsjdk.beta.plugin.reads.ReadsEncoder; import htsjdk.beta.plugin.reads.ReadsEncoderOptions; -import htsjdk.beta.plugin.reads.ReadsFormats; import htsjdk.io.IOPath; import htsjdk.utils.ValidationUtils; -import java.util.List; -import java.util.stream.Collectors; - /** * Class with methods for resolving inputs and outputs to reads encoders and decoders. *

    @@ -30,7 +25,7 @@ * {@link ReadsCodec}s, such as {@link ReadsDecoder}, {@link ReadsEncoder}, * {@link htsjdk.beta.plugin.reads.ReadsDecoderOptions}. */ -public class ReadsResolver extends HtsCodecResolver{ +public class ReadsResolver extends HtsCodecResolver { /** * Create a ReadsResolver. @@ -69,9 +64,7 @@ public ReadsDecoder getReadsDecoder(final IOPath inputPath) { * @throws HtsjdkPluginException if more than one codec claims to handle the resource. this usually indicates * that the registry contains an incorrectly written codec. */ - public ReadsDecoder getReadsDecoder( - final IOPath inputPath, - final ReadsDecoderOptions readsDecoderOptions) { + public ReadsDecoder getReadsDecoder(final IOPath inputPath, final ReadsDecoderOptions readsDecoderOptions) { ValidationUtils.nonNull(inputPath, "Input path"); ValidationUtils.nonNull(readsDecoderOptions, "Decoder options"); @@ -107,9 +100,7 @@ public ReadsDecoder getReadsDecoder(final Bundle inputBundle) { * that the registry contains an incorrectly written codec. */ @SuppressWarnings("unchecked") - public ReadsDecoder getReadsDecoder( - final Bundle inputBundle, - final ReadsDecoderOptions readsDecoderOptions) { + public ReadsDecoder getReadsDecoder(final Bundle inputBundle, final ReadsDecoderOptions readsDecoderOptions) { ValidationUtils.nonNull(inputBundle, "Input bundle"); ValidationUtils.nonNull(readsDecoderOptions, "Decoder options"); @@ -148,9 +139,7 @@ public ReadsEncoder getReadsEncoder(final IOPath outputPath) { * @throws HtsjdkPluginException if more than one codec claims to handle the resource. this usually indicates * that the registry contains an incorrectly written codec. 
*/ - public ReadsEncoder getReadsEncoder( - final IOPath outputPath, - final ReadsEncoderOptions readsEncoderOptions) { + public ReadsEncoder getReadsEncoder(final IOPath outputPath, final ReadsEncoderOptions readsEncoderOptions) { ValidationUtils.nonNull(outputPath, "Output path"); ValidationUtils.nonNull(readsEncoderOptions, "Encoder options"); @@ -173,9 +162,7 @@ public ReadsEncoder getReadsEncoder( * that the registry contains an incorrectly written codec. */ @SuppressWarnings("unchecked") - public ReadsEncoder getReadsEncoder( - final Bundle outputBundle, - final ReadsEncoderOptions readsEncoderOptions) { + public ReadsEncoder getReadsEncoder(final Bundle outputBundle, final ReadsEncoderOptions readsEncoderOptions) { ValidationUtils.nonNull(outputBundle, "outputBundle"); ValidationUtils.nonNull(readsEncoderOptions, "Encoder options"); @@ -205,34 +192,7 @@ public ReadsEncoder getReadsEncoder( ValidationUtils.nonNull(readsFormat, "Reads format"); ValidationUtils.nonNull(formatVersion, "File format version"); - return (ReadsEncoder) resolveFormatAndVersion(readsFormat, formatVersion) - .getEncoder(outputBundle, readsEncoderOptions); - } - - /** - * Temporarily override to remove the CRAM 3.1 codec from the list of candidate codecs when the request is for - * the newest version, since it has no write implementation yet. 
- */ - @Override - protected List filterByVersion(final List candidateCodecs, final HtsVersion htsVersion) { - final List preFilteredCodecs; - if (htsVersion.equals(HtsVersion.NEWEST_VERSION)) { - // if the request is for the newest version, then pre-filter out the CRAM 3.1 codec since it has no - // write implementation yet, and then delegate to the superclass to let it find the newest version among - // the remaining codecs - preFilteredCodecs = candidateCodecs.stream().filter( - c -> !(c.getFileFormat().equals(ReadsFormats.CRAM) - && c.getVersion().equals(CRAMCodecV3_1.VERSION_3_1))) - .collect(Collectors.toList()); - final HtsVersion newestVersion = preFilteredCodecs.stream() - .map(c -> c.getVersion()) - .reduce(candidateCodecs.get(0).getVersion(), - (HtsVersion a, HtsVersion b) -> a.compareTo(b) > 0 ? a : b); - return candidateCodecs.stream().filter( - c -> c.getVersion().equals(newestVersion)).collect(Collectors.toList()); - } else { - preFilteredCodecs = candidateCodecs; - } - return super.filterByVersion(preFilteredCodecs, htsVersion); + return (ReadsEncoder) + resolveFormatAndVersion(readsFormat, formatVersion).getEncoder(outputBundle, readsEncoderOptions); } } diff --git a/src/main/java/htsjdk/beta/plugin/registry/VariantsResolver.java b/src/main/java/htsjdk/beta/plugin/registry/VariantsResolver.java index dd8841377e..e755fa219c 100644 --- a/src/main/java/htsjdk/beta/plugin/registry/VariantsResolver.java +++ b/src/main/java/htsjdk/beta/plugin/registry/VariantsResolver.java @@ -3,11 +3,11 @@ import htsjdk.beta.codecs.variants.vcf.vcfv4_2.VCFCodecV4_2; import htsjdk.beta.exception.HtsjdkException; import htsjdk.beta.exception.HtsjdkPluginException; -import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.io.bundle.Bundle; import htsjdk.beta.io.bundle.BundleBuilder; import htsjdk.beta.io.bundle.BundleResourceType; import htsjdk.beta.io.bundle.IOPathResource; +import htsjdk.beta.plugin.HtsVersion; import htsjdk.beta.plugin.variants.VariantsCodec; import 
htsjdk.beta.plugin.variants.VariantsDecoder; import htsjdk.beta.plugin.variants.VariantsDecoderOptions; @@ -62,8 +62,7 @@ public VariantsDecoder getVariantsDecoder(final IOPath inputPath) { * that the registry contains an incorrectly written codec. */ public VariantsDecoder getVariantsDecoder( - final IOPath inputPath, - final VariantsDecoderOptions variantsDecoderOptions) { + final IOPath inputPath, final VariantsDecoderOptions variantsDecoderOptions) { ValidationUtils.nonNull(inputPath, "Input path"); ValidationUtils.nonNull(variantsDecoderOptions, "Decoder options"); @@ -102,8 +101,7 @@ public VariantsDecoder getVariantsDecoder(final Bundle inputBundle) { */ @SuppressWarnings("unchecked") public VariantsDecoder getVariantsDecoder( - final Bundle inputBundle, - final VariantsDecoderOptions variantsDecoderOptions) { + final Bundle inputBundle, final VariantsDecoderOptions variantsDecoderOptions) { ValidationUtils.nonNull(inputBundle, "Input bundle"); ValidationUtils.nonNull(variantsDecoderOptions, "Decoder options"); @@ -137,8 +135,7 @@ public VariantsEncoder getVariantsEncoder(final IOPath outputPath) { * that the registry contains an incorrectly written codec. */ public VariantsEncoder getVariantsEncoder( - final IOPath outputPath, - final VariantsEncoderOptions variantsEncoderOptions) { + final IOPath outputPath, final VariantsEncoderOptions variantsEncoderOptions) { ValidationUtils.nonNull(outputPath, "Output path"); ValidationUtils.nonNull(variantsEncoderOptions, "Encoder options"); @@ -159,12 +156,11 @@ public VariantsEncoder getVariantsEncoder( * that the registry contains an incorrectly written codec. 
*/ public VariantsEncoder getVariantsEncoder( - final Bundle outputBundle, - final VariantsEncoderOptions variantsEncoderOptions) { + final Bundle outputBundle, final VariantsEncoderOptions variantsEncoderOptions) { ValidationUtils.nonNull(outputBundle, "Output bundle"); ValidationUtils.nonNull(variantsEncoderOptions, "Encoder options"); - //NOTE: we can't allow the resolver to choose the newest registered codec when writing a + // NOTE: we can't allow the resolver to choose the newest registered codec when writing a // VCF, since the newest codec is v4.3, which has no encoder, so for now explicitly select v4.2 final VariantsCodec variantsCodec = resolveForEncoding(outputBundle, VCFCodecV4_2.VCF_V42_VERSION); return (VariantsEncoder) variantsCodec.getEncoder(outputBundle, variantsEncoderOptions); @@ -194,8 +190,7 @@ public VariantsEncoder getVariantsEncoder( ValidationUtils.nonNull(variantsFormat, "Format"); ValidationUtils.nonNull(formatVersion, "Format version"); - return (VariantsEncoder) resolveFormatAndVersion(variantsFormat, formatVersion) - .getEncoder(outputBundle, variantsEncoderOptions); + return (VariantsEncoder) + resolveFormatAndVersion(variantsFormat, formatVersion).getEncoder(outputBundle, variantsEncoderOptions); } - } diff --git a/src/main/java/htsjdk/beta/plugin/variants/VariantsBundle.java b/src/main/java/htsjdk/beta/plugin/variants/VariantsBundle.java index 2a8dae15e4..128d0d2840 100644 --- a/src/main/java/htsjdk/beta/plugin/variants/VariantsBundle.java +++ b/src/main/java/htsjdk/beta/plugin/variants/VariantsBundle.java @@ -8,7 +8,6 @@ import htsjdk.samtools.util.Log; import htsjdk.samtools.util.Tuple; import htsjdk.utils.ValidationUtils; - import java.io.Serial; import java.io.Serializable; import java.nio.file.Files; @@ -30,6 +29,7 @@ public class VariantsBundle extends Bundle implements Serializable { @Serial private static final long serialVersionUID = 1L; + private static final Log LOG = Log.getInstance(VariantsBundle.class); /** @@ -108,8 
+108,8 @@ public static VariantsBundle getVariantsBundleFromPath(final IOPath jsonPath) { * @param ioPathConstructor a function that takes a string and returns an IOPath-derived class of type {@code T} * @return a {@link VariantsBundle} created from jsonPath */ - public static VariantsBundle getVariantsBundleFromPath(final IOPath jsonPath, - final Function ioPathConstructor) { + public static VariantsBundle getVariantsBundleFromPath( + final IOPath jsonPath, final Function ioPathConstructor) { return getVariantsBundleFromString(IOPathUtils.getStringFromPath(jsonPath), ioPathConstructor); } @@ -133,9 +133,9 @@ public static VariantsBundle getVariantsBundleFromString(final String jsonString * @return a newly created {@link htsjdk.beta.plugin.variants.VariantsBundle} */ public static VariantsBundle getVariantsBundleFromString( - final String jsonString, - final Function ioPathConstructor) { - return new VariantsBundle(BundleJSON.toBundle(jsonString, ioPathConstructor).getResources()); + final String jsonString, final Function ioPathConstructor) { + return new VariantsBundle( + BundleJSON.toBundle(jsonString, ioPathConstructor).getResources()); } /** @@ -166,8 +166,7 @@ public static Optional resolveIndex(final IOPath variants) { * if it can be found */ public static Optional resolveIndex( - final T variantsHtsPath, - final Function ioPathConstructor) { + final T variantsHtsPath, final Function ioPathConstructor) { final Set indexExtensions = Set.of(FileExtensions.TRIBBLE_INDEX, FileExtensions.TABIX_INDEX); for (final String extension : indexExtensions) { final T putativeIndexPath = IOPathUtils.appendExtension(variantsHtsPath, extension, ioPathConstructor); @@ -185,9 +184,7 @@ private static IOPathResource toInputResource(final String pr if (providedContentType != null && !typePair.get().a.equals(providedContentType)) { LOG.warn(String.format( "Provided content type \"%s\" for \"%s\" doesn't match derived content type \"%s\"", - providedContentType, - 
ioPath.getRawInputString(), - typePair.get().a)); + providedContentType, ioPath.getRawInputString(), typePair.get().a)); } } return new IOPathResource(ioPath, providedContentType); @@ -206,9 +203,11 @@ private static Optional> getInferredCon if (extension.isPresent()) { final String ext = extension.get(); if (ext.equals(FileExtensions.VCF)) { - return Optional.of(new Tuple<>(BundleResourceType.CT_VARIANT_CONTEXTS, BundleResourceType.FMT_VARIANTS_VCF)); + return Optional.of( + new Tuple<>(BundleResourceType.CT_VARIANT_CONTEXTS, BundleResourceType.FMT_VARIANTS_VCF)); } else if (ext.equals(FileExtensions.COMPRESSED_VCF) || ext.equals(FileExtensions.COMPRESSED_VCF_BGZ)) { - return Optional.of(new Tuple<>(BundleResourceType.CT_VARIANT_CONTEXTS, BundleResourceType.FMT_VARIANTS_VCF)); + return Optional.of( + new Tuple<>(BundleResourceType.CT_VARIANT_CONTEXTS, BundleResourceType.FMT_VARIANTS_VCF)); } } return Optional.empty(); diff --git a/src/main/java/htsjdk/beta/plugin/variants/VariantsCodec.java b/src/main/java/htsjdk/beta/plugin/variants/VariantsCodec.java index 97cc39306b..76591b3f5c 100644 --- a/src/main/java/htsjdk/beta/plugin/variants/VariantsCodec.java +++ b/src/main/java/htsjdk/beta/plugin/variants/VariantsCodec.java @@ -9,6 +9,7 @@ public interface VariantsCodec extends HtsCodec { @Override - default HtsContentType getContentType() { return HtsContentType.VARIANT_CONTEXTS; } - + default HtsContentType getContentType() { + return HtsContentType.VARIANT_CONTEXTS; + } } diff --git a/src/main/java/htsjdk/beta/plugin/variants/VariantsDecoder.java b/src/main/java/htsjdk/beta/plugin/variants/VariantsDecoder.java index 1d19461c1c..420388fcdb 100644 --- a/src/main/java/htsjdk/beta/plugin/variants/VariantsDecoder.java +++ b/src/main/java/htsjdk/beta/plugin/variants/VariantsDecoder.java @@ -8,4 +8,4 @@ /** * Base class for all {@link HtsContentType#VARIANT_CONTEXTS} decoders. 
*/ -public interface VariantsDecoder extends HtsDecoder { } +public interface VariantsDecoder extends HtsDecoder {} diff --git a/src/main/java/htsjdk/beta/plugin/variants/VariantsDecoderOptions.java b/src/main/java/htsjdk/beta/plugin/variants/VariantsDecoderOptions.java index 46a2abad5a..9a112c2dff 100644 --- a/src/main/java/htsjdk/beta/plugin/variants/VariantsDecoderOptions.java +++ b/src/main/java/htsjdk/beta/plugin/variants/VariantsDecoderOptions.java @@ -1,14 +1,13 @@ package htsjdk.beta.plugin.variants; -import htsjdk.beta.plugin.HtsDecoderOptions; import htsjdk.annotations.InternalAPI; - +import htsjdk.beta.plugin.HtsDecoderOptions; import java.nio.channels.SeekableByteChannel; import java.util.Optional; import java.util.function.Function; public class VariantsDecoderOptions implements HtsDecoderOptions { - //TODO: replace these with a prefetch size args, and use a local channel wrapper implementation + // TODO: replace these with a prefetch size args, and use a local channel wrapper implementation private Function variantsChannelTransformer; private Function indexChannelTransformer; @@ -63,5 +62,4 @@ public VariantsDecoderOptions setIndexChannelTransformer( this.indexChannelTransformer = indexChannelTransformer; return this; } - } diff --git a/src/main/java/htsjdk/beta/plugin/variants/VariantsEncoder.java b/src/main/java/htsjdk/beta/plugin/variants/VariantsEncoder.java index 9840047e69..932f3deea2 100644 --- a/src/main/java/htsjdk/beta/plugin/variants/VariantsEncoder.java +++ b/src/main/java/htsjdk/beta/plugin/variants/VariantsEncoder.java @@ -8,4 +8,4 @@ /** * Base class for all {@link HtsContentType#VARIANT_CONTEXTS} encoders. 
*/ -public interface VariantsEncoder extends HtsEncoder { } +public interface VariantsEncoder extends HtsEncoder {} diff --git a/src/main/java/htsjdk/beta/plugin/variants/VariantsEncoderOptions.java b/src/main/java/htsjdk/beta/plugin/variants/VariantsEncoderOptions.java index 94182dc4d3..0b09fd3335 100644 --- a/src/main/java/htsjdk/beta/plugin/variants/VariantsEncoderOptions.java +++ b/src/main/java/htsjdk/beta/plugin/variants/VariantsEncoderOptions.java @@ -6,12 +6,11 @@ import htsjdk.samtools.Defaults; public class VariantsEncoderOptions implements HtsEncoderOptions { - private boolean writeSitesOnly = false; - private boolean writeFullFormatField = false; - private boolean allowFieldsMissingFromHeader = false; - private boolean isAsyncIO = false; - private int bufferSize = Defaults.NON_ZERO_BUFFER_SIZE; // 128k - + private boolean writeSitesOnly = false; + private boolean writeFullFormatField = false; + private boolean allowFieldsMissingFromHeader = false; + private boolean isAsyncIO = false; + private int bufferSize = Defaults.NON_ZERO_BUFFER_SIZE; // 128k /** * Get the buffer size used when writing to an {@link IOPathResource}. 
Defaults @@ -115,5 +114,4 @@ public VariantsEncoderOptions setWriteFullFormatField(boolean writeFullFormatFie this.writeFullFormatField = writeFullFormatField; return this; } - } diff --git a/src/main/java/htsjdk/io/AsyncWriterPool.java b/src/main/java/htsjdk/io/AsyncWriterPool.java index f95bcbc7db..dab043ce39 100644 --- a/src/main/java/htsjdk/io/AsyncWriterPool.java +++ b/src/main/java/htsjdk/io/AsyncWriterPool.java @@ -1,7 +1,6 @@ package htsjdk.io; import htsjdk.samtools.util.RuntimeIOException; - import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; @@ -21,7 +20,6 @@ public class AsyncWriterPool implements Closeable { private final ExecutorService executor; private final List> writers = new ArrayList<>(); - // The amount of time to wait on the queue in the event of catastrophic failure in the writer threads. private int timeoutSeconds = 5; @@ -65,7 +63,10 @@ public AsyncWriterPool() { public void close() throws IOException { if (this.poolClosed) return; this.poolClosed = true; - CompletableFuture.allOf(this.writers.stream().map(PooledWriter::nonBlockingClose).toArray(CompletableFuture[]::new)).join(); + CompletableFuture.allOf(this.writers.stream() + .map(PooledWriter::nonBlockingClose) + .toArray(CompletableFuture[]::new)) + .join(); this.executor.shutdown(); } @@ -104,7 +105,6 @@ public Writer pool(final Writer writer, final BlockingQueue queue, return pooledWriter; } - /** * Any class that implements {@link Writer} can be exchanged for a {@code PooledWriter}. The PooledWriter provides * the same API as {@link Writer}, but will manage buffering of writes and sending to the {@link AsyncWriterPool} it @@ -122,7 +122,8 @@ private class PooledWriter implements Writer { private boolean isClosed = false; - // Holds the Future of the last task submitted to the AsyncWriterPools until it is checked, then it is null again. 
+ // Holds the Future of the last task submitted to the AsyncWriterPools until it is checked, then it is null + // again. private Future currentTask; /** @@ -137,7 +138,8 @@ private PooledWriter(final Writer writer, final BlockingQueue queue, final if (writeThreshold <= 0) throw new IllegalArgumentException("writeThreshold must be >= 1: " + writeThreshold); if (writeThreshold > queue.remainingCapacity()) - throw new IllegalArgumentException("writeThreshold (" + writeThreshold + ") can't be larger then queue capacity (" + queue.remainingCapacity() + ")."); + throw new IllegalArgumentException("writeThreshold (" + writeThreshold + + ") can't be larger then queue capacity (" + queue.remainingCapacity() + ")."); this.writer = writer; this.queue = queue; @@ -208,7 +210,10 @@ public void write(final A item) { * normal operations the timeout should not come into play and items will add immediately. */ try { - while (!this.isClosed && !this.queue.offer(item, AsyncWriterPool.this.getTimeoutSeconds(), TimeUnit.SECONDS)) { /* Just wait. */ } + while (!this.isClosed + && !this.queue.offer(item, AsyncWriterPool.this.getTimeoutSeconds(), TimeUnit.SECONDS)) { + /* Just wait. 
*/ + } } catch (InterruptedException e) { throw new RuntimeException("Exception while placing item in queue", e); } @@ -251,14 +256,16 @@ private void drain() { */ private CompletableFuture nonBlockingClose() { - return CompletableFuture.supplyAsync(() -> { - try { - this.close(); - return null; - } catch (Exception e) { - throw new RuntimeException("Caught exception while closing PooledWriter.", e); - } - }, AsyncWriterPool.this.executor); + return CompletableFuture.supplyAsync( + () -> { + try { + this.close(); + return null; + } catch (Exception e) { + throw new RuntimeException("Caught exception while closing PooledWriter.", e); + } + }, + AsyncWriterPool.this.executor); } /** diff --git a/src/main/java/htsjdk/io/HtsPath.java b/src/main/java/htsjdk/io/HtsPath.java index 035b1ff418..092aca32f6 100644 --- a/src/main/java/htsjdk/io/HtsPath.java +++ b/src/main/java/htsjdk/io/HtsPath.java @@ -1,7 +1,6 @@ package htsjdk.io; import htsjdk.utils.ValidationUtils; - import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -52,11 +51,11 @@ * * General syntax for an "absolute" URI: * - * : + * {@code :} * * Many "hierarchical" URI schemes use this syntax: * - * ://? + * {@code ://?} * * More specifically: * @@ -65,16 +64,16 @@ * net_path = "//" authority [ abs_path ] * abs_path = "/" path_segments * opaque_part = uric_no_slash *uric - * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," + * uric_no_slash = unreserved | escaped | ";" | "?" 
| ":" | "@" | "&" | "=" | "+" | "$" | "," */ public class HtsPath implements IOPath, Serializable { private static final long serialVersionUID = 1L; private static final String HIERARCHICAL_SCHEME_SEPARATOR = "://"; - private final String rawInputString; // raw input string provided by th user; may or may not have a scheme - private final URI uri; // working URI; always has a scheme ("file" if not otherwise specified) + private final String rawInputString; // raw input string provided by th user; may or may not have a scheme + private final URI uri; // working URI; always has a scheme ("file" if not otherwise specified) private transient String pathFailureReason; // cache the reason for "toPath" conversion failure - private transient Path cachedPath; // cache the Path associated with this URI if its "Path-able" + private transient Path cachedPath; // cache the Path associated with this URI if its "Path-able" /** * Create an HtsPath from a raw input path string. @@ -117,12 +116,14 @@ public String getURIString() { * Return the raw input string provided to the constructor. 
*/ @Override - public String getRawInputString() { return rawInputString; } + public String getRawInputString() { + return rawInputString; + } @Override public boolean hasFileSystemProvider() { // try to find a provider; assume that our URI always has a scheme - for (FileSystemProvider provider: FileSystemProvider.installedProviders()) { + for (FileSystemProvider provider : FileSystemProvider.installedProviders()) { if (provider.getScheme().equalsIgnoreCase(uri.getScheme())) { return true; } @@ -134,10 +135,10 @@ public boolean hasFileSystemProvider() { public boolean isPath() { try { return getCachedPath() != null || toPath() != null; - } catch (ProviderNotFoundException | - FileSystemNotFoundException | - IllegalArgumentException | - AssertionError e) { + } catch (ProviderNotFoundException + | FileSystemNotFoundException + | IllegalArgumentException + | AssertionError e) { // jimfs throws an AssertionError that wraps a URISyntaxException when trying to create path where // the scheme-specific part is missing or incorrect pathFailureReason = e.getMessage(); @@ -192,7 +193,10 @@ public InputStream getInputStream() { return Files.newInputStream(resourcePath); } catch (IOException e) { throw new RuntimeException( - String.format("Could not create open input stream for %s (as URI %s)", getRawInputString(), getURIString()), e); + String.format( + "Could not create open input stream for %s (as URI %s)", + getRawInputString(), getURIString()), + e); } } @@ -206,12 +210,17 @@ public OutputStream getOutputStream() { try { return Files.newOutputStream(resourcePath); } catch (IOException e) { - throw new RuntimeException(String.format("Could not open output stream for %s (as URI %s)", getRawInputString(), getURIString()), e); + throw new RuntimeException( + String.format( + "Could not open output stream for %s (as URI %s)", getRawInputString(), getURIString()), + e); } } // get the cached path associated with this URI if its already been created - protected Path 
getCachedPath() { return cachedPath; } + protected Path getCachedPath() { + return cachedPath; + } protected void setCachedPath(Path path) { this.cachedPath = path; @@ -251,8 +260,8 @@ private URI getURIForString(final String pathString) { tempURI = getCachedPath().toUri(); } } catch (URISyntaxException uriException) { - //check that the uri wasn't a badly encoded absolute uri of some sort - //if you don't do this it will be treated as a badly formed file:// url + // check that the uri wasn't a badly encoded absolute uri of some sort + // if you don't do this it will be treated as a badly formed file:// url assertNoProblematicScheme(pathString, uriException); // the input string isn't a valid URI; assume its a local (non-URI) file reference, and @@ -265,9 +274,7 @@ private URI getURIForString(final String pathString) { // the user intended to provide a local file reference or a URI, so preserve both final String errorMessage = String.format( "%s can't be interpreted as a local file (%s) or as a URI (%s).", - pathString, - pathException.getMessage(), - uriException.getMessage()); + pathString, pathException.getMessage(), uriException.getMessage()); throw new IllegalArgumentException(errorMessage, pathException); } } @@ -292,28 +299,30 @@ private URI getURIForString(final String pathString) { * @param pathString the path being examined * @param cause the original failure reason */ - static void assertNoProblematicScheme(String pathString, URISyntaxException cause){ - if(pathString.equals(HIERARCHICAL_SCHEME_SEPARATOR)){ + static void assertNoProblematicScheme(String pathString, URISyntaxException cause) { + if (pathString.equals(HIERARCHICAL_SCHEME_SEPARATOR)) { throw new IllegalArgumentException(HIERARCHICAL_SCHEME_SEPARATOR + " is not a valid path.", cause); } final String[] split = pathString.split(HIERARCHICAL_SCHEME_SEPARATOR, -1); final String scheme = split[0]; - if(split.length == 2 && pathString.endsWith(HIERARCHICAL_SCHEME_SEPARATOR)) { - throw new 
IllegalArgumentException("A path consisting of only a scheme is not allowed: " + pathString, cause); + if (split.length == 2 && pathString.endsWith(HIERARCHICAL_SCHEME_SEPARATOR)) { + throw new IllegalArgumentException( + "A path consisting of only a scheme is not allowed: " + pathString, cause); } - if(split.length > 1){ - if(scheme == null || scheme.isEmpty()){ - throw new IllegalArgumentException("Malformed path " + pathString + " includes an empty scheme.", cause); + if (split.length > 1) { + if (scheme == null || scheme.isEmpty()) { + throw new IllegalArgumentException( + "Malformed path " + pathString + " includes an empty scheme.", cause); } - if(!scheme.equals("file")){ - throw new IllegalArgumentException("Malformed path " + pathString + " includes a scheme: " + scheme + ":// but was an invalid URI." + - "\nCheck that it is fully encoded.", cause); + if (!scheme.equals("file")) { + throw new IllegalArgumentException( + "Malformed path " + pathString + " includes a scheme: " + scheme + ":// but was an invalid URI." + + "\nCheck that it is fully encoded.", + cause); } } - } - } diff --git a/src/main/java/htsjdk/io/IOPath.java b/src/main/java/htsjdk/io/IOPath.java index 7f22df3c4a..71d6ce064f 100644 --- a/src/main/java/htsjdk/io/IOPath.java +++ b/src/main/java/htsjdk/io/IOPath.java @@ -2,7 +2,6 @@ import htsjdk.samtools.util.FileExtensions; import htsjdk.utils.ValidationUtils; - import java.io.InputStream; import java.io.OutputStream; import java.net.URI; @@ -45,7 +44,7 @@ public interface IOPath { * Return true if this {code IOPath} can be resolved to an {@code java.nio} Path. If true, {@code #toPath()} can be * safely called. 
* - * There are cases where a valid URI with a valid scheme backed by an installed {@code java.nio File System + * There are cases where a valid URI with a valid scheme backed by an installed {@code java.nio} File System * still can't be turned into a {@code java.nio.file.Path}, i.e., the following specifies an invalid * authority "namenode": * @@ -91,7 +90,8 @@ default String getScheme() { default Optional getExtension() { final String hierarchicalPath = getURI().getPath(); if (hierarchicalPath != null) { - final int indexOfLastComponent = hierarchicalPath.lastIndexOf(FileSystems.getDefault().getSeparator()); + final int indexOfLastComponent = + hierarchicalPath.lastIndexOf(FileSystems.getDefault().getSeparator()); if (indexOfLastComponent != -1 && indexOfLastComponent < hierarchicalPath.length() - 1) { final String lastComponent = hierarchicalPath.substring(indexOfLastComponent + 1); if (lastComponent.length() > 0) { @@ -127,9 +127,7 @@ default boolean hasExtension(final String extension) { // We don't want to use {@code #getExtension} here, since it won't work correctly if we're comparing an // extension that uses multiple . chars, such as .fasta.gz. final String hierarchicalPath = getURI().getPath(); - return hierarchicalPath == null ? - false : - hierarchicalPath.toLowerCase().endsWith(extension.toLowerCase()); + return hierarchicalPath == null ? 
false : hierarchicalPath.toLowerCase().endsWith(extension.toLowerCase()); } /** @@ -140,7 +138,8 @@ default boolean hasExtension(final String extension) { */ default Optional getBaseName() { final String hierarchicalPath = getURI().getPath(); - final int indexOfLastComponent = hierarchicalPath.lastIndexOf(FileSystems.getDefault().getSeparator()); + final int indexOfLastComponent = + hierarchicalPath.lastIndexOf(FileSystems.getDefault().getSeparator()); if (indexOfLastComponent != -1 && indexOfLastComponent < hierarchicalPath.length() - 1) { final String lastComponent = hierarchicalPath.substring(indexOfLastComponent + 1); if (lastComponent.length() > 0) { diff --git a/src/main/java/htsjdk/samtools/AbstractBAMFileIndex.java b/src/main/java/htsjdk/samtools/AbstractBAMFileIndex.java index 9df206ecfd..7989ca3184 100644 --- a/src/main/java/htsjdk/samtools/AbstractBAMFileIndex.java +++ b/src/main/java/htsjdk/samtools/AbstractBAMFileIndex.java @@ -25,7 +25,6 @@ import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.RuntimeIOException; - import java.io.File; import java.util.ArrayList; import java.util.Arrays; @@ -60,11 +59,16 @@ protected AbstractBAMFileIndex(final File file, final SAMSequenceDictionary dict this(new MemoryMappedFileBuffer(file), file.getName(), dictionary); } - protected AbstractBAMFileIndex(final File file, final SAMSequenceDictionary dictionary, final boolean useMemoryMapping) { - this((useMemoryMapping ? new MemoryMappedFileBuffer(file) : new RandomAccessFileBuffer(file)), file.getName(), dictionary); + protected AbstractBAMFileIndex( + final File file, final SAMSequenceDictionary dictionary, final boolean useMemoryMapping) { + this( + (useMemoryMapping ? 
new MemoryMappedFileBuffer(file) : new RandomAccessFileBuffer(file)), + file.getName(), + dictionary); } - protected AbstractBAMFileIndex(final IndexFileBuffer indexFileBuffer, final String source, final SAMSequenceDictionary dictionary) { + protected AbstractBAMFileIndex( + final IndexFileBuffer indexFileBuffer, final String source, final SAMSequenceDictionary dictionary) { mIndexBuffer = indexFileBuffer; mBamDictionary = dictionary; verifyIndexMagicNumber(source); @@ -87,9 +91,10 @@ public static int getNumIndexLevels() { return GenomicIndexUtil.LEVEL_STARTS.length; } - private static void assertLevelIsValid (final int levelNumber) { + private static void assertLevelIsValid(final int levelNumber) { if (levelNumber >= getNumIndexLevels()) { - throw new SAMException("Level number (" + levelNumber + ") is greater than or equal to maximum (" + getNumIndexLevels() + ")."); + throw new SAMException("Level number (" + levelNumber + ") is greater than or equal to maximum (" + + getNumIndexLevels() + ")."); } } @@ -112,7 +117,7 @@ public static int getFirstBinInLevel(final int levelNumber) { public int getLevelSize(final int levelNumber) { assertLevelIsValid(levelNumber); - if (levelNumber == getNumIndexLevels()-1) { + if (levelNumber == getNumIndexLevels() - 1) { return GenomicIndexUtil.MAX_BINS - GenomicIndexUtil.LEVEL_STARTS[levelNumber] - 1; } else { return GenomicIndexUtil.LEVEL_STARTS[levelNumber + 1] - GenomicIndexUtil.LEVEL_STARTS[levelNumber]; @@ -125,13 +130,12 @@ public int getLevelSize(final int levelNumber) { * @return the level associated with the given bin number. 
*/ public int getLevelForBin(final Bin bin) { - if(bin.getBinNumber() >= GenomicIndexUtil.MAX_BINS) + if (bin.getBinNumber() >= GenomicIndexUtil.MAX_BINS) throw new SAMException("Tried to get level for invalid bin."); - for(int i = getNumIndexLevels()-1; i >= 0; i--) { - if(bin.getBinNumber() >= GenomicIndexUtil.LEVEL_STARTS[i]) - return i; + for (int i = getNumIndexLevels() - 1; i >= 0; i--) { + if (bin.getBinNumber() >= GenomicIndexUtil.LEVEL_STARTS[i]) return i; } - throw new SAMException("Unable to find correct bin for bin "+bin); + throw new SAMException("Unable to find correct bin for bin " + bin); } /** @@ -142,8 +146,11 @@ public int getLevelForBin(final Bin bin) { public int getFirstLocusInBin(final Bin bin) { final int level = getLevelForBin(bin); final int levelStart = GenomicIndexUtil.LEVEL_STARTS[level]; - final int levelSize = ((level==getNumIndexLevels()-1) ? GenomicIndexUtil.MAX_BINS-1 : GenomicIndexUtil.LEVEL_STARTS[level+1]) - levelStart; - return (bin.getBinNumber() - levelStart)*(GenomicIndexUtil.BIN_GENOMIC_SPAN /levelSize)+1; + final int levelSize = ((level == getNumIndexLevels() - 1) + ? GenomicIndexUtil.MAX_BINS - 1 + : GenomicIndexUtil.LEVEL_STARTS[level + 1]) + - levelStart; + return (bin.getBinNumber() - levelStart) * (GenomicIndexUtil.BIN_GENOMIC_SPAN / levelSize) + 1; } /** @@ -154,8 +161,11 @@ public int getFirstLocusInBin(final Bin bin) { public int getLastLocusInBin(final Bin bin) { final int level = getLevelForBin(bin); final int levelStart = GenomicIndexUtil.LEVEL_STARTS[level]; - final int levelSize = ((level==getNumIndexLevels()-1) ? GenomicIndexUtil.MAX_BINS-1 : GenomicIndexUtil.LEVEL_STARTS[level+1]) - levelStart; - return (bin.getBinNumber()-levelStart+1)*(GenomicIndexUtil.BIN_GENOMIC_SPAN /levelSize); + final int levelSize = ((level == getNumIndexLevels() - 1) + ? 
GenomicIndexUtil.MAX_BINS - 1 + : GenomicIndexUtil.LEVEL_STARTS[level + 1]) + - levelStart; + return (bin.getBinNumber() - levelStart + 1) * (GenomicIndexUtil.BIN_GENOMIC_SPAN / levelSize); } public int getNumberOfReferences() { @@ -271,7 +281,7 @@ protected BAMIndexContent query(final int referenceSequence, final int startPos, final int binCount = readInteger(); boolean metaDataSeen = false; - final Bin[] bins = new Bin[getMaxBinNumberForReference(referenceSequence) +1]; + final Bin[] bins = new Bin[getMaxBinNumberForReference(referenceSequence) + 1]; for (int binNumber = 0; binNumber < binCount; binNumber++) { final int indexBin = readInteger(); final int nChunks = readInteger(); @@ -279,7 +289,7 @@ protected BAMIndexContent query(final int referenceSequence, final int startPos, // System.out.println("# bin[" + i + "] = " + indexBin + ", nChunks = " + nChunks); Chunk lastChunk = null; if (regionBins.get(indexBin)) { - chunks = new ArrayList(nChunks); + chunks = new ArrayList(nChunks); readChunks(nChunks, chunks); } else if (indexBin == GenomicIndexUtil.MAX_BINS) { // meta data - build the bin so that the count of bins is correct; @@ -300,20 +310,25 @@ protected BAMIndexContent query(final int referenceSequence, final int startPos, final int nLinearBins = readInteger(); final int regionLinearBinStart = LinearIndex.convertToLinearIndexOffset(startPos); - final int regionLinearBinStop = endPos > 0 ? LinearIndex.convertToLinearIndexOffset(endPos) : nLinearBins-1; - final int actualStop = Math.min(regionLinearBinStop, nLinearBins -1); + final int regionLinearBinStop = endPos > 0 ? 
LinearIndex.convertToLinearIndexOffset(endPos) : nLinearBins - 1; + final int actualStop = Math.min(regionLinearBinStop, nLinearBins - 1); long[] linearIndexEntries = new long[0]; if (regionLinearBinStart < nLinearBins) { - linearIndexEntries = new long[actualStop-regionLinearBinStart+1]; + linearIndexEntries = new long[actualStop - regionLinearBinStart + 1]; skipBytes(8 * regionLinearBinStart); - for(int linearBin = regionLinearBinStart; linearBin <= actualStop; linearBin++) - linearIndexEntries[linearBin-regionLinearBinStart] = readLong(); + for (int linearBin = regionLinearBinStart; linearBin <= actualStop; linearBin++) + linearIndexEntries[linearBin - regionLinearBinStart] = readLong(); } - final LinearIndex linearIndex = new LinearIndex(referenceSequence,regionLinearBinStart,linearIndexEntries); + final LinearIndex linearIndex = new LinearIndex(referenceSequence, regionLinearBinStart, linearIndexEntries); - return new BAMIndexContent(referenceSequence, bins, binCount - (metaDataSeen? 1 : 0), new BAMIndexMetaData(metaDataChunks), linearIndex); + return new BAMIndexContent( + referenceSequence, + bins, + binCount - (metaDataSeen ? 1 : 0), + new BAMIndexMetaData(metaDataChunks), + linearIndex); } /** @@ -338,7 +353,7 @@ static int getMaxBinNumberForSequenceLength(final int sequenceLength) { // return 4680 + (sequenceLength >> 14); // note 4680 = getFirstBinInLevel(getNumIndexLevels() - 1) } - abstract protected BAMIndexContent getQueryResults(int reference); + protected abstract BAMIndexContent getQueryResults(int reference); /** * Gets the possible number of bins for a given reference sequence. 
@@ -375,8 +390,7 @@ protected void verifyIndexMagicNumber(final String sourceName) { final byte[] buffer = new byte[4]; readBytes(buffer); if (!Arrays.equals(buffer, BAMFileConstants.BAI_INDEX_MAGIC)) { - throw new RuntimeIOException("Invalid file header in BAM index " + sourceName + - ": " + new String(buffer)); + throw new RuntimeIOException("Invalid file header in BAM index " + sourceName + ": " + new String(buffer)); } } @@ -399,11 +413,11 @@ protected void readChunks(int nChunks, List chunks) { } protected void skipToSequence(final int sequenceIndex) { - //Use sequence position cache if available - if(sequenceIndexes[sequenceIndex] != -1){ - seek(sequenceIndexes[sequenceIndex]); - return; - } + // Use sequence position cache if available + if (sequenceIndexes[sequenceIndex] != -1) { + seek(sequenceIndexes[sequenceIndex]); + return; + } // Use previous sequence position if in cache, which optimizes for common access pattern // of iterating through sequences in order. @@ -415,7 +429,7 @@ protected void skipToSequence(final int sequenceIndex) { } else { startSequenceIndex = 0; } - + for (int i = startSequenceIndex; i < sequenceIndex; i++) { // System.out.println("# Sequence TID: " + i); final int nBins = readInteger(); @@ -430,8 +444,8 @@ protected void skipToSequence(final int sequenceIndex) { // System.out.println("# nLinearBins: " + nLinearBins); skipBytes(8 * nLinearBins); } - - //Update sequence position cache + + // Update sequence position cache sequenceIndexes[sequenceIndex] = position(); } @@ -454,16 +468,16 @@ protected final void skipBytes(final int count) { protected final void seek(final long position) { mIndexBuffer.seek(position); } - - protected final long position(){ - return mIndexBuffer.position(); + + protected final long position() { + return mIndexBuffer.position(); } protected final SAMSequenceDictionary getBamDictionary() { return mBamDictionary; } - protected final void setSequenceIndexes (int nReferences) { + protected final void 
setSequenceIndexes(int nReferences) { sequenceIndexes = new long[nReferences + 1]; Arrays.fill(sequenceIndexes, -1); } diff --git a/src/main/java/htsjdk/samtools/AbstractSAMHeaderRecord.java b/src/main/java/htsjdk/samtools/AbstractSAMHeaderRecord.java index a3e2ce5966..420b057bb5 100644 --- a/src/main/java/htsjdk/samtools/AbstractSAMHeaderRecord.java +++ b/src/main/java/htsjdk/samtools/AbstractSAMHeaderRecord.java @@ -23,7 +23,6 @@ */ package htsjdk.samtools; - import java.io.Serializable; import java.util.LinkedHashMap; import java.util.Map; @@ -33,11 +32,10 @@ * Base class for the various concrete records in a SAM header, providing uniform * access to the attributes. */ - public abstract class AbstractSAMHeaderRecord implements Serializable { public static final long serialVersionUID = 1L; - private final Map mAttributes = new LinkedHashMap(); + private final Map mAttributes = new LinkedHashMap(); public String getAttribute(final String key) { return mAttributes.get(key); @@ -49,11 +47,11 @@ public String getAttribute(final String key) { * Otherwise, the value will be converted to a String with toString. * @param key attribute name * @param value attribute value - * @deprecated Use {@link #setAttribute(String, String) instead + * @deprecated Use {@link #setAttribute(String, String)} instead */ @Deprecated public void setAttribute(final String key, final Object value) { - setAttribute(key, value == null? null: value.toString()); + setAttribute(key, value == null ? null : value.toString()); } /** @@ -83,11 +81,10 @@ public void setAttribute(final String key, final String value) { /** * Returns the Set of attributes. */ - public Set> getAttributes() { + public Set> getAttributes() { return mAttributes.entrySet(); } - /** * Returns the ID tag (or equivalent) for this header record. The * default implementation throws a SAMException to indicate "not implemented". 
@@ -118,7 +115,8 @@ protected int attributesHashCode() { abstract Set getStandardTags(); /** Simple to String that outputs the concrete class name and the set of attributes stored. */ - @Override public String toString() { + @Override + public String toString() { return getClass().getSimpleName() + this.mAttributes.toString(); } @@ -126,5 +124,5 @@ protected int attributesHashCode() { * Returns the record in the SAM line-based text format. Fields are * separated by '\t' characters. The String is NOT terminated by '\n'. */ - abstract public String getSAMString(); + public abstract String getSAMString(); } diff --git a/src/main/java/htsjdk/samtools/AlignmentBlock.java b/src/main/java/htsjdk/samtools/AlignmentBlock.java index 1320dfdf66..bf41783ff3 100644 --- a/src/main/java/htsjdk/samtools/AlignmentBlock.java +++ b/src/main/java/htsjdk/samtools/AlignmentBlock.java @@ -48,11 +48,17 @@ public class AlignmentBlock implements Serializable { } /** The first, 1-based, base in the read that is aligned to the reference reference. */ - public int getReadStart() { return readStart; } + public int getReadStart() { + return readStart; + } /** The first, 1-based, position in the reference to which the read is aligned. */ - public int getReferenceStart() { return referenceStart; } + public int getReferenceStart() { + return referenceStart; + } /** The number of contiguous bases aligned to the reference. 
*/ - public int getLength() { return length; } + public int getLength() { + return length; + } } diff --git a/src/main/java/htsjdk/samtools/AsyncSAMFileWriter.java b/src/main/java/htsjdk/samtools/AsyncSAMFileWriter.java index ea85458ec3..3d8e0f561c 100644 --- a/src/main/java/htsjdk/samtools/AsyncSAMFileWriter.java +++ b/src/main/java/htsjdk/samtools/AsyncSAMFileWriter.java @@ -33,16 +33,25 @@ public AsyncSAMFileWriter(final SAMFileWriter out, final int queueSize) { this.underlyingWriter = out; } - @Override protected void synchronouslyWrite(final SAMRecord item) { this.underlyingWriter.addAlignment(item); } + @Override + protected void synchronouslyWrite(final SAMRecord item) { + this.underlyingWriter.addAlignment(item); + } - @Override protected void synchronouslyClose() { this.underlyingWriter.close(); } + @Override + protected void synchronouslyClose() { + this.underlyingWriter.close(); + } - @Override protected final String getThreadNamePrefix() { return "SAMFileWriterThread-"; } + @Override + protected final String getThreadNamePrefix() { + return "SAMFileWriterThread-"; + } - @Override - public void setProgressLogger(final ProgressLoggerInterface progress) { - this.underlyingWriter.setProgressLogger(progress); - } + @Override + public void setProgressLogger(final ProgressLoggerInterface progress) { + this.underlyingWriter.setProgressLogger(progress); + } @Override public void setSortOrderChecking(boolean check) { diff --git a/src/main/java/htsjdk/samtools/BAMFileConstants.java b/src/main/java/htsjdk/samtools/BAMFileConstants.java index 638c56cc56..8192bf5612 100644 --- a/src/main/java/htsjdk/samtools/BAMFileConstants.java +++ b/src/main/java/htsjdk/samtools/BAMFileConstants.java @@ -36,7 +36,6 @@ class BAMFileConstants { * BAM file magic number. This is what is present in the gunzipped version of the file, * which never exists on disk. */ - static final byte[] BAM_MAGIC = "BAM\1".getBytes(); /** * BAM index file magic numbers. 
@@ -44,6 +43,7 @@ class BAMFileConstants { */ @Deprecated static final byte[] BAM_INDEX_MAGIC = "BAI\1".getBytes(); + static final byte[] BAI_INDEX_MAGIC = "BAI\1".getBytes(); static final byte[] CSI_INDEX_MAGIC = "CSI\1".getBytes(); @@ -51,6 +51,7 @@ class BAMFileConstants { * CSI index related constants */ static final int CSI_MAGIC_OFFSET = 0; + static final int CSI_MINSHIFT_OFFSET = 4; static final int CSI_AUXDATA_OFFSET = 12; static final int CSI_CHUNK_SIZE = 16; diff --git a/src/main/java/htsjdk/samtools/BAMFileReader.java b/src/main/java/htsjdk/samtools/BAMFileReader.java index 3693eeba8c..9667174d32 100644 --- a/src/main/java/htsjdk/samtools/BAMFileReader.java +++ b/src/main/java/htsjdk/samtools/BAMFileReader.java @@ -23,11 +23,9 @@ */ package htsjdk.samtools; - import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.*; import htsjdk.samtools.util.zip.InflaterFactory; - import java.io.DataInputStream; import java.io.File; import java.io.IOException; @@ -94,15 +92,22 @@ public class BAMFileReader extends SamReader.ReaderImplementation { * @param samRecordFactory SAM record factory * @throws IOException */ - BAMFileReader(final InputStream stream, - final File indexFile, - final boolean eagerDecode, - final boolean useAsynchronousIO, - final ValidationStringency validationStringency, - final SAMRecordFactory samRecordFactory) + BAMFileReader( + final InputStream stream, + final File indexFile, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory) throws IOException { - this(stream, indexFile, eagerDecode, useAsynchronousIO, validationStringency, samRecordFactory, - BlockGunzipper.getDefaultInflaterFactory()); + this( + stream, + indexFile, + eagerDecode, + useAsynchronousIO, + validationStringency, + samRecordFactory, + BlockGunzipper.getDefaultInflaterFactory()); } /** @@ -116,17 +121,20 @@ public class BAMFileReader extends 
SamReader.ReaderImplementation { * @param inflaterFactory InflaterFactory used by BlockCompressedInputStream * @throws IOException */ - BAMFileReader(final InputStream stream, - final File indexFile, - final boolean eagerDecode, - final boolean useAsynchronousIO, - final ValidationStringency validationStringency, - final SAMRecordFactory samRecordFactory, - final InflaterFactory inflaterFactory) + BAMFileReader( + final InputStream stream, + final File indexFile, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory, + final InflaterFactory inflaterFactory) throws IOException { mIndexFile = indexFile; mIsSeekable = false; - mCompressedInputStream = useAsynchronousIO ? new AsyncBlockCompressedInputStream(stream, inflaterFactory) : new BlockCompressedInputStream(stream, inflaterFactory); + mCompressedInputStream = useAsynchronousIO + ? new AsyncBlockCompressedInputStream(stream, inflaterFactory) + : new BlockCompressedInputStream(stream, inflaterFactory); mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream)); this.eagerDecode = eagerDecode; this.mValidationStringency = validationStringency; @@ -144,14 +152,22 @@ public class BAMFileReader extends SamReader.ReaderImplementation { * @param samRecordFactory SAM record factory * @throws IOException */ - BAMFileReader(final File file, - final File indexFile, - final boolean eagerDecode, - final boolean useAsynchronousIO, - final ValidationStringency validationStringency, - final SAMRecordFactory samRecordFactory) - throws IOException { - this(file, indexFile, eagerDecode, useAsynchronousIO, validationStringency, samRecordFactory, BlockGunzipper.getDefaultInflaterFactory()); + BAMFileReader( + final File file, + final File indexFile, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory) + throws 
IOException { + this( + file, + indexFile, + eagerDecode, + useAsynchronousIO, + validationStringency, + samRecordFactory, + BlockGunzipper.getDefaultInflaterFactory()); } /** @@ -165,20 +181,29 @@ public class BAMFileReader extends SamReader.ReaderImplementation { * @param inflaterFactory InflaterFactory used by BlockCompressedInputStream * @throws IOException */ - BAMFileReader(final File file, - final File indexFile, - final boolean eagerDecode, - final boolean useAsynchronousIO, - final ValidationStringency validationStringency, - final SAMRecordFactory samRecordFactory, - final InflaterFactory inflaterFactory) - throws IOException { - this(useAsynchronousIO ? new AsyncBlockCompressedInputStream(file, inflaterFactory) : new BlockCompressedInputStream(file, inflaterFactory), - indexFile!=null ? indexFile : SamFiles.findIndex(file), eagerDecode, useAsynchronousIO, file.getAbsolutePath(), validationStringency, samRecordFactory); + BAMFileReader( + final File file, + final File indexFile, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory, + final InflaterFactory inflaterFactory) + throws IOException { + this( + useAsynchronousIO + ? new AsyncBlockCompressedInputStream(file, inflaterFactory) + : new BlockCompressedInputStream(file, inflaterFactory), + indexFile != null ? indexFile : SamFiles.findIndex(file), + eagerDecode, + useAsynchronousIO, + file.getAbsolutePath(), + validationStringency, + samRecordFactory); if (mIndexFile != null && mIndexFile.lastModified() < file.lastModified() - 5000) { - System.err.println("WARNING: BAM index file " + mIndexFile.getAbsolutePath() + - " is older than BAM " + file.getAbsolutePath()); + System.err.println("WARNING: BAM index file " + mIndexFile.getAbsolutePath() + " is older than BAM " + + file.getAbsolutePath()); } // Provide better error message when there is an error reading. 
@@ -195,14 +220,22 @@ public class BAMFileReader extends SamReader.ReaderImplementation { * @param samRecordFactory SAM record factory * @throws IOException */ - BAMFileReader(final SeekableStream strm, - final File indexFile, - final boolean eagerDecode, - final boolean useAsynchronousIO, - final ValidationStringency validationStringency, - final SAMRecordFactory samRecordFactory) - throws IOException { - this(strm, indexFile, eagerDecode, useAsynchronousIO, validationStringency, samRecordFactory, BlockGunzipper.getDefaultInflaterFactory()); + BAMFileReader( + final SeekableStream strm, + final File indexFile, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory) + throws IOException { + this( + strm, + indexFile, + eagerDecode, + useAsynchronousIO, + validationStringency, + samRecordFactory, + BlockGunzipper.getDefaultInflaterFactory()); } /** @@ -216,16 +249,25 @@ public class BAMFileReader extends SamReader.ReaderImplementation { * @param inflaterFactory InflaterFactory used by BlockCompressedInputStream * @throws IOException */ - BAMFileReader(final SeekableStream strm, - final File indexFile, - final boolean eagerDecode, - final boolean useAsynchronousIO, - final ValidationStringency validationStringency, - final SAMRecordFactory samRecordFactory, - final InflaterFactory inflaterFactory) - throws IOException { - this(useAsynchronousIO ? 
new AsyncBlockCompressedInputStream(strm, inflaterFactory) : new BlockCompressedInputStream(strm, inflaterFactory), - indexFile, eagerDecode, useAsynchronousIO, strm.getSource(), validationStringency, samRecordFactory); + BAMFileReader( + final SeekableStream strm, + final File indexFile, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory, + final InflaterFactory inflaterFactory) + throws IOException { + this( + useAsynchronousIO + ? new AsyncBlockCompressedInputStream(strm, inflaterFactory) + : new BlockCompressedInputStream(strm, inflaterFactory), + indexFile, + eagerDecode, + useAsynchronousIO, + strm.getSource(), + validationStringency, + samRecordFactory); } /** @@ -238,14 +280,22 @@ public class BAMFileReader extends SamReader.ReaderImplementation { * @param samRecordFactory SAM record factory * @throws IOException */ - BAMFileReader(final SeekableStream strm, - final SeekableStream indexStream, - final boolean eagerDecode, - final boolean useAsynchronousIO, - final ValidationStringency validationStringency, - final SAMRecordFactory samRecordFactory) - throws IOException { - this(strm, indexStream, eagerDecode, useAsynchronousIO, validationStringency, samRecordFactory, BlockGunzipper.getDefaultInflaterFactory()); + BAMFileReader( + final SeekableStream strm, + final SeekableStream indexStream, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory) + throws IOException { + this( + strm, + indexStream, + eagerDecode, + useAsynchronousIO, + validationStringency, + samRecordFactory, + BlockGunzipper.getDefaultInflaterFactory()); } /** @@ -259,16 +309,25 @@ public class BAMFileReader extends SamReader.ReaderImplementation { * @param inflaterFactory InflaterFactory used by BlockCompressedInputStream * @throws IOException */ - BAMFileReader(final SeekableStream 
strm, - final SeekableStream indexStream, - final boolean eagerDecode, - final boolean useAsynchronousIO, - final ValidationStringency validationStringency, - final SAMRecordFactory samRecordFactory, - final InflaterFactory inflaterFactory) - throws IOException { - this(useAsynchronousIO ? new AsyncBlockCompressedInputStream(strm, inflaterFactory) : new BlockCompressedInputStream(strm, inflaterFactory), - indexStream, eagerDecode, useAsynchronousIO, strm.getSource(), validationStringency, samRecordFactory); + BAMFileReader( + final SeekableStream strm, + final SeekableStream indexStream, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory, + final InflaterFactory inflaterFactory) + throws IOException { + this( + useAsynchronousIO + ? new AsyncBlockCompressedInputStream(strm, inflaterFactory) + : new BlockCompressedInputStream(strm, inflaterFactory), + indexStream, + eagerDecode, + useAsynchronousIO, + strm.getSource(), + validationStringency, + samRecordFactory); } /** @@ -282,14 +341,15 @@ public class BAMFileReader extends SamReader.ReaderImplementation { * @param samRecordFactory SAM record factory * @throws IOException */ - private BAMFileReader(final BlockCompressedInputStream compressedInputStream, - final File indexFile, - final boolean eagerDecode, - final boolean useAsynchronousIO, - final String source, - final ValidationStringency validationStringency, - final SAMRecordFactory samRecordFactory) - throws IOException { + private BAMFileReader( + final BlockCompressedInputStream compressedInputStream, + final File indexFile, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final String source, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory) + throws IOException { mIndexFile = indexFile; mIsSeekable = true; mCompressedInputStream = compressedInputStream; @@ -312,14 +372,15 @@ private 
BAMFileReader(final BlockCompressedInputStream compressedInputStream, * @param samRecordFactory SAM record factory * @throws IOException */ - private BAMFileReader(final BlockCompressedInputStream compressedInputStream, - final SeekableStream indexStream, - final boolean eagerDecode, - final boolean useAsynchronousIO, - final String source, - final ValidationStringency validationStringency, - final SAMRecordFactory samRecordFactory) - throws IOException { + private BAMFileReader( + final BlockCompressedInputStream compressedInputStream, + final SeekableStream indexStream, + final boolean eagerDecode, + final boolean useAsynchronousIO, + final String source, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory) + throws IOException { mIndexStream = indexStream; mIsSeekable = true; mCompressedInputStream = compressedInputStream; @@ -333,7 +394,8 @@ private BAMFileReader(final BlockCompressedInputStream compressedInputStream, /** Reads through the header and sequence records to find the virtual file offset of the first record in the BAM file. */ static long findVirtualOffsetOfFirstRecord(final File bam) throws IOException { - final BAMFileReader reader = new BAMFileReader(bam, null, false, false, ValidationStringency.SILENT, new DefaultSAMRecordFactory()); + final BAMFileReader reader = + new BAMFileReader(bam, null, false, false, ValidationStringency.SILENT, new DefaultSAMRecordFactory()); final long offset = reader.mFirstRecordPointer; reader.close(); return offset; @@ -344,7 +406,13 @@ static long findVirtualOffsetOfFirstRecord(final File bam) throws IOException { * The caller is responsible for closing the stream. 
*/ static long findVirtualOffsetOfFirstRecord(final SeekableStream seekableStream) throws IOException { - final BAMFileReader reader = new BAMFileReader(seekableStream, (SeekableStream) null, false, false, ValidationStringency.SILENT, new DefaultSAMRecordFactory()); + final BAMFileReader reader = new BAMFileReader( + seekableStream, + (SeekableStream) null, + false, + false, + ValidationStringency.SILENT, + new DefaultSAMRecordFactory()); return reader.mFirstRecordPointer; } @@ -363,7 +431,7 @@ void enableFileSource(final SamReader reader, final boolean enabled) { */ @Override protected void enableIndexCaching(final boolean enabled) { - if(mIndex != null) + if (mIndex != null) throw new SAMException("Unable to turn on index caching; index file has already been loaded."); this.mEnableIndexCaching = enabled; } @@ -381,11 +449,15 @@ protected void enableIndexMemoryMapping(final boolean enabled) { this.mEnableIndexMemoryMapping = enabled; } - @Override void enableCrcChecking(final boolean enabled) { + @Override + void enableCrcChecking(final boolean enabled) { this.mCompressedInputStream.setCheckCrcs(enabled); } - @Override void setSAMRecordFactory(final SAMRecordFactory samRecordFactory) { this.samRecordFactory = samRecordFactory; } + @Override + void setSAMRecordFactory(final SAMRecordFactory samRecordFactory) { + this.samRecordFactory = samRecordFactory; + } @Override public SamReader.Type type() { @@ -409,26 +481,27 @@ public boolean hasIndex() { */ @Override public BAMIndex getIndex() { - if(!hasIndex()) { + if (!hasIndex()) { throw new SAMException("No index is available for this BAM file."); } - if(mIndex == null) { + if (mIndex == null) { final SamIndexes samIndexType = getIndexType(); final SAMSequenceDictionary sequenceDictionary = getFileHeader().getSequenceDictionary(); - if(mIndexFile != null) { + if (mIndexFile != null) { if (samIndexType.equals(SamIndexes.BAI)) { - mIndex = mEnableIndexCaching ? 
new CachingBAMFileIndex(mIndexFile, sequenceDictionary, mEnableIndexMemoryMapping) + mIndex = mEnableIndexCaching + ? new CachingBAMFileIndex(mIndexFile, sequenceDictionary, mEnableIndexMemoryMapping) : new DiskBasedBAMFileIndex(mIndexFile, sequenceDictionary, mEnableIndexMemoryMapping); } else if (samIndexType.equals(SamIndexes.CSI)) { mIndex = new CSIIndex(mIndexFile, mEnableIndexMemoryMapping, sequenceDictionary); } else { throw new SAMFormatException("Unsupported BAM index file format: " + mIndexFile.getName()); } - } else if(mIndexStream != null) { + } else if (mIndexStream != null) { if (samIndexType.equals(SamIndexes.BAI)) { mIndex = new CachingBAMFileIndex(mIndexStream, sequenceDictionary); } else if (samIndexType.equals(SamIndexes.CSI)) { - mIndex = new CSIIndex(mIndexStream, sequenceDictionary); + mIndex = new CSIIndex(mIndexStream, sequenceDictionary); } else { throw new SAMFormatException("Unsupported BAM index file format: " + mIndexStream.getSource()); } @@ -455,13 +528,16 @@ public SamIndexes getIndexType() { if (samIndexesType == SamIndexes.BAI || samIndexesType == SamIndexes.CSI) { return samIndexesType; } - throw new SAMFormatException(String.format("Unknown BAM index file type: %s in %s", samIndexesType, mIndexStream.getSource())); + throw new SAMFormatException( + String.format("Unknown BAM index file type: %s in %s", samIndexesType, mIndexStream.getSource())); } return null; } - public void setEagerDecode(final boolean desired) { this.eagerDecode = desired; } + public void setEagerDecode(final boolean desired) { + this.eagerDecode = desired; + } @Override public void close() { @@ -541,7 +617,7 @@ public CloseableIterator getIterator(final SAMFileSpan chunks) { } // Create an iterator over the given chunk boundaries. 
- mCurrentIterator = new BAMFileIndexIterator(((BAMFileSpan)chunks).toCoordinateArray()); + mCurrentIterator = new BAMFileIndexIterator(((BAMFileSpan) chunks).toCoordinateArray()); return mCurrentIterator; } @@ -553,7 +629,7 @@ public CloseableIterator getIterator(final SAMFileSpan chunks) { */ @Override public SAMFileSpan getFilePointerSpanningReads() { - return new BAMFileSpan(new Chunk(mFirstRecordPointer,Long.MAX_VALUE)); + return new BAMFileSpan(new Chunk(mFirstRecordPointer, Long.MAX_VALUE)); } /** @@ -704,8 +780,9 @@ public CloseableIterator queryUnmapped() { * @param validationStringency Determines how stringent to be when validating the sam * @param source Note that this is used only for reporting errors. */ - protected static SAMFileHeader readHeader(final BinaryCodec stream, final ValidationStringency validationStringency, final String source) - throws IOException { + protected static SAMFileHeader readHeader( + final BinaryCodec stream, final ValidationStringency validationStringency, final String source) + throws IOException { final byte[] buffer = new byte[4]; stream.readBytes(buffer); @@ -717,27 +794,27 @@ protected static SAMFileHeader readHeader(final BinaryCodec stream, final Valida final String textHeader = stream.readString(headerTextLength); final SAMTextHeaderCodec headerCodec = new SAMTextHeaderCodec(); headerCodec.setValidationStringency(validationStringency); - final SAMFileHeader samFileHeader = headerCodec.decode(BufferedLineReader.fromString(textHeader), - source); + final SAMFileHeader samFileHeader = headerCodec.decode(BufferedLineReader.fromString(textHeader), source); final int sequenceCount = stream.readInt(); if (!samFileHeader.getSequenceDictionary().isEmpty()) { // It is allowed to have binary sequences but no text sequences, so only validate if both are present if (sequenceCount != samFileHeader.getSequenceDictionary().size()) { - throw new SAMFormatException("Number of sequences in text header (" + - 
samFileHeader.getSequenceDictionary().size() + - ") != number of sequences in binary header (" + sequenceCount + ") for file " + source); + throw new SAMFormatException("Number of sequences in text header (" + + samFileHeader.getSequenceDictionary().size() + + ") != number of sequences in binary header (" + + sequenceCount + ") for file " + source); } for (int i = 0; i < sequenceCount; i++) { final SAMSequenceRecord binarySequenceRecord = readSequenceRecord(stream, source); final SAMSequenceRecord sequenceRecord = samFileHeader.getSequence(i); if (!sequenceRecord.getSequenceName().equals(binarySequenceRecord.getSequenceName())) { - throw new SAMFormatException("For sequence " + i + ", text and binary have different names in file " + - source); + throw new SAMFormatException( + "For sequence " + i + ", text and binary have different names in file " + source); } if (sequenceRecord.getSequenceLength() != binarySequenceRecord.getSequenceLength()) { - throw new SAMFormatException("For sequence " + i + ", text and binary have different lengths in file " + - source); + throw new SAMFormatException( + "For sequence " + i + ", text and binary have different lengths in file " + source); } } } else { @@ -794,7 +871,6 @@ protected void assertOpen() { public void remove() { throw new UnsupportedOperationException("Not supported: remove"); } - } private class EmptyBamIterator extends AbstractBamIterator { @@ -810,8 +886,8 @@ public SAMRecord next() { } /** - - /** + * + * /** * Iterator for non-indexed sequential iteration through all SAMRecords in file. * Starting point of iteration is wherever current file position is when the iterator is constructed. 
*/ @@ -829,8 +905,8 @@ private class BAMFileIterator extends AbstractBamIterator { */ BAMFileIterator(final boolean advance) { this.bamRecordCodec = new BAMRecordCodec(getFileHeader(), samRecordFactory); - this.bamRecordCodec.setInputStream(BAMFileReader.this.mStream.getInputStream(), - BAMFileReader.this.mStream.getInputFileName()); + this.bamRecordCodec.setInputStream( + BAMFileReader.this.mStream.getInputStream(), BAMFileReader.this.mStream.getInputFileName()); if (advance) { advance(); @@ -861,9 +937,10 @@ void advance() { mNextRecord.setValidationStringency(mValidationStringency); if (mValidationStringency != ValidationStringency.SILENT) { - final List validationErrors = mNextRecord.isValid(mValidationStringency == ValidationStringency.STRICT); - SAMUtils.processValidationErrors(validationErrors, - this.samRecordIndex, BAMFileReader.this.getValidationStringency()); + final List validationErrors = + mNextRecord.isValid(mValidationStringency == ValidationStringency.STRICT); + SAMUtils.processValidationErrors( + validationErrors, this.samRecordIndex, BAMFileReader.this.getValidationStringency()); } } if (eagerDecode && mNextRecord != null) { @@ -882,8 +959,9 @@ SAMRecord getNextRecord() throws IOException { final SAMRecord next = bamRecordCodec.decode(); final long stopCoordinate = mCompressedInputStream.getFilePointer(); - if(mReader != null && next != null) - next.setFileSource(new SAMFileSource(mReader,new BAMFileSpan(new Chunk(startCoordinate,stopCoordinate)))); + if (mReader != null && next != null) + next.setFileSource( + new SAMFileSource(mReader, new BAMFileSpan(new Chunk(startCoordinate, stopCoordinate)))); return next; } @@ -901,8 +979,7 @@ protected SAMRecord peek() { * @param referenceIndex Desired reference sequence. * @param start 1-based alignment start. 
*/ - private CloseableIterator createStartingAtIndexIterator(final int referenceIndex, - final int start) { + private CloseableIterator createStartingAtIndexIterator(final int referenceIndex, final int start) { // Hit the index to determine the chunk boundaries for the required data. final BAMIndex fileIndex = getIndex(); @@ -914,7 +991,7 @@ private CloseableIterator createStartingAtIndexIterator(final int ref // Add some preprocessing filters for edge-case reads that don't fit into this // query type. - return new BAMQueryFilteringIterator(iterator,new BAMStartingAtIteratorFilter(referenceIndex,start)); + return new BAMQueryFilteringIterator(iterator, new BAMStartingAtIteratorFilter(referenceIndex, start)); } /** @@ -927,7 +1004,8 @@ public static BAMFileSpan getFileSpan(QueryInterval[] intervals, BAMIndex fileIn final BAMFileSpan[] inputSpans = new BAMFileSpan[intervals.length]; for (int i = 0; i < intervals.length; ++i) { final QueryInterval interval = intervals[i]; - final BAMFileSpan span = fileIndex.getSpanOverlapping(interval.referenceIndex, interval.start, interval.end); + final BAMFileSpan span = + fileIndex.getSpanOverlapping(interval.referenceIndex, interval.start, interval.end); inputSpans[i] = span; } final BAMFileSpan span; @@ -939,8 +1017,7 @@ public static BAMFileSpan getFileSpan(QueryInterval[] intervals, BAMIndex fileIn return span; } - private CloseableIterator createIndexIterator(final QueryInterval[] intervals, - final boolean contained) { + private CloseableIterator createIndexIterator(final QueryInterval[] intervals, final boolean contained) { QueryInterval.assertIntervalsOptimized(intervals); @@ -951,7 +1028,8 @@ private CloseableIterator createIndexIterator(final QueryInterval[] i // Add some preprocessing filters for edge-case reads that don't fit into this // query type. 
- return new BAMQueryFilteringIterator(iterator, new BAMQueryMultipleIntervalsIteratorFilter(intervals, contained)); + return new BAMQueryFilteringIterator( + iterator, new BAMQueryMultipleIntervalsIteratorFilter(intervals, contained)); } /** @@ -962,9 +1040,8 @@ private CloseableIterator createIndexIterator(final QueryInterval[] i * @param filePointers file pointer pairs corresponding to chunk boundaries for the * intervals */ - public CloseableIterator createIndexIterator(final QueryInterval[] intervals, - final boolean contained, - final long[] filePointers) { + public CloseableIterator createIndexIterator( + final QueryInterval[] intervals, final boolean contained, final long[] filePointers) { QueryInterval.assertIntervalsOptimized(intervals); @@ -973,7 +1050,8 @@ public CloseableIterator createIndexIterator(final QueryInterval[] in // Add some preprocessing filters for edge-case reads that don't fit into this // query type. - return new BAMQueryFilteringIterator(iterator, new BAMQueryMultipleIntervalsIteratorFilter(intervals, contained)); + return new BAMQueryFilteringIterator( + iterator, new BAMQueryMultipleIntervalsIteratorFilter(intervals, contained)); } /** @@ -998,18 +1076,16 @@ private class BAMFileIndexIterator extends BAMFileIterator { * @param filePointers the block / offset combination, stored in chunk format. 
*/ BAMFileIndexIterator(final long[] filePointers) { - super(false); // delay advance() until after construction + super(false); // delay advance() until after construction mFilePointers = filePointers; advance(); } @Override - SAMRecord getNextRecord() - throws IOException { + SAMRecord getNextRecord() throws IOException { // Advance to next file block if necessary while (mCompressedInputStream.getFilePointer() >= mFilePointerLimit) { - if (mFilePointers == null || - mFilePointerIndex >= mFilePointers.length) { + if (mFilePointers == null || mFilePointerIndex >= mFilePointers.length) { return null; } final long startOffset = mFilePointers[mFilePointerIndex++]; @@ -1034,10 +1110,11 @@ public class BAMQueryFilteringIterator extends AbstractBamIterator { * The next record to be returned. Will be null if no such record exists. */ protected SAMRecord mNextRecord; + private final BAMIteratorFilter iteratorFilter; - public BAMQueryFilteringIterator(final CloseableIterator iterator, - final BAMIteratorFilter iteratorFilter) { + public BAMQueryFilteringIterator( + final CloseableIterator iterator, final BAMIteratorFilter iteratorFilter) { this.wrappedIterator = iterator; this.iteratorFilter = iteratorFilter; mNextRecord = advance(); @@ -1058,8 +1135,7 @@ public boolean hasNext() { */ @Override public SAMRecord next() { - if(!hasNext()) - throw new NoSuchElementException("BAMQueryFilteringIterator: no next element available"); + if (!hasNext()) throw new NoSuchElementException("BAMQueryFilteringIterator: no next element available"); final SAMRecord currentRead = mNextRecord; mNextRecord = advance(); return currentRead; @@ -1068,21 +1144,24 @@ public SAMRecord next() { SAMRecord advance() { while (true) { // Pull next record from stream - if(!wrappedIterator.hasNext()) - return null; + if (!wrappedIterator.hasNext()) return null; final SAMRecord record = wrappedIterator.next(); switch (iteratorFilter.compareToFilter(record)) { - case MATCHES_FILTER: return record; - case 
STOP_ITERATION: return null; - case CONTINUE_ITERATION: break; // keep looping - default: throw new SAMException("Unexpected return from compareToFilter"); + case MATCHES_FILTER: + return record; + case STOP_ITERATION: + return null; + case CONTINUE_ITERATION: + break; // keep looping + default: + throw new SAMException("Unexpected return from compareToFilter"); } } } } - private class BAMFileIndexUnmappedIterator extends BAMFileIterator { + private class BAMFileIndexUnmappedIterator extends BAMFileIterator { private BAMFileIndexUnmappedIterator() { while (this.hasNext() && peek().getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { advance(); diff --git a/src/main/java/htsjdk/samtools/BAMFileSpan.java b/src/main/java/htsjdk/samtools/BAMFileSpan.java index d99760d2a8..4fe90198e7 100644 --- a/src/main/java/htsjdk/samtools/BAMFileSpan.java +++ b/src/main/java/htsjdk/samtools/BAMFileSpan.java @@ -24,7 +24,6 @@ package htsjdk.samtools; import htsjdk.samtools.util.StringUtil; - import java.io.Serializable; import java.util.ArrayList; import java.util.Collections; @@ -90,8 +89,7 @@ public boolean isEmpty() { @Override public BAMFileSpan clone() { final BAMFileSpan clone = new BAMFileSpan(); - for(final Chunk chunk: chunks) - clone.chunks.add(chunk.clone()); + for (final Chunk chunk : chunks) clone.chunks.add(chunk.clone()); return clone; } @@ -104,30 +102,26 @@ public BAMFileSpan clone() { */ @Override public SAMFileSpan removeContentsBefore(final SAMFileSpan fileSpan) { - if(fileSpan == null) - return clone(); + if (fileSpan == null) return clone(); - if(!(fileSpan instanceof BAMFileSpan)) - throw new SAMException("Unable to compare "); + if (!(fileSpan instanceof BAMFileSpan)) throw new SAMException("Unable to compare "); - final BAMFileSpan bamFileSpan = (BAMFileSpan)fileSpan; + final BAMFileSpan bamFileSpan = (BAMFileSpan) fileSpan; - if(bamFileSpan.isEmpty()) - return clone(); + if (bamFileSpan.isEmpty()) return clone(); validateSorted(); final 
BAMFileSpan trimmedChunkList = new BAMFileSpan(); final long chunkStart = bamFileSpan.chunks.get(0).getChunkStart(); - for(final Chunk chunkToTrim: chunks) { - if(chunkToTrim.getChunkEnd() > chunkStart) { - if(chunkToTrim.getChunkStart() >= chunkStart) { + for (final Chunk chunkToTrim : chunks) { + if (chunkToTrim.getChunkEnd() > chunkStart) { + if (chunkToTrim.getChunkStart() >= chunkStart) { // This chunk from the list is completely beyond the start of the filtering chunk. trimmedChunkList.add(chunkToTrim.clone()); - } - else { + } else { // This chunk from the list partially overlaps the filtering chunk and must be trimmed. - trimmedChunkList.add(new Chunk(chunkStart,chunkToTrim.getChunkEnd())); + trimmedChunkList.add(new Chunk(chunkStart, chunkToTrim.getChunkEnd())); } } } @@ -143,30 +137,27 @@ public SAMFileSpan removeContentsBefore(final SAMFileSpan fileSpan) { * given chunk. */ public SAMFileSpan removeContentsAfter(final SAMFileSpan fileSpan) { - if(fileSpan == null) - return clone(); + if (fileSpan == null) return clone(); - if(!(fileSpan instanceof BAMFileSpan)) - throw new SAMException("Unable to compare "); + if (!(fileSpan instanceof BAMFileSpan)) throw new SAMException("Unable to compare "); - final BAMFileSpan bamFileSpan = (BAMFileSpan)fileSpan; + final BAMFileSpan bamFileSpan = (BAMFileSpan) fileSpan; - if(bamFileSpan.isEmpty()) - return clone(); + if (bamFileSpan.isEmpty()) return clone(); validateSorted(); final BAMFileSpan trimmedChunkList = new BAMFileSpan(); - final long chunkEnd = bamFileSpan.chunks.get(bamFileSpan.chunks.size() - 1).getChunkEnd(); - for(final Chunk chunkToTrim: chunks) { - if(chunkToTrim.getChunkStart() < chunkEnd) { - if(chunkToTrim.getChunkEnd() <= chunkEnd) { + final long chunkEnd = + bamFileSpan.chunks.get(bamFileSpan.chunks.size() - 1).getChunkEnd(); + for (final Chunk chunkToTrim : chunks) { + if (chunkToTrim.getChunkStart() < chunkEnd) { + if (chunkToTrim.getChunkEnd() <= chunkEnd) { // This chunk from the list is 
completely before the end of the filtering chunk. trimmedChunkList.add(chunkToTrim.clone()); - } - else { + } else { // This chunk from the list partially overlaps the filtering chunk and must be trimmed. - trimmedChunkList.add(new Chunk(chunkToTrim.getChunkStart(),chunkEnd)); + trimmedChunkList.add(new Chunk(chunkToTrim.getChunkStart(), chunkEnd)); } } } @@ -179,10 +170,10 @@ public SAMFileSpan removeContentsAfter(final SAMFileSpan fileSpan) { */ @Override public SAMFileSpan getContentsFollowing() { - if(chunks.isEmpty()) + if (chunks.isEmpty()) throw new SAMException("Unable to get the file pointer following this one: no data present."); validateSorted(); - return new BAMFileSpan(new Chunk(chunks.get(chunks.size()-1).getChunkEnd(),Long.MAX_VALUE)); + return new BAMFileSpan(new Chunk(chunks.get(chunks.size() - 1).getChunkEnd(), Long.MAX_VALUE)); } /** @@ -228,7 +219,7 @@ public long[] toCoordinateArray() { */ public long getFirstOffset() { final long result = 0; - if (chunks == null){ + if (chunks == null) { return result; } for (final Chunk chunk : chunks) { @@ -250,7 +241,7 @@ public List getChunks() { * @return The single chunk stored in this span */ protected Chunk getSingleChunk() { - if (chunks.size() != 1){ + if (chunks.size() != 1) { throw new SAMException("Expecting a single chunk for span. Found " + chunks.size()); } return chunks.get(0); @@ -265,12 +256,12 @@ protected Chunk getSingleChunk() { * @return A list of chunks. 
*/ protected static SAMFileSpan toChunkList(final long[] coordinateArray) { - if(coordinateArray.length % 2 != 0) + if (coordinateArray.length % 2 != 0) throw new SAMException("Data supplied does not appear to be in coordinate array format."); final BAMFileSpan chunkList = new BAMFileSpan(); - for(int i = 0; i < coordinateArray.length; i += 2) - chunkList.add(new Chunk(coordinateArray[i],coordinateArray[i+1])); + for (int i = 0; i < coordinateArray.length; i += 2) + chunkList.add(new Chunk(coordinateArray[i], coordinateArray[i + 1])); chunkList.validateSorted(); @@ -281,9 +272,10 @@ protected static SAMFileSpan toChunkList(final long[] coordinateArray) { * Validates the list of chunks to ensure that they appear in sorted order. */ private void validateSorted() { - for(int i = 1; i < chunks.size(); i++) { - if(chunks.get(i).getChunkStart() < chunks.get(i-1).getChunkEnd()) - throw new SAMException(String.format("Chunk list is unsorted; chunk %s is before chunk %s",chunks.get(i-1),chunks.get(i))); + for (int i = 1; i < chunks.size(); i++) { + if (chunks.get(i).getChunkStart() < chunks.get(i - 1).getChunkEnd()) + throw new SAMException(String.format( + "Chunk list is unsorted; chunk %s is before chunk %s", chunks.get(i - 1), chunks.get(i))); } } @@ -303,7 +295,7 @@ public String toString() { public static BAMFileSpan merge(final BAMFileSpan[] spans) { final ArrayList inputChunks = new ArrayList(); for (final BAMFileSpan span : spans) { - if(span != null){ + if (span != null) { inputChunks.addAll(span.chunks); } } diff --git a/src/main/java/htsjdk/samtools/BAMFileWriter.java b/src/main/java/htsjdk/samtools/BAMFileWriter.java index ae6f6a0c26..b2e3381493 100644 --- a/src/main/java/htsjdk/samtools/BAMFileWriter.java +++ b/src/main/java/htsjdk/samtools/BAMFileWriter.java @@ -29,7 +29,6 @@ import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.RuntimeIOException; import htsjdk.samtools.util.zip.DeflaterFactory; - import java.io.File; import java.io.IOException; 
import java.io.OutputStream; @@ -72,19 +71,25 @@ protected BAMFileWriter(final OutputStream os, final File file, final int compre outputBinaryCodec.setOutputFileName(getPathString(file)); } - protected BAMFileWriter(final OutputStream os, final File file, final int compressionLevel, final DeflaterFactory deflaterFactory) { + protected BAMFileWriter( + final OutputStream os, final File file, final int compressionLevel, final DeflaterFactory deflaterFactory) { blockCompressedOutputStream = new BlockCompressedOutputStream(os, file, compressionLevel, deflaterFactory); outputBinaryCodec = new BinaryCodec(blockCompressedOutputStream); outputBinaryCodec.setOutputFileName(getPathString(file)); } - protected BAMFileWriter(final OutputStream os, final String absoluteFilename, final int compressionLevel, final DeflaterFactory deflaterFactory) { - blockCompressedOutputStream = new BlockCompressedOutputStream(os, (Path)null, compressionLevel, deflaterFactory); - outputBinaryCodec = new BinaryCodec(blockCompressedOutputStream); - outputBinaryCodec.setOutputFileName(absoluteFilename); + protected BAMFileWriter( + final OutputStream os, + final String absoluteFilename, + final int compressionLevel, + final DeflaterFactory deflaterFactory) { + blockCompressedOutputStream = + new BlockCompressedOutputStream(os, (Path) null, compressionLevel, deflaterFactory); + outputBinaryCodec = new BinaryCodec(blockCompressedOutputStream); + outputBinaryCodec.setOutputFileName(absoluteFilename); } - private void prepareToWriteAlignments() { + private void prepareToWriteAlignments() { if (bamRecordCodec == null) { bamRecordCodec = new BAMRecordCodec(getFileHeader()); bamRecordCodec.setOutputStream(outputBinaryCodec.getOutputStream(), getFilename()); @@ -92,17 +97,17 @@ private void prepareToWriteAlignments() { } /** @return absolute path, or null if arg is null. */ - private String getPathString(final File path){ + private String getPathString(final File path) { return (path != null) ? 
path.getAbsolutePath() : null; } - // Allow enabling the bam index construction - // only enabled by factory method before anything is written - void enableBamIndexConstruction () { - if (!getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)){ - throw new SAMException("Not creating BAM index since not sorted by coordinates: " + getSortOrder()); + // Allow enabling the bam index construction + // only enabled by factory method before anything is written + void enableBamIndexConstruction() { + if (!getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)) { + throw new SAMException("Not creating BAM index since not sorted by coordinates: " + getSortOrder()); } - if(getFilename() == null){ + if (getFilename() == null) { throw new SAMException("Not creating BAM index since we don't have an output file name"); } bamIndexer = createBamIndex(getFilename()); @@ -110,12 +115,13 @@ void enableBamIndexConstruction () { private BAMIndexer createBamIndex(final String pathURI) { try { - final String indexFileBase = pathURI.endsWith(FileExtensions.BAM) ? - pathURI.substring(0, pathURI.lastIndexOf('.')) : pathURI; + final String indexFileBase = + pathURI.endsWith(FileExtensions.BAM) ? 
pathURI.substring(0, pathURI.lastIndexOf('.')) : pathURI; final Path indexPath = IOUtil.getPath(indexFileBase + FileExtensions.BAI_INDEX); if (Files.exists(indexPath)) { if (!Files.isWritable(indexPath)) { - throw new SAMException("Not creating BAM index since unable to write index file " + indexPath.toUri()); + throw new SAMException( + "Not creating BAM index since unable to write index file " + indexPath.toUri()); } } return new BAMIndexer(indexPath, getFileHeader()); @@ -153,13 +159,13 @@ protected void writeHeader(final String textHeader) { @Override protected void finish() { outputBinaryCodec.close(); - try { - if (bamIndexer != null) { - bamIndexer.finish(); - } - } catch (Exception e) { - throw new SAMException("Exception writing BAM index file", e); + try { + if (bamIndexer != null) { + bamIndexer.finish(); } + } catch (Exception e) { + throw new SAMException("Exception writing BAM index file", e); + } } /** @return absolute path in URI format, or null if this writer does not correspond to a file. @@ -173,7 +179,8 @@ protected String getFilename() { * Writes a header to a BAM file. samFileHeader and headerText are redundant - one can be used to regenerate the other but in * some instances we already have both so this allows us to save some cycles */ - protected static void writeHeader(final BinaryCodec outputBinaryCodec, final SAMFileHeader samFileHeader, final String headerText) { + protected static void writeHeader( + final BinaryCodec outputBinaryCodec, final SAMFileHeader samFileHeader, final String headerText) { outputBinaryCodec.writeBytes(BAMFileConstants.BAM_MAGIC); // calculate and write the length of the SAM file header text and the header text @@ -181,7 +188,8 @@ protected static void writeHeader(final BinaryCodec outputBinaryCodec, final SAM // write the sequences binarily. 
This is redundant with the text header outputBinaryCodec.writeInt(samFileHeader.getSequenceDictionary().size()); - for (final SAMSequenceRecord sequenceRecord: samFileHeader.getSequenceDictionary().getSequences()) { + for (final SAMSequenceRecord sequenceRecord : + samFileHeader.getSequenceDictionary().getSequences()) { outputBinaryCodec.writeString(sequenceRecord.getSequenceName(), true, true); outputBinaryCodec.writeInt(sequenceRecord.getSequenceLength()); } @@ -203,7 +211,8 @@ protected static void writeHeader(final BinaryCodec outputBinaryCodec, final SAM * @param samFileHeader the header to write */ public static void writeHeader(final OutputStream outputStream, final SAMFileHeader samFileHeader) { - final BlockCompressedOutputStream blockCompressedOutputStream = new BlockCompressedOutputStream(outputStream, (Path)null); + final BlockCompressedOutputStream blockCompressedOutputStream = + new BlockCompressedOutputStream(outputStream, (Path) null); final BinaryCodec outputBinaryCodec = new BinaryCodec(blockCompressedOutputStream); writeHeader(outputBinaryCodec, samFileHeader); try { diff --git a/src/main/java/htsjdk/samtools/BAMIndex.java b/src/main/java/htsjdk/samtools/BAMIndex.java index ffc93993ce..d55797241e 100644 --- a/src/main/java/htsjdk/samtools/BAMIndex.java +++ b/src/main/java/htsjdk/samtools/BAMIndex.java @@ -23,9 +23,8 @@ */ package htsjdk.samtools; -import java.io.Closeable; - import htsjdk.samtools.util.FileExtensions; +import java.io.Closeable; /** * A basic interface for querying BAM indices. @@ -55,7 +54,7 @@ public interface BAMIndex extends Closeable { * Gets the compressed chunks which should be searched for the contents of records contained by the span * referenceIndex:startPos-endPos, inclusive. See the BAM spec for more information on how a chunk is * represented. - * + * * @param referenceIndex The contig. * @param startPos Genomic start of query. * @param endPos Genomic end of query. 
diff --git a/src/main/java/htsjdk/samtools/BAMIndexContent.java b/src/main/java/htsjdk/samtools/BAMIndexContent.java index be9d856aa0..d4dd047f78 100644 --- a/src/main/java/htsjdk/samtools/BAMIndexContent.java +++ b/src/main/java/htsjdk/samtools/BAMIndexContent.java @@ -1,69 +1,75 @@ -/* - * The MIT License - * - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package htsjdk.samtools; - -/** - * Represents the contents of a bam index file for one reference. - * A BAM index (.bai) file contains information for all references in the bam file. - * This class describes the data present in the index file for one of these references; - * including the bins, chunks, and linear index. - */ -class BAMIndexContent extends BinningIndexContent { - /** - * Chunks containing metaData for the reference, e.g. 
number of aligned and unaligned records - */ - private final BAMIndexMetaData mMetaData; - - - - /** - * @param referenceSequence Content corresponds to this reference. - * @param binList Array of bins represented by this content, possibly sparse - * @param metaData Extra information about the reference in this index - * @param linearIndex Additional index used to optimize queries - */ - BAMIndexContent(final int referenceSequence, final BinList binList, final BAMIndexMetaData metaData, final LinearIndex linearIndex) { - super(referenceSequence, binList, linearIndex); - this.mMetaData = metaData; - } - - /** - * @param referenceSequence Content corresponds to this reference. - * @param bins Array of bins represented by this content, possibly sparse - * @param numberOfBins Number of non-null bins - * @param metaData Extra information about the reference in this index - * @param linearIndex Additional index used to optimize queries - */ - BAMIndexContent(final int referenceSequence, final Bin[] bins, final int numberOfBins, final BAMIndexMetaData metaData, final LinearIndex linearIndex) { - this(referenceSequence, new BinList(bins, numberOfBins), metaData, linearIndex); - } - - /** - * @return the meta data chunks for this content - */ - public BAMIndexMetaData getMetaData() { - return mMetaData; - } - -} +/* + * The MIT License + * + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools; + +/** + * Represents the contents of a bam index file for one reference. + * A BAM index (.bai) file contains information for all references in the bam file. + * This class describes the data present in the index file for one of these references; + * including the bins, chunks, and linear index. + */ +class BAMIndexContent extends BinningIndexContent { + /** + * Chunks containing metaData for the reference, e.g. number of aligned and unaligned records + */ + private final BAMIndexMetaData mMetaData; + + /** + * @param referenceSequence Content corresponds to this reference. + * @param binList Array of bins represented by this content, possibly sparse + * @param metaData Extra information about the reference in this index + * @param linearIndex Additional index used to optimize queries + */ + BAMIndexContent( + final int referenceSequence, + final BinList binList, + final BAMIndexMetaData metaData, + final LinearIndex linearIndex) { + super(referenceSequence, binList, linearIndex); + this.mMetaData = metaData; + } + + /** + * @param referenceSequence Content corresponds to this reference. 
+ * @param bins Array of bins represented by this content, possibly sparse + * @param numberOfBins Number of non-null bins + * @param metaData Extra information about the reference in this index + * @param linearIndex Additional index used to optimize queries + */ + BAMIndexContent( + final int referenceSequence, + final Bin[] bins, + final int numberOfBins, + final BAMIndexMetaData metaData, + final LinearIndex linearIndex) { + this(referenceSequence, new BinList(bins, numberOfBins), metaData, linearIndex); + } + + /** + * @return the meta data chunks for this content + */ + public BAMIndexMetaData getMetaData() { + return mMetaData; + } +} diff --git a/src/main/java/htsjdk/samtools/BAMIndexMerger.java b/src/main/java/htsjdk/samtools/BAMIndexMerger.java index 2d0890e750..0ba360cd74 100644 --- a/src/main/java/htsjdk/samtools/BAMIndexMerger.java +++ b/src/main/java/htsjdk/samtools/BAMIndexMerger.java @@ -25,7 +25,6 @@ import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.BlockCompressedFilePointerUtil; - import java.io.OutputStream; import java.util.ArrayList; import java.util.Arrays; @@ -78,8 +77,9 @@ public void processIndex(final AbstractBAMFileIndex index, final long partLength sequenceDictionary = index.getBamDictionary(); } if (index.getNumberOfReferences() != numReferences) { - throw new IllegalArgumentException( - String.format("Cannot merge BAI files with different number of references, %s and %s.", numReferences, index.getNumberOfReferences())); + throw new IllegalArgumentException(String.format( + "Cannot merge BAI files with different number of references, %s and %s.", + numReferences, index.getNumberOfReferences())); } index.getBamDictionary().assertSameDictionary(sequenceDictionary); // just store the indexes rather than computing the BAMIndexContent for each ref, @@ -99,7 +99,8 @@ public void finish(final long dataFileLength) { try (BinaryBAMIndexWriter writer = new BinaryBAMIndexWriter(numReferences, out)) { for (int ref = 
0; ref < numReferences; ref++) { final int r = ref; - List bamIndexContentList = indexes.stream().map(index -> index.getQueryResults(r)).collect(Collectors.toList()); + List bamIndexContentList = + indexes.stream().map(index -> index.getQueryResults(r)).collect(Collectors.toList()); final BAMIndexContent bamIndexContent = mergeBAMIndexContent(ref, bamIndexContentList, offsets); writer.writeReference(bamIndexContent); } @@ -111,8 +112,8 @@ public static AbstractBAMFileIndex openIndex(SeekableStream stream, SAMSequenceD return new CachingBamFileIndexOptimizedForMerging(stream, dictionary); } - private static BAMIndexContent mergeBAMIndexContent(final int referenceSequence, - final List bamIndexContentList, final long[] offsets) { + private static BAMIndexContent mergeBAMIndexContent( + final int referenceSequence, final List bamIndexContentList, final long[] offsets) { final List binLists = new ArrayList<>(); final List metaDataList = new ArrayList<>(); final List linearIndexes = new ArrayList<>(); @@ -140,9 +141,14 @@ private static BAMIndexContent mergeBAMIndexContent(final int referenceSequence, * @param offsets bin i will be shifted by offset i * @return the merged bins */ - public static BinningIndexContent.BinList mergeBins(final List binLists, final long[] offsets) { + public static BinningIndexContent.BinList mergeBins( + final List binLists, final long[] offsets) { final List mergedBins = new ArrayList<>(); - final int maxBinNumber = binLists.stream().filter(Objects::nonNull).mapToInt(bl -> bl.maxBinNumber).max().orElse(0); + final int maxBinNumber = binLists.stream() + .filter(Objects::nonNull) + .mapToInt(bl -> bl.maxBinNumber) + .max() + .orElse(0); int commonNonNullBins = 0; for (int i = 0; i <= maxBinNumber; i++) { final List nonNullBins = new ArrayList<>(); @@ -161,8 +167,11 @@ public static BinningIndexContent.BinList mergeBins(final List bins) { final List allChunks = new ArrayList<>(); for (Bin b : bins) { if (b.getReferenceSequence() != 
referenceSequence) { - throw new IllegalArgumentException(String.format("Bins have different reference sequences, %s and %s.", b.getReferenceSequence(), referenceSequence)); + throw new IllegalArgumentException(String.format( + "Bins have different reference sequences, %s and %s.", + b.getReferenceSequence(), referenceSequence)); } if (b.getBinNumber() != binNumber) { - throw new IllegalArgumentException(String.format("Bins have different numbers, %s and %s.", b.getBinNumber(), binNumber)); + throw new IllegalArgumentException( + String.format("Bins have different numbers, %s and %s.", b.getBinNumber(), binNumber)); } allChunks.addAll(b.getChunkList()); } @@ -241,7 +253,8 @@ private static BAMIndexMetaData mergeMetaData(final List metaD * @param offsets linear index i will be shifted by offset i * @return the merged linear index */ - public static LinearIndex mergeLinearIndexes(final int referenceSequence, final List linearIndexes, final long[] offsets) { + public static LinearIndex mergeLinearIndexes( + final int referenceSequence, final List linearIndexes, final long[] offsets) { int maxIndex = -1; for (LinearIndex li : linearIndexes) { if (li == null) { diff --git a/src/main/java/htsjdk/samtools/BAMIndexMetaData.java b/src/main/java/htsjdk/samtools/BAMIndexMetaData.java index e2b1818bec..8e4ad4531a 100644 --- a/src/main/java/htsjdk/samtools/BAMIndexMetaData.java +++ b/src/main/java/htsjdk/samtools/BAMIndexMetaData.java @@ -1,287 +1,289 @@ -/* - * The MIT License - * - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - 
* - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package htsjdk.samtools; - -import htsjdk.samtools.cram.BAIEntry; -import htsjdk.samtools.util.BlockCompressedFilePointerUtil; - -import java.io.File; -import java.io.IOException; -import java.util.List; - -/** - * Metadata about the bam index contained within the bam index. - * One instance created per index file. - */ -public class BAMIndexMetaData { - - // information for the entire index. - // stored at the end of the index - private long noCoordinateRecords = 0; - - // information for each reference. 
- // stored in two chunks in bin # MAX_BINS - private long firstOffset = -1; - private long lastOffset = 0; - private int alignedRecords = 0; - private int unAlignedRecords = 0; // unmapped, but associated with this reference - - - /** - * Constructor used when writing an index - * construct one instance for each index generated - */ - BAMIndexMetaData() { - noCoordinateRecords = 0; - newReference(); - } - - /** - * Constructor used when reading an index - * construct one instance for each index generated - */ - BAMIndexMetaData(List chunkList) { - noCoordinateRecords = 0; - - if (chunkList == null || chunkList.isEmpty()) { - // System.out.println("No metadata chunks"); - } else if (chunkList.size() != 2) { - throw new SAMException("Unexpected number of metadata chunks " + (chunkList.size())); - } - // fill in the first/lastOffset un/alignedRecords from this - boolean firstChunk = true; - if (chunkList != null) { - for (Chunk c : chunkList) { - long start = c.getChunkStart(); - long end = c.getChunkEnd(); - if (firstChunk) { - firstOffset = start; - lastOffset = end; - firstChunk = false; - } else { - firstChunk = true; - alignedRecords = (int) start; - unAlignedRecords = (int) end; - } - } - } - } - - private BAMIndexMetaData(final long firstOffset, final long lastOffset, final int alignedRecords, final int unAlignedRecords) { - this.firstOffset = firstOffset; - this.lastOffset = lastOffset; - this.alignedRecords = alignedRecords; - this.unAlignedRecords = unAlignedRecords; - } - - /** - * @return the count of aligned records associated with this reference - */ - public int getAlignedRecordCount() { - return alignedRecords; - } - - /** - * @return the count of unaligned records associated with this reference - */ - public int getUnalignedRecordCount() { - return unAlignedRecords; - } - - /** - * Call for each new reference sequence encountered - */ - void newReference() { - firstOffset = -1; - lastOffset = 0; - alignedRecords = 0; - unAlignedRecords = 0; - } - - 
/** - * Extract relevant metaData from the record and its filePointer - * Call only once per record in the file being indexed - * - * @param rec - */ - void recordMetaData(final SAMRecord rec) { - - final int alignmentStart = rec.getAlignmentStart(); - if (alignmentStart == SAMRecord.NO_ALIGNMENT_START) { - incrementNoCoordinateRecordCount(); - return; - } - - if (rec.getFileSource() == null) { - throw new SAMException("BAM cannot be indexed without setting a fileSource for record " + rec); - } - final Chunk newChunk = ((BAMFileSpan) rec.getFileSource().getFilePointer()).getSingleChunk(); - final long start = newChunk.getChunkStart(); - final long end = newChunk.getChunkEnd(); - - if (rec.getReadUnmappedFlag()) { - unAlignedRecords++; - } else { - alignedRecords++; - } - if (BlockCompressedFilePointerUtil.compare(start, firstOffset) < 1 || firstOffset == -1) { - this.firstOffset = start; - } - if (BlockCompressedFilePointerUtil.compare(lastOffset, end) < 1) { - this.lastOffset = end; - } - } - - // The resolution of a CRAM BAI index is more coarse than for BAM BAI. Each entry - // is represented by a BAIEntry that represents a slice (or, in the case of - // MULTI_REFERENCE slices, a subset of a slice), rather than SAMRecords. 
- void recordMetaData(final BAIEntry baiEntry) { - alignedRecords += baiEntry.getMappedReadsCount(); - noCoordinateRecords += baiEntry.getUnmappedUnplacedReadsCount(); - unAlignedRecords += baiEntry.getUnmappedReadsCount(); - - final long start = baiEntry.getSliceByteOffsetFromCompressionHeaderStart(); - - if (BlockCompressedFilePointerUtil.compare(start, firstOffset) < 1 || firstOffset == -1) { - this.firstOffset = start; - // not actually used, so set it to a dummy value (start) - // see https://github.com/samtools/htsjdk/issues/401 - this.lastOffset = start; - } - } - - /** - * Call whenever a reference with no coordinate information is encountered in the bam file - */ - void incrementNoCoordinateRecordCount() { - noCoordinateRecords++; - } - - /** - * Set local variable. Normally noCoordinateRecord count accessed from AbstractBAMFileIndex when reading - */ - private void setNoCoordinateRecordCount(long count) { - noCoordinateRecords = count; - } - - - /** - * @return the count of records with no coordinate information in the bam file. - * Not public, since only used by BAMIndexer when writing bam index. - * Readers of bam index should use AbstractBAMFileIndex.getNoCoordinateRecordCount. - */ - long getNoCoordinateRecordCount() { - return noCoordinateRecords; - } - - /** - * @return the first virtual file offset used by this reference - */ - long getFirstOffset() { - return firstOffset; - } - - /** - * @return the last virtual file offset used by this reference - */ - long getLastOffset() { - return lastOffset; - } - - /** - * Return a new metadata object shifted by a given (non-virtual) offset. - * - * @param offset the offset in bytes - * @return a new metadata object shifted by the given offset - * @see BlockCompressedFilePointerUtil#shift(long, long) - */ - BAMIndexMetaData shift(final long offset) { - final long newFirstOffset = firstOffset == -1 ? 
firstOffset : BlockCompressedFilePointerUtil.shift(firstOffset, offset); // -1 is unset - final long newLastOffset = lastOffset == 0 ? lastOffset : BlockCompressedFilePointerUtil.shift(lastOffset, offset); // 0 is unset - return new BAMIndexMetaData(newFirstOffset, newLastOffset, alignedRecords, unAlignedRecords); - } - - /** - * Prints meta-data statistics from BAM index (.bai or .csi) file - * Statistics include count of aligned and unaligned reads for each reference sequence - * and a count of all records with no start coordinate - */ - static public void printIndexStats(final File inputBamFile) { - try { - final BAMFileReader bam = new BAMFileReader(inputBamFile, null, false, false, ValidationStringency.SILENT, new DefaultSAMRecordFactory()); - if (!bam.hasIndex() || bam.getIndexType() == null) { - throw new SAMException("No index for bam file " + inputBamFile); - } - - BAMIndexMetaData[] data = getIndexStats(bam); - if (data == null) { - throw new SAMException("Exception in getting index statistics"); - } - - // read through all the bins of every reference. 
- int nRefs = bam.getFileHeader().getSequenceDictionary().size(); - for (int i = 0; i < nRefs; i++) { - final SAMSequenceRecord seq = bam.getFileHeader().getSequence(i); - if (seq == null) continue; - final String sequenceName = seq.getSequenceName(); - final int sequenceLength = seq.getSequenceLength(); - System.out.print(sequenceName + ' ' + "length=\t" + sequenceLength); - if (data[i] == null) { - System.out.println(); - continue; - } - System.out.println("\tAligned= " + data[i].getAlignedRecordCount() + - "\tUnaligned= " + data[i].getUnalignedRecordCount()); - } - System.out.println("NoCoordinateCount= " + data[0].getNoCoordinateRecordCount()); - } catch (IOException e) { - throw new SAMException("Exception in getting index statistics", e); - } - } - - /** - * Prints meta-data statistics from BAM index (.bai or .csi) file - * Statistics include count of aligned and unaligned reads for each reference sequence - * and a count of all records with no start coordinate - */ - static public BAMIndexMetaData[] getIndexStats(final BAMFileReader bam) { - - AbstractBAMFileIndex index = (AbstractBAMFileIndex) bam.getIndex(); - // read through all the bins of every reference. - int nRefs = index.getNumberOfReferences(); - BAMIndexMetaData[] result = new BAMIndexMetaData[nRefs == 0 ? 
1 : nRefs]; - for (int i = 0; i < nRefs; i++) { - result[i] = index.getMetaData(i); - } - - if (result[0] == null) { - result[0] = new BAMIndexMetaData(); - } - final Long noCoordCount = index.getNoCoordinateCount(); - if (noCoordCount != null) // null in old index files without metadata - result[0].setNoCoordinateRecordCount(noCoordCount); - - return result; - } -} +/* + * The MIT License + * + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools; + +import htsjdk.samtools.cram.BAIEntry; +import htsjdk.samtools.util.BlockCompressedFilePointerUtil; +import java.io.File; +import java.io.IOException; +import java.util.List; + +/** + * Metadata about the bam index contained within the bam index. + * One instance created per index file. + */ +public class BAMIndexMetaData { + + // information for the entire index. 
+ // stored at the end of the index + private long noCoordinateRecords = 0; + + // information for each reference. + // stored in two chunks in bin # MAX_BINS + private long firstOffset = -1; + private long lastOffset = 0; + private int alignedRecords = 0; + private int unAlignedRecords = 0; // unmapped, but associated with this reference + + /** + * Constructor used when writing an index + * construct one instance for each index generated + */ + BAMIndexMetaData() { + noCoordinateRecords = 0; + newReference(); + } + + /** + * Constructor used when reading an index + * construct one instance for each index generated + */ + BAMIndexMetaData(List chunkList) { + noCoordinateRecords = 0; + + if (chunkList == null || chunkList.isEmpty()) { + // System.out.println("No metadata chunks"); + } else if (chunkList.size() != 2) { + throw new SAMException("Unexpected number of metadata chunks " + (chunkList.size())); + } + // fill in the first/lastOffset un/alignedRecords from this + boolean firstChunk = true; + if (chunkList != null) { + for (Chunk c : chunkList) { + long start = c.getChunkStart(); + long end = c.getChunkEnd(); + if (firstChunk) { + firstOffset = start; + lastOffset = end; + firstChunk = false; + } else { + firstChunk = true; + alignedRecords = (int) start; + unAlignedRecords = (int) end; + } + } + } + } + + private BAMIndexMetaData( + final long firstOffset, final long lastOffset, final int alignedRecords, final int unAlignedRecords) { + this.firstOffset = firstOffset; + this.lastOffset = lastOffset; + this.alignedRecords = alignedRecords; + this.unAlignedRecords = unAlignedRecords; + } + + /** + * @return the count of aligned records associated with this reference + */ + public int getAlignedRecordCount() { + return alignedRecords; + } + + /** + * @return the count of unaligned records associated with this reference + */ + public int getUnalignedRecordCount() { + return unAlignedRecords; + } + + /** + * Call for each new reference sequence encountered + */ + 
void newReference() { + firstOffset = -1; + lastOffset = 0; + alignedRecords = 0; + unAlignedRecords = 0; + } + + /** + * Extract relevant metaData from the record and its filePointer + * Call only once per record in the file being indexed + * + * @param rec + */ + void recordMetaData(final SAMRecord rec) { + + final int alignmentStart = rec.getAlignmentStart(); + if (alignmentStart == SAMRecord.NO_ALIGNMENT_START) { + incrementNoCoordinateRecordCount(); + return; + } + + if (rec.getFileSource() == null) { + throw new SAMException("BAM cannot be indexed without setting a fileSource for record " + rec); + } + final Chunk newChunk = ((BAMFileSpan) rec.getFileSource().getFilePointer()).getSingleChunk(); + final long start = newChunk.getChunkStart(); + final long end = newChunk.getChunkEnd(); + + if (rec.getReadUnmappedFlag()) { + unAlignedRecords++; + } else { + alignedRecords++; + } + if (BlockCompressedFilePointerUtil.compare(start, firstOffset) < 1 || firstOffset == -1) { + this.firstOffset = start; + } + if (BlockCompressedFilePointerUtil.compare(lastOffset, end) < 1) { + this.lastOffset = end; + } + } + + // The resolution of a CRAM BAI index is more coarse than for BAM BAI. Each entry + // is represented by a BAIEntry that represents a slice (or, in the case of + // MULTI_REFERENCE slices, a subset of a slice), rather than SAMRecords. 
+ void recordMetaData(final BAIEntry baiEntry) { + alignedRecords += baiEntry.getMappedReadsCount(); + noCoordinateRecords += baiEntry.getUnmappedUnplacedReadsCount(); + unAlignedRecords += baiEntry.getUnmappedReadsCount(); + + final long start = baiEntry.getSliceByteOffsetFromCompressionHeaderStart(); + + if (BlockCompressedFilePointerUtil.compare(start, firstOffset) < 1 || firstOffset == -1) { + this.firstOffset = start; + // not actually used, so set it to a dummy value (start) + // see https://github.com/samtools/htsjdk/issues/401 + this.lastOffset = start; + } + } + + /** + * Call whenever a reference with no coordinate information is encountered in the bam file + */ + void incrementNoCoordinateRecordCount() { + noCoordinateRecords++; + } + + /** + * Set local variable. Normally noCoordinateRecord count accessed from AbstractBAMFileIndex when reading + */ + private void setNoCoordinateRecordCount(long count) { + noCoordinateRecords = count; + } + + /** + * @return the count of records with no coordinate information in the bam file. + * Not public, since only used by BAMIndexer when writing bam index. + * Readers of bam index should use AbstractBAMFileIndex.getNoCoordinateRecordCount. + */ + long getNoCoordinateRecordCount() { + return noCoordinateRecords; + } + + /** + * @return the first virtual file offset used by this reference + */ + long getFirstOffset() { + return firstOffset; + } + + /** + * @return the last virtual file offset used by this reference + */ + long getLastOffset() { + return lastOffset; + } + + /** + * Return a new metadata object shifted by a given (non-virtual) offset. + * + * @param offset the offset in bytes + * @return a new metadata object shifted by the given offset + * @see BlockCompressedFilePointerUtil#shift(long, long) + */ + BAMIndexMetaData shift(final long offset) { + final long newFirstOffset = firstOffset == -1 + ? 
firstOffset + : BlockCompressedFilePointerUtil.shift(firstOffset, offset); // -1 is unset + final long newLastOffset = + lastOffset == 0 ? lastOffset : BlockCompressedFilePointerUtil.shift(lastOffset, offset); // 0 is unset + return new BAMIndexMetaData(newFirstOffset, newLastOffset, alignedRecords, unAlignedRecords); + } + + /** + * Prints meta-data statistics from BAM index (.bai or .csi) file + * Statistics include count of aligned and unaligned reads for each reference sequence + * and a count of all records with no start coordinate + */ + public static void printIndexStats(final File inputBamFile) { + try { + final BAMFileReader bam = new BAMFileReader( + inputBamFile, null, false, false, ValidationStringency.SILENT, new DefaultSAMRecordFactory()); + if (!bam.hasIndex() || bam.getIndexType() == null) { + throw new SAMException("No index for bam file " + inputBamFile); + } + + BAMIndexMetaData[] data = getIndexStats(bam); + if (data == null) { + throw new SAMException("Exception in getting index statistics"); + } + + // read through all the bins of every reference. 
+ int nRefs = bam.getFileHeader().getSequenceDictionary().size(); + for (int i = 0; i < nRefs; i++) { + final SAMSequenceRecord seq = bam.getFileHeader().getSequence(i); + if (seq == null) continue; + final String sequenceName = seq.getSequenceName(); + final int sequenceLength = seq.getSequenceLength(); + System.out.print(sequenceName + ' ' + "length=\t" + sequenceLength); + if (data[i] == null) { + System.out.println(); + continue; + } + System.out.println("\tAligned= " + data[i].getAlignedRecordCount() + "\tUnaligned= " + + data[i].getUnalignedRecordCount()); + } + System.out.println("NoCoordinateCount= " + data[0].getNoCoordinateRecordCount()); + } catch (IOException e) { + throw new SAMException("Exception in getting index statistics", e); + } + } + + /** + * Prints meta-data statistics from BAM index (.bai or .csi) file + * Statistics include count of aligned and unaligned reads for each reference sequence + * and a count of all records with no start coordinate + */ + public static BAMIndexMetaData[] getIndexStats(final BAMFileReader bam) { + + AbstractBAMFileIndex index = (AbstractBAMFileIndex) bam.getIndex(); + // read through all the bins of every reference. + int nRefs = index.getNumberOfReferences(); + BAMIndexMetaData[] result = new BAMIndexMetaData[nRefs == 0 ? 
1 : nRefs]; + for (int i = 0; i < nRefs; i++) { + result[i] = index.getMetaData(i); + } + + if (result[0] == null) { + result[0] = new BAMIndexMetaData(); + } + final Long noCoordCount = index.getNoCoordinateCount(); + if (noCoordCount != null) // null in old index files without metadata + result[0].setNoCoordinateRecordCount(noCoordCount); + + return result; + } +} diff --git a/src/main/java/htsjdk/samtools/BAMIndexWriter.java b/src/main/java/htsjdk/samtools/BAMIndexWriter.java index aafcb5fbff..b6aaa2a4cb 100644 --- a/src/main/java/htsjdk/samtools/BAMIndexWriter.java +++ b/src/main/java/htsjdk/samtools/BAMIndexWriter.java @@ -30,7 +30,7 @@ * * @author mborkan */ -interface BAMIndexWriter extends Closeable { // note - only package visibility +interface BAMIndexWriter extends Closeable { // note - only package visibility /** * Write the data for one alignments to one reference sequence @@ -51,5 +51,4 @@ interface BAMIndexWriter extends Closeable { // note - only package visibility */ @Override public void close(); - -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/samtools/BAMIndexer.java b/src/main/java/htsjdk/samtools/BAMIndexer.java index 139af8d9a0..341b268bad 100644 --- a/src/main/java/htsjdk/samtools/BAMIndexer.java +++ b/src/main/java/htsjdk/samtools/BAMIndexer.java @@ -24,7 +24,6 @@ package htsjdk.samtools; import htsjdk.samtools.util.Log; - import java.io.File; import java.io.OutputStream; import java.nio.file.Path; @@ -88,7 +87,8 @@ public BAMIndexer(final OutputStream output, final SAMFileHeader fileHeader) { * if false, leave uninitialized values as -1, which is required when merging index files * (see {@link BAMIndexMerger}) */ - public BAMIndexer(final OutputStream output, final SAMFileHeader fileHeader, final boolean fillInUninitializedValues) { + public BAMIndexer( + final OutputStream output, final SAMFileHeader fileHeader, final boolean fillInUninitializedValues) { this(fileHeader, numRefs -> new BinaryBAMIndexWriter(numRefs, 
output), fillInUninitializedValues); } @@ -98,13 +98,16 @@ public BAMIndexer(final OutputStream output, final SAMFileHeader fileHeader, fin * @param fileHeader header for the corresponding bam file. * @param createWrite a lambda that, given an Integer numReferences value, will create a BinaryBAMIndexWriter * with that value and an appropriate output. - */ - private BAMIndexer(final SAMFileHeader fileHeader, Function createWriter, final boolean fillInUninitializedValues) { + */ + private BAMIndexer( + final SAMFileHeader fileHeader, + Function createWriter, + final boolean fillInUninitializedValues) { if (fileHeader.getSortOrder() != SAMFileHeader.SortOrder.coordinate) { if (fileHeader.getSortOrder() == SAMFileHeader.SortOrder.unsorted) { - log.warn("For indexing, the BAM file is required to be coordinate sorted. Attempting to index \"unsorted\" BAM file."); - } - else { + log.warn( + "For indexing, the BAM file is required to be coordinate sorted. Attempting to index \"unsorted\" BAM file."); + } else { throw new SAMException("Indexing requires a coordinate-sorted input BAM."); } } @@ -163,7 +166,7 @@ private void advanceToReference(final int nextReference) { * @param output BAM Index (.bai) file (or bai.txt file when text) * @param textOutput Whether to create text output or binary */ - static public void createAndWriteIndex(final File input, final File output, final boolean textOutput) { + public static void createAndWriteIndex(final File input, final File output, final boolean textOutput) { // content is from an existing bai file. 
@@ -231,8 +234,8 @@ public void processAlignment(final SAMRecord rec) { // various checks final int reference = rec.getReferenceIndex(); if (reference != currentReference) { - throw new SAMException("Unexpected reference " + reference + - " when constructing index for " + currentReference + " for record " + rec); + throw new SAMException("Unexpected reference " + reference + " when constructing index for " + + currentReference + " for record " + rec); } binningIndexBuilder.processFeature(new BinningIndexBuilder.FeatureToBeIndexed() { @@ -247,18 +250,20 @@ public int getEnd() { } @Override - public Integer getIndexingBin() { return rec.computeIndexingBin(); } + public Integer getIndexingBin() { + return rec.computeIndexingBin(); + } @Override public Chunk getChunk() { final SAMFileSource source = rec.getFileSource(); if (source == null) { - throw new SAMException("No source (virtual file offsets); needed for indexing on BAM Record " + rec); + throw new SAMException( + "No source (virtual file offsets); needed for indexing on BAM Record " + rec); } return ((BAMFileSpan) source.getFilePointer()).getSingleChunk(); } }); - } /** @@ -270,14 +275,17 @@ public Chunk getChunk() { public BAMIndexContent processReference(final int reference) { if (reference != currentReference) { - throw new SAMException("Unexpected reference " + reference + " when constructing index for " + currentReference); + throw new SAMException( + "Unexpected reference " + reference + " when constructing index for " + currentReference); } final BinningIndexContent indexContent = binningIndexBuilder.generateIndexContent(); if (indexContent == null) return null; - return new BAMIndexContent(indexContent.getReferenceSequence(), indexContent.getBins(), - indexStats, indexContent.getLinearIndex()); - + return new BAMIndexContent( + indexContent.getReferenceSequence(), + indexContent.getBins(), + indexStats, + indexContent.getLinearIndex()); } /** @@ -295,8 +303,10 @@ void startNewReference() { // I'm not 
crazy about recycling this object, but that is the way it was originally written and // it helps keep track of no-coordinate read count (which shouldn't be stored in this class anyway). indexStats.newReference(); - binningIndexBuilder = new BinningIndexBuilder(currentReference, - sequenceDictionary.getSequence(currentReference).getSequenceLength(), fillInUninitializedValues); + binningIndexBuilder = new BinningIndexBuilder( + currentReference, + sequenceDictionary.getSequence(currentReference).getSequenceLength(), + fillInUninitializedValues); } } diff --git a/src/main/java/htsjdk/samtools/BAMIteratorFilter.java b/src/main/java/htsjdk/samtools/BAMIteratorFilter.java index bc7d2c0259..3af3ddaf47 100644 --- a/src/main/java/htsjdk/samtools/BAMIteratorFilter.java +++ b/src/main/java/htsjdk/samtools/BAMIteratorFilter.java @@ -1,12 +1,14 @@ package htsjdk.samtools; - /** * Interface implemented by filetering iterators used for BAM/CRAM readers. */ interface BAMIteratorFilter { public enum IntervalComparison { - BEFORE, AFTER, OVERLAPPING, CONTAINED + BEFORE, + AFTER, + OVERLAPPING, + CONTAINED } /** @@ -14,7 +16,9 @@ public enum IntervalComparison { * how to handle each SAMRecord. 
*/ public enum FilteringIteratorState { - MATCHES_FILTER, STOP_ITERATION, CONTINUE_ITERATION + MATCHES_FILTER, + STOP_ITERATION, + CONTINUE_ITERATION } /** @@ -23,4 +27,3 @@ public enum FilteringIteratorState { */ FilteringIteratorState compareToFilter(final SAMRecord record); } - diff --git a/src/main/java/htsjdk/samtools/BAMQueryMultipleIntervalsIteratorFilter.java b/src/main/java/htsjdk/samtools/BAMQueryMultipleIntervalsIteratorFilter.java index 8dadc692eb..2f1fba9e0b 100644 --- a/src/main/java/htsjdk/samtools/BAMQueryMultipleIntervalsIteratorFilter.java +++ b/src/main/java/htsjdk/samtools/BAMQueryMultipleIntervalsIteratorFilter.java @@ -10,9 +10,7 @@ public class BAMQueryMultipleIntervalsIteratorFilter implements BAMIteratorFilte final boolean contained; int intervalIndex = 0; - - public BAMQueryMultipleIntervalsIteratorFilter(final QueryInterval[] intervals, - final boolean contained) { + public BAMQueryMultipleIntervalsIteratorFilter(final QueryInterval[] intervals, final boolean contained) { this.contained = contained; this.intervals = intervals; } @@ -23,14 +21,20 @@ public FilteringIteratorState compareToFilter(final SAMRecord record) { final IntervalComparison comparison = compareIntervalToRecord(intervals[intervalIndex], record); switch (comparison) { // Interval is before SAMRecord. Try next interval; - case BEFORE: ++intervalIndex; break; + case BEFORE: + ++intervalIndex; + break; // Interval is after SAMRecord. Keep scanning forward in SAMRecords - case AFTER: return FilteringIteratorState.CONTINUE_ITERATION; + case AFTER: + return FilteringIteratorState.CONTINUE_ITERATION; // Found a good record - case CONTAINED: return FilteringIteratorState.MATCHES_FILTER; + case CONTAINED: + return FilteringIteratorState.MATCHES_FILTER; // Either found a good record, or else keep scanning SAMRecords - case OVERLAPPING: return - (contained ? 
FilteringIteratorState.CONTINUE_ITERATION : FilteringIteratorState.MATCHES_FILTER); + case OVERLAPPING: + return (contained + ? FilteringIteratorState.CONTINUE_ITERATION + : FilteringIteratorState.MATCHES_FILTER); } } // Went past the last interval @@ -39,7 +43,7 @@ public FilteringIteratorState compareToFilter(final SAMRecord record) { public static IntervalComparison compareIntervalToRecord(final QueryInterval interval, final SAMRecord record) { // interval.end <= 0 implies the end of the reference sequence. - final int intervalEnd = (interval.end <= 0? Integer.MAX_VALUE: interval.end); + final int intervalEnd = (interval.end <= 0 ? Integer.MAX_VALUE : interval.end); final int alignmentEnd; if (record.getReadUnmappedFlag() && record.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START) { // Unmapped read with coordinate of mate. diff --git a/src/main/java/htsjdk/samtools/BAMRecord.java b/src/main/java/htsjdk/samtools/BAMRecord.java index 69744b4486..e55080a1ee 100644 --- a/src/main/java/htsjdk/samtools/BAMRecord.java +++ b/src/main/java/htsjdk/samtools/BAMRecord.java @@ -23,13 +23,12 @@ */ package htsjdk.samtools; -import htsjdk.samtools.util.StringUtil; +import static htsjdk.samtools.SAMTag.CG; +import htsjdk.samtools.util.StringUtil; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import static htsjdk.samtools.SAMTag.CG; - /** * Wrapper class for binary BAM records. * Delays unpacking all data binary until requested. @@ -44,7 +43,7 @@ public class BAMRecord extends SAMRecord { * Constant for converting between the number of operators in a Cigar and the length * of the int[] array needed to represent it in the BAM format */ - static public final short CIGAR_SIZE_MULTIPLIER = 4; + public static final short CIGAR_SIZE_MULTIPLIER = 4; /** * Maximal number of cigar operators that can be represented normally in the cigar part of the bam record. 
@@ -54,13 +53,13 @@ public class BAMRecord extends SAMRecord { * When a BAM record is decoded, the sentinel cigar informs of the existance of the CG tag, which is decoded and removed. * The sentinel value is then replaced with the actual cigar (in memory). */ - public final static int MAX_CIGAR_OPERATORS = 0xffff; + public static final int MAX_CIGAR_OPERATORS = 0xffff; - public final static int MAX_CIGAR_ELEMENT_LENGTH = (1 << 28) - 1; + public static final int MAX_CIGAR_ELEMENT_LENGTH = (1 << 28) - 1; /** * Number of operators in "Sentinel" cigar xSyN */ - private final static int LONG_CIGAR_SENTINEL_LENGTH = 2; + private static final int LONG_CIGAR_SENTINEL_LENGTH = 2; /** * Variable-length part of BAMRecord. Lazily decoded. @@ -93,28 +92,31 @@ public class BAMRecord extends SAMRecord { * than NO_ALIGNMENT_REFERENCE_INDEX (-1), then the specified index values must exist in the sequence dictionary * in the header argument. */ - protected BAMRecord(final SAMFileHeader header, - final int referenceID, - final int coordinate, - final short readNameLength, - final short mappingQuality, - final int indexingBin, - final int cigarLen, - final int flags, - final int readLen, - final int mateReferenceID, - final int mateCoordinate, - final int insertSize, - final byte[] restOfData) { + protected BAMRecord( + final SAMFileHeader header, + final int referenceID, + final int coordinate, + final short readNameLength, + final short mappingQuality, + final int indexingBin, + final int cigarLen, + final int flags, + final int readLen, + final int mateReferenceID, + final int mateCoordinate, + final int insertSize, + final byte[] restOfData) { super(header); - setReferenceIndex(referenceID); + // Set reference index and name directly, avoiding the round-trip through + // setReferenceIndex -> resolveNameFromIndex -> setReferenceName -> resolveIndexFromName + setReferenceNameAndIndex(resolveNameFromIndex(referenceID, header), referenceID); setAlignmentStart(coordinate); 
mReadNameLength = readNameLength; setMappingQuality(mappingQuality); mCigarLength = cigarLen; setFlags(flags); mReadLength = readLen; - setMateReferenceIndex(mateReferenceID); + setMateReferenceNameAndIndex(resolveNameFromIndex(mateReferenceID, header), mateReferenceID); setMateAlignmentStart(mateCoordinate); setInferredInsertSize(insertSize); mRestOfBinaryData = restOfData; @@ -279,7 +281,9 @@ public Cigar getCigar() { extractCigarFromCGAttribute(super.getCigar()); } - if (null != getHeader() && getValidationStringency() != ValidationStringency.SILENT && !this.getReadUnmappedFlag()) { + if (null != getHeader() + && getValidationStringency() != ValidationStringency.SILENT + && !this.getReadUnmappedFlag()) { // Don't know line number, and don't want to force read name to be decoded. SAMUtils.processValidationErrors(validateCigar(-1L), -1, getValidationStringency()); } @@ -294,13 +298,12 @@ public Cigar getCigar() { */ static boolean isSentinelCigar(final Cigar cigar, final int readLength) { // There's an implicit assumption here there readLength == length of read in cigar, unless readLength==0 - return cigar.numCigarElements() == 2 && - cigar.getCigarElement(1).getOperator() == CigarOperator.N && - cigar.getCigarElement(0).getOperator() == CigarOperator.S && - (cigar.getCigarElement(0).getLength() == readLength || readLength == 0) ; + return cigar.numCigarElements() == 2 + && cigar.getCigarElement(1).getOperator() == CigarOperator.N + && cigar.getCigarElement(0).getOperator() == CigarOperator.S + && (cigar.getCigarElement(0).getLength() == readLength || readLength == 0); } - /** * Long cigars (with more than 64K operators) cannot be encoded into BAM. Instead a sentinel cigar is * placed as a placeholder, and the actual cigar is placed in the CG tag. 
This method @@ -312,8 +315,8 @@ private void extractCigarFromCGAttribute(final Cigar sentinelCigar) throws Illeg if (cigarFromCG == null) return; // place the integer array into a buffer so we can decode it - final ByteBuffer byteBuffer = ByteBuffer.allocate(cigarFromCG.length * CIGAR_SIZE_MULTIPLIER) - .order(ByteOrder.LITTLE_ENDIAN); + final ByteBuffer byteBuffer = + ByteBuffer.allocate(cigarFromCG.length * CIGAR_SIZE_MULTIPLIER).order(ByteOrder.LITTLE_ENDIAN); byteBuffer.asIntBuffer().put(cigarFromCG); // decode cigar @@ -323,9 +326,7 @@ private void extractCigarFromCGAttribute(final Cigar sentinelCigar) throws Illeg if (decodedCigar.numCigarElements() <= MAX_CIGAR_OPERATORS) { throw new IllegalStateException(String.format( "Only Cigar with > %d operators should be placed in CG tag. Found %d operators. \n Here's the Cigar:\n%s", - MAX_CIGAR_OPERATORS, - decodedCigar.getCigarElements().size(), - decodedCigar.toString())); + MAX_CIGAR_OPERATORS, decodedCigar.getCigarElements().size(), decodedCigar.toString())); } if (decodedCigar.getReferenceLength() != sentinelCigar.getReferenceLength()) { @@ -337,16 +338,13 @@ private void extractCigarFromCGAttribute(final Cigar sentinelCigar) throws Illeg decodedCigar.toString())); } - if (decodedCigar.getReadLength() != sentinelCigar.getReadLength() ) { + if (decodedCigar.getReadLength() != sentinelCigar.getReadLength()) { throw new IllegalStateException(String.format( "Sentinel cigar and %s cigar should have the same read length. Found %d and %d.\n Here's the Cigar:\n%s", - CG.name(), - sentinelCigar.getReadLength(), - decodedCigar.getReadLength(), - decodedCigar.toString())); + CG.name(), sentinelCigar.getReadLength(), decodedCigar.getReadLength(), decodedCigar.toString())); } - //used initializeCigar instead of setCigar so as to not clobber the indexingBin. + // used initializeCigar instead of setCigar so as to not clobber the indexingBin. initializeCigar(decodedCigar); // remove CG attribute. 
@@ -409,7 +407,8 @@ private void decodeAttributes() { mAttributesDecoded = true; final int tagsOffset = readNameSize() + cigarSize() + basesSize() + qualsSize(); final int tagsSize = mRestOfBinaryData.length - tagsOffset; - final SAMBinaryTagAndValue attributes = BinaryTagCodec.readTags(mRestOfBinaryData, tagsOffset, tagsSize, getValidationStringency()); + final SAMBinaryTagAndValue attributes = + BinaryTagCodec.readTags(mRestOfBinaryData, tagsOffset, tagsSize, getValidationStringency()); setAttributes(attributes); // if there's a CG tag, we should getCigar() so that the CG tag has a chance of turning into the CIGAR diff --git a/src/main/java/htsjdk/samtools/BAMRecordCodec.java b/src/main/java/htsjdk/samtools/BAMRecordCodec.java index c8915af9db..31c62fbe34 100644 --- a/src/main/java/htsjdk/samtools/BAMRecordCodec.java +++ b/src/main/java/htsjdk/samtools/BAMRecordCodec.java @@ -23,22 +23,21 @@ */ package htsjdk.samtools; +import static htsjdk.samtools.SAMTag.CG; + import htsjdk.samtools.util.BinaryCodec; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.RuntimeEOFException; import htsjdk.samtools.util.SortingCollection; - import java.io.InputStream; import java.io.OutputStream; import java.util.Arrays; -import static htsjdk.samtools.SAMTag.CG; - /** * Class for translating between in-memory and disk representation of BAMRecord. 
*/ public class BAMRecordCodec implements SortingCollection.Codec { - private final static Log LOG = Log.getInstance(BAMRecordCodec.class); + private static final Log LOG = Log.getInstance(BAMRecordCodec.class); private final SAMFileHeader header; private final BinaryCodec binaryCodec = new BinaryCodec(); @@ -116,14 +115,17 @@ public void encode(final SAMRecord alignment) { final int[] cigarEncoding = BinaryCigarCodec.encode(alignment.getCigar()); alignment.setAttribute(CG.name(), cigarEncoding); cigarToWrite = makeSentinelCigar(alignment.getCigar()); - } - else { + } else { cigarToWrite = alignment.getCigar(); } - int blockSize = BAMFileConstants.FIXED_BLOCK_SIZE + alignment.getReadNameLength() + 1 + // null terminated - cigarToWrite.numCigarElements() * BAMRecord.CIGAR_SIZE_MULTIPLIER + - (readLength + 1) / 2 + // 2 bases per byte, round up + int blockSize = BAMFileConstants.FIXED_BLOCK_SIZE + + alignment.getReadNameLength() + + 1 + + // null terminated + cigarToWrite.numCigarElements() * BAMRecord.CIGAR_SIZE_MULTIPLIER + + (readLength + 1) / 2 + + // 2 bases per byte, round up readLength; final int attributesSize = alignment.getAttributesBinarySize(); @@ -167,11 +169,12 @@ public void encode(final SAMRecord alignment) { // when the record was read from a BAM file. 
this.binaryCodec.writeBytes(variableLengthBinaryBlock); } else { - if (alignment.getReadLength() != alignment.getBaseQualities().length && - alignment.getBaseQualities().length != 0) { - throw new RuntimeException("Mismatch between read length and quals length writing read " + - alignment.getReadName() + "; read length: " + alignment.getReadLength() + - "; quals length: " + alignment.getBaseQualities().length); + if (alignment.getReadLength() != alignment.getBaseQualities().length + && alignment.getBaseQualities().length != 0) { + throw new RuntimeException( + "Mismatch between read length and quals length writing read " + alignment.getReadName() + + "; read length: " + alignment.getReadLength() + "; quals length: " + + alignment.getBaseQualities().length); } this.binaryCodec.writeString(alignment.getReadName(), false, true); final int[] binaryCigar = BinaryCigarCodec.encode(cigarToWrite); @@ -213,17 +216,15 @@ public void encode(final SAMRecord alignment) { public static Cigar makeSentinelCigar(final Cigar cigar) { // in BAM there are only 28 bits for a cigar operator, so this a protection against overflow. 
if (cigar.getReadLength() > BAMRecord.MAX_CIGAR_ELEMENT_LENGTH) { - throw new IllegalArgumentException( - String.format( - "Cannot encode (to BAM) a record with more than %d cigar operations and a read-length greater than %d.", - BAMRecord.MAX_CIGAR_OPERATORS, BAMRecord.MAX_CIGAR_ELEMENT_LENGTH)); + throw new IllegalArgumentException(String.format( + "Cannot encode (to BAM) a record with more than %d cigar operations and a read-length greater than %d.", + BAMRecord.MAX_CIGAR_OPERATORS, BAMRecord.MAX_CIGAR_ELEMENT_LENGTH)); } if (cigar.getReferenceLength() > BAMRecord.MAX_CIGAR_ELEMENT_LENGTH) { - throw new IllegalArgumentException( - String.format( - "Cannot encode (to BAM) a record that has than %d cigar operations and spans more than %d bases on the reference.", - BAMRecord.MAX_CIGAR_OPERATORS, BAMRecord.MAX_CIGAR_ELEMENT_LENGTH)); + throw new IllegalArgumentException(String.format( + "Cannot encode (to BAM) a record that has than %d cigar operations and spans more than %d bases on the reference.", + BAMRecord.MAX_CIGAR_OPERATORS, BAMRecord.MAX_CIGAR_ELEMENT_LENGTH)); } return new Cigar(Arrays.asList( @@ -237,11 +238,13 @@ public static Cigar makeSentinelCigar(final Cigar cigar) { * @return true if the sequence is too large, false otherwise */ private boolean warnIfReferenceIsTooLargeForBinField(final SAMRecord rec) { - final SAMSequenceRecord sequence = rec.getHeader() != null ? rec.getHeader().getSequence(rec.getReferenceName()) : null; + final SAMSequenceRecord sequence = + rec.getHeader() != null ? 
rec.getHeader().getSequence(rec.getReferenceName()) : null; final boolean tooLarge = sequence != null && SAMUtils.isReferenceSequenceIncompatibleWithBAI(sequence); if (!isReferenceSizeWarningShowed && tooLarge && rec.getValidationStringency() != ValidationStringency.SILENT) { LOG.warn("Reference length is too large for BAM bin field."); - LOG.warn("Reads on references longer than " + GenomicIndexUtil.BIN_GENOMIC_SPAN + "bp will have bin set to 0."); + LOG.warn("Reads on references longer than " + GenomicIndexUtil.BIN_GENOMIC_SPAN + + "bp will have bin set to 0."); isReferenceSizeWarningShowed = true; } @@ -281,13 +284,20 @@ public SAMRecord decode() { final byte[] restOfRecord = new byte[recordLength - BAMFileConstants.FIXED_BLOCK_SIZE]; this.binaryCodec.readBytes(restOfRecord); final BAMRecord ret = this.samRecordFactory.createBAMRecord( - header, referenceID, coordinate, readNameLength, mappingQuality, - bin, cigarLen, flags, readLen, mateReferenceID, mateCoordinate, insertSize, restOfRecord); + header, + referenceID, + coordinate, + readNameLength, + mappingQuality, + bin, + cigarLen, + flags, + readLen, + mateReferenceID, + mateCoordinate, + insertSize, + restOfRecord); - if (null != header) { - // don't reset a null header as this will clobber the reference and mate reference indices - ret.setHeader(header); - } return ret; } } diff --git a/src/main/java/htsjdk/samtools/BAMSBIIndexer.java b/src/main/java/htsjdk/samtools/BAMSBIIndexer.java index 6400f81ff7..ec0eb36813 100644 --- a/src/main/java/htsjdk/samtools/BAMSBIIndexer.java +++ b/src/main/java/htsjdk/samtools/BAMSBIIndexer.java @@ -30,7 +30,6 @@ import htsjdk.samtools.util.FileExtensions; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.RuntimeEOFException; - import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; @@ -52,7 +51,8 @@ public final class BAMSBIIndexer { */ public static void createIndex(final Path bamFile, final long granularity) throws IOException { 
final Path splittingBaiFile = IOUtil.addExtension(bamFile, FileExtensions.SBI); - try (SeekableStream in = new SeekablePathStream(bamFile); OutputStream out = Files.newOutputStream(splittingBaiFile)) { + try (SeekableStream in = new SeekablePathStream(bamFile); + OutputStream out = Files.newOutputStream(splittingBaiFile)) { createIndex(in, out, granularity); } } @@ -65,7 +65,8 @@ public static void createIndex(final Path bamFile, final long granularity) throw * @param granularity write the offset of every n-th alignment to the index * @throws IOException as per java IO contract */ - public static void createIndex(final SeekableStream in, final OutputStream out, final long granularity) throws IOException { + public static void createIndex(final SeekableStream in, final OutputStream out, final long granularity) + throws IOException { long recordStart = SAMUtils.findVirtualOffsetOfFirstRecordInBam(in); try (BlockCompressedInputStream blockIn = new BlockCompressedInputStream(in)) { blockIn.seek(recordStart); diff --git a/src/main/java/htsjdk/samtools/BAMStartingAtIteratorFilter.java b/src/main/java/htsjdk/samtools/BAMStartingAtIteratorFilter.java index 8bd8eb51c8..533dabd447 100644 --- a/src/main/java/htsjdk/samtools/BAMStartingAtIteratorFilter.java +++ b/src/main/java/htsjdk/samtools/BAMStartingAtIteratorFilter.java @@ -57,7 +57,7 @@ public FilteringIteratorState compareToFilter(final SAMRecord record) { if (alignmentStart > mRegionStart) { // If scanned beyond target region, end iteration return FilteringIteratorState.STOP_ITERATION; - } else if (alignmentStart == mRegionStart) { + } else if (alignmentStart == mRegionStart) { return FilteringIteratorState.MATCHES_FILTER; } else { return FilteringIteratorState.CONTINUE_ITERATION; diff --git a/src/main/java/htsjdk/samtools/BAMStreamWriter.java b/src/main/java/htsjdk/samtools/BAMStreamWriter.java index c720655fb0..968c91a081 100644 --- a/src/main/java/htsjdk/samtools/BAMStreamWriter.java +++ 
b/src/main/java/htsjdk/samtools/BAMStreamWriter.java @@ -26,11 +26,10 @@ import htsjdk.samtools.util.BinaryCodec; import htsjdk.samtools.util.BlockCompressedOutputStream; import htsjdk.samtools.util.RuntimeIOException; -import org.apache.commons.compress.utils.CountingOutputStream; - import java.io.IOException; import java.io.OutputStream; import java.nio.file.Path; +import org.apache.commons.compress.utils.CountingOutputStream; /** * Class for writing SAMRecords in BAM format to an output stream. @@ -53,7 +52,12 @@ public class BAMStreamWriter { * @param sbiGranularity the granularity of the SBI index (reads per entry) * @param header the SAM header */ - public BAMStreamWriter(OutputStream outputStream, OutputStream indexStream, OutputStream sbiStream, long sbiGranularity, SAMFileHeader header) { + public BAMStreamWriter( + OutputStream outputStream, + OutputStream indexStream, + OutputStream sbiStream, + long sbiGranularity, + SAMFileHeader header) { countingOut = new CountingOutputStream(outputStream); compressedOut = new BlockCompressedOutputStream(countingOut, (Path) null); bamRecordCodec = new BAMRecordCodec(header); @@ -123,8 +127,7 @@ public void finish(final boolean writeTerminatorBlock) { // If we didn't do this then we would have an invalid virtual file pointer if a BGZF file // were concatenated following this one. 
if (bamIndexer != null && previousSamRecord != null) { - previousSamRecordChunk = - new Chunk(previousSamRecordChunk.getChunkStart(), finalVirtualOffset); + previousSamRecordChunk = new Chunk(previousSamRecordChunk.getChunkStart(), finalVirtualOffset); previousSamRecord.setFileSource(new SAMFileSource(null, new BAMFileSpan(previousSamRecordChunk))); bamIndexer.processAlignment(previousSamRecord); } diff --git a/src/main/java/htsjdk/samtools/BamConverter.java b/src/main/java/htsjdk/samtools/BamConverter.java new file mode 100644 index 0000000000..f5b14d4cac --- /dev/null +++ b/src/main/java/htsjdk/samtools/BamConverter.java @@ -0,0 +1,125 @@ +package htsjdk.samtools; + +import java.io.File; + +/** + * Simple command-line tool for reading and optionally converting BAM files, primarily + * for experimenting with BAM read-path profiling. + * + *

    Usage: + *

    + *   java -cp htsjdk.jar htsjdk.samtools.BamConverter input.bam [output.bam]
    + * 
    + * + *

    If no output is specified, records are read and iterated but not written. + */ +public class BamConverter { + + private static final String USAGE = String.join( + "\n", + "Usage: BamConverter [output]", + "", + "Read and optionally convert a BAM file.", + "", + "Arguments:", + " input Input BAM file", + " output Optional output BAM file (omit to read-only)"); + + /** + * Entry point. Parses command-line arguments and performs the read/conversion. + * + * @param args command-line arguments (see USAGE for details) + */ + public static void main(final String[] args) { + if (hasFlag(args, "--help") || hasFlag(args, "-h")) { + System.out.println(USAGE); + System.exit(0); + } + if (args.length < 1) { + System.err.println(USAGE); + System.exit(1); + } + + final boolean eager = hasFlag(args, "--eager"); + // Collect positional args (non-flag arguments) + final String[] positional = + java.util.Arrays.stream(args).filter(a -> !a.startsWith("--")).toArray(String[]::new); + if (positional.length < 1) { + System.err.println(USAGE); + System.exit(1); + } + final String inputPath = positional[0]; + final String outputPath = positional.length > 1 ? positional[1] : null; + + if (outputPath != null) { + System.err.printf("Converting %s -> %s%s%n", inputPath, outputPath, eager ? " (eager decode)" : ""); + } else { + System.err.printf("Reading %s (no output%s)%n", inputPath, eager ? 
", eager decode" : ""); + } + + final SamReaderFactory readerFactory = + SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT); + + long count = 0; + final long startTime = System.currentTimeMillis(); + + try (final SamReader reader = readerFactory.open(new File(inputPath))) { + final SAMFileHeader header = reader.getFileHeader(); + + if (outputPath != null) { + final SAMFileWriterFactory writerFactory = new SAMFileWriterFactory(); + try (final SAMFileWriter writer = + writerFactory.makeBAMWriter(header, true, new File(outputPath).toPath())) { + for (final SAMRecord record : reader) { + if (eager) record.eagerDecode(); + writer.addAlignment(record); + count++; + if (count % 1_000_000 == 0) { + System.err.printf(" ... %,d records%n", count); + } + } + } + } else { + for (final SAMRecord record : reader) { + if (eager) record.eagerDecode(); + count++; + if (count % 1_000_000 == 0) { + System.err.printf(" ... %,d records%n", count); + } + } + } + } catch (final Exception e) { + die("Error: " + e.getMessage()); + } + + final long elapsed = System.currentTimeMillis() - startTime; + final long inputSize = new File(inputPath).length(); + + if (outputPath != null) { + final long outputSize = new File(outputPath).length(); + System.err.printf( + "Done. %,d records in %.1fs. Input: %,d bytes, Output: %,d bytes (%.1f%%)%n", + count, + elapsed / 1000.0, + inputSize, + outputSize, + inputSize > 0 ? (100.0 * outputSize / inputSize) : 0); + } else { + System.err.printf("Done. %,d records in %.1fs. 
Input: %,d bytes%n", count, elapsed / 1000.0, inputSize); + } + } + + private static boolean hasFlag(final String[] args, final String flag) { + for (final String arg : args) { + if (flag.equals(arg)) return true; + } + return false; + } + + private static void die(final String message) { + System.err.println("ERROR: " + message); + System.err.println(); + System.err.println(USAGE); + System.exit(1); + } +} diff --git a/src/main/java/htsjdk/samtools/BamFileIoUtils.java b/src/main/java/htsjdk/samtools/BamFileIoUtils.java index 709c8ed76d..761a22a8f7 100644 --- a/src/main/java/htsjdk/samtools/BamFileIoUtils.java +++ b/src/main/java/htsjdk/samtools/BamFileIoUtils.java @@ -13,14 +13,12 @@ import htsjdk.samtools.util.Md5CalculatingOutputStream; import htsjdk.samtools.util.RuntimeIOException; import htsjdk.utils.ValidationUtils; - import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import java.util.List; public class BamFileIoUtils { @@ -40,11 +38,15 @@ public static void reheaderBamFile(final SAMFileHeader samFileHeader, final Path reheaderBamFile(samFileHeader, inputFile, outputFile, true, true); } - /** * Support File input types for backward compatibility. Use the same method with Path inputs below. 
*/ - public static void reheaderBamFile(final SAMFileHeader samFileHeader, final File inputFile, final File outputFile, final boolean createMd5, final boolean createIndex) { + public static void reheaderBamFile( + final SAMFileHeader samFileHeader, + final File inputFile, + final File outputFile, + final boolean createMd5, + final boolean createIndex) { reheaderBamFile(samFileHeader, IOUtil.toPath(inputFile), IOUtil.toPath(outputFile), createMd5, createIndex); } @@ -57,7 +59,12 @@ public static void reheaderBamFile(final SAMFileHeader samFileHeader, final File * @param createMd5 Whether or not to create an MD5 file for the new BAM * @param createIndex Whether or not to create an index file for the new BAM */ - public static void reheaderBamFile(final SAMFileHeader samFileHeader, final Path inputFile, final Path outputFile, final boolean createMd5, final boolean createIndex) { + public static void reheaderBamFile( + final SAMFileHeader samFileHeader, + final Path inputFile, + final Path outputFile, + final boolean createMd5, + final boolean createIndex) { ValidationUtils.nonNull(inputFile); ValidationUtils.nonNull(outputFile); IOUtil.assertFileIsReadable(inputFile); @@ -79,7 +86,11 @@ public static void reheaderBamFile(final SAMFileHeader samFileHeader, final Path } } - public static void blockCopyBamFile(final File inputFile, final OutputStream outputStream, final boolean skipHeader, final boolean skipTerminator) { + public static void blockCopyBamFile( + final File inputFile, + final OutputStream outputStream, + final boolean skipHeader, + final boolean skipTerminator) { blockCopyBamFile(IOUtil.toPath(inputFile), outputStream, skipHeader, skipTerminator); } @@ -91,10 +102,16 @@ public static void blockCopyBamFile(final File inputFile, final OutputStream out * @param skipHeader If true, the header of the input file will not be copied to the output stream * @param skipTerminator If true, the terminator block of the input file will not be written to the output stream 
*/ - public static void blockCopyBamFile(final Path inputFile, final OutputStream outputStream, final boolean skipHeader, final boolean skipTerminator) { - try (final SeekablePathStream in = new SeekablePathStream(inputFile)){ - // a) It's good to check that the end of the file is valid and b) we need to know if there's a terminator block and not copy it if skipTerminator is true - final BlockCompressedInputStream.FileTermination term = BlockCompressedInputStream.checkTermination(inputFile); + public static void blockCopyBamFile( + final Path inputFile, + final OutputStream outputStream, + final boolean skipHeader, + final boolean skipTerminator) { + try (final SeekablePathStream in = new SeekablePathStream(inputFile)) { + // a) It's good to check that the end of the file is valid and b) we need to know if there's a terminator + // block and not copy it if skipTerminator is true + final BlockCompressedInputStream.FileTermination term = + BlockCompressedInputStream.checkTermination(inputFile); if (term == BlockCompressedInputStream.FileTermination.DEFECTIVE) throw new SAMException(inputFile.toUri() + " does not have a valid GZIP block at the end of the file."); @@ -109,7 +126,8 @@ public static void blockCopyBamFile(final Path inputFile, final OutputStream out // If we found the end of the header then write the remainder of this block out as a // new gzip block and then break out of the while loop (tsato: update this comment) if (remainingInBlock >= 0) { - final BlockCompressedOutputStream blockOut = new BlockCompressedOutputStream(outputStream, (Path) null); + final BlockCompressedOutputStream blockOut = + new BlockCompressedOutputStream(outputStream, (Path) null); IOUtil.transferByStream(blockIn, blockOut, remainingInBlock); blockOut.flush(); // Don't close blockOut because closing underlying stream would break everything @@ -119,7 +137,7 @@ public static void blockCopyBamFile(final Path inputFile, final OutputStream out blockIn.close(); // tsato: why doesn't 
IntelliJ say this is unnecessary? in.seek(pos); - } catch (IOException e){ + } catch (IOException e) { throw new HtsjdkException("Encountered an error.", e); } } @@ -127,8 +145,10 @@ public static void blockCopyBamFile(final Path inputFile, final OutputStream out // Copy remainder of input stream into output stream final long currentPos = in.position(); final long length = Files.size(inputFile); - final long skipLast = ((term == BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK) && skipTerminator) ? - BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length : 0; + final long skipLast = + ((term == BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK) && skipTerminator) + ? BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length + : 0; final long bytesToWrite = length - skipLast - currentPos; IOUtil.transferByStream(in, outputStream, bytesToWrite); @@ -143,7 +163,8 @@ public static void blockCopyBamFile(final Path inputFile, final OutputStream out * (often the first block) and re-compress any data remaining in that block into a new block in the output file. Subsequent * blocks (excluding a terminator block if present) are copied directly from input to output. 
*/ - public static void gatherWithBlockCopying(final List bams, final File output, final boolean createIndex, final boolean createMd5) { + public static void gatherWithBlockCopying( + final List bams, final File output, final boolean createIndex, final boolean createMd5) { try { OutputStream out = new FileOutputStream(output); if (createMd5) out = new Md5CalculatingOutputStream(out, new File(output.getAbsolutePath() + ".md5")); @@ -165,12 +186,15 @@ public static void gatherWithBlockCopying(final List bams, final File outp out.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK); out.close(); - // It is possible that the modified time on the index file is ever so slightly older than the original BAM file + // It is possible that the modified time on the index file is ever so slightly older than the original BAM + // file // and this makes ValidateSamFile unhappy. if (createIndex && (output.lastModified() > indexFile.lastModified())) { final boolean success = indexFile.setLastModified(System.currentTimeMillis()); if (!success) { - System.err.print(String.format("Index file is older than BAM file for %s and unable to resolve this", output.getAbsolutePath())); + System.err.print(String.format( + "Index file is older than BAM file for %s and unable to resolve this", + output.getAbsolutePath())); } } } catch (final IOException ioe) { @@ -178,33 +202,39 @@ public static void gatherWithBlockCopying(final List bams, final File outp } } - private static OutputStream buildOutputStream(final File outputFile, final boolean createMd5, final boolean createIndex) throws IOException { + private static OutputStream buildOutputStream( + final File outputFile, final boolean createMd5, final boolean createIndex) throws IOException { return buildOutputStream(IOUtil.toPath(outputFile), createMd5, createIndex); } - private static OutputStream buildOutputStream(final Path outputFile, final boolean createMd5, final boolean createIndex) throws IOException { + private static OutputStream 
buildOutputStream( + final Path outputFile, final boolean createMd5, final boolean createIndex) throws IOException { OutputStream outputStream = Files.newOutputStream(outputFile); if (createMd5) { - outputStream = new Md5CalculatingOutputStream(outputStream, IOUtil.addExtension(outputFile, FileExtensions.MD5)); + outputStream = + new Md5CalculatingOutputStream(outputStream, IOUtil.addExtension(outputFile, FileExtensions.MD5)); } if (createIndex) { - outputStream = new StreamInflatingIndexingOutputStream(outputStream, outputFile.resolveSibling(outputFile.getFileName() + FileExtensions.BAI_INDEX)); + outputStream = new StreamInflatingIndexingOutputStream( + outputStream, outputFile.resolveSibling(outputFile.getFileName() + FileExtensions.BAI_INDEX)); } return outputStream; } - @Deprecated - private static void assertSortOrdersAreEqual(final SAMFileHeader newHeader, final File inputFile) throws IOException { + private static void assertSortOrdersAreEqual(final SAMFileHeader newHeader, final File inputFile) + throws IOException { assertSortOrdersAreEqual(newHeader, IOUtil.toPath(inputFile)); } - private static void assertSortOrdersAreEqual(final SAMFileHeader newHeader, final Path inputFile) throws IOException { + private static void assertSortOrdersAreEqual(final SAMFileHeader newHeader, final Path inputFile) + throws IOException { final SamReader reader = SamReaderFactory.makeDefault().open(inputFile); final SAMFileHeader origHeader = reader.getFileHeader(); final SAMFileHeader.SortOrder newSortOrder = newHeader.getSortOrder(); if (newSortOrder != SAMFileHeader.SortOrder.unsorted && newSortOrder != origHeader.getSortOrder()) { - throw new SAMException("Sort order of new header does not match the original file, needs to be " + origHeader.getSortOrder()); + throw new SAMException("Sort order of new header does not match the original file, needs to be " + + origHeader.getSortOrder()); } reader.close(); } diff --git a/src/main/java/htsjdk/samtools/BamIndexValidator.java 
b/src/main/java/htsjdk/samtools/BamIndexValidator.java index f3b5cbe5ae..0295b8c3a2 100644 --- a/src/main/java/htsjdk/samtools/BamIndexValidator.java +++ b/src/main/java/htsjdk/samtools/BamIndexValidator.java @@ -1,204 +1,232 @@ -/* - * The MIT License - * - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package htsjdk.samtools; - -import htsjdk.samtools.util.CloseableIterator; - -import java.util.Arrays; -import java.util.List; - -/** - * Class to validate (at two different levels of thoroughness) the index for a BAM file. - * - * This class is [not] thread safe [because it is immutable]. 
- */ -public class BamIndexValidator { - - public enum IndexValidationStringency { - EXHAUSTIVE, LESS_EXHAUSTIVE, NONE - } - - public static int exhaustivelyTestIndex(final SamReader reader) { // throws Exception { - // look at all chunk offsets in a linear index to make sure they are valid - - if (reader.indexing().hasBrowseableIndex()) { - if (SamIndexes.BAI.fileNameSuffix.endsWith(reader.type().indexExtension())) { - - // content is from an existing bai file - final CachingBAMFileIndex existingIndex = (CachingBAMFileIndex) reader.indexing().getBrowseableIndex(); // new CachingBAMFileIndex(inputBai, null); - final int numRefs = existingIndex.getNumberOfReferences(); - - int chunkCount = 0; - int indexCount = 0; - for (int i = 0; i < numRefs; i++) { - final BAMIndexContent content = existingIndex.getQueryResults(i); - for (final Chunk c : content.getAllChunks()) { - final CloseableIterator iter = ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader).iterator(new BAMFileSpan(c)); - chunkCount++; - SAMRecord sam = null; - try { - sam = iter.next(); - iter.close(); - } catch (final Exception e) { - throw new SAMException("Exception in BamIndexValidator. Last good record " + sam + " in chunk " + c + " chunkCount=" + chunkCount, e); - } - } - // also seek to every position in the linear index - // final BAMRecordCodec bamRecordCodec = new BAMRecordCodec(reader.getFileHeader()); - // bamRecordCodec.setInputStream(reader.getInputStream()); - - final LinearIndex linearIndex = content.getLinearIndex(); - for (final long l : linearIndex.getIndexEntries()) { - try { - if (l != 0) { - final CloseableIterator iter = ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader).iterator(new BAMFileSpan(new Chunk(l, l + 1))); - final SAMRecord sam = iter.next(); // read the first record identified by the linear index - indexCount++; - iter.close(); - } - } catch (final Exception e) { - throw new SAMException("Exception in BamIndexValidator. 
Linear index access failure " + l + " indexCount=" + indexCount, e); - } - - } - } - return chunkCount; - // System.out.println("Found " chunkCount + " chunks in test " + inputBai + - // " linearIndex positions = " + indexCount); - } else if (SamIndexes.CSI.fileNameSuffix.endsWith(reader.type().indexExtension())) { - - final CSIIndex existingIndex = (CSIIndex) reader.indexing().getBrowseableIndex(); // new CachingBAMFileIndex(inputBai, null); - final int numRefs = existingIndex.getNumberOfReferences(); - - int chunkCount = 0; - for (int i = 0; i < numRefs; i++) { - final BAMIndexContent content = existingIndex.getQueryResults(i); - for (final Chunk c : content.getAllChunks()) { - final CloseableIterator iter = ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader).iterator(new BAMFileSpan(c)); - chunkCount++; - SAMRecord sam = null; - try { - sam = iter.next(); - iter.close(); - } catch (final Exception e) { - throw new SAMException("Exception in BamIndexValidator. Last good record " + sam + " in chunk " + c + " chunkCount=" + chunkCount, e); - } - } - } - return chunkCount; - } - } - // else not a bam file with a browseable index - // System.err.println("No browseableIndex for reader"); - return 0; - } - - /** - * A less time-consuming index validation that only looks at the first and last references in the index - * and the first and last chunks in each of those - * - * @param reader - * @return # of chunks examined, or 0 if there is no browseable index for the reader - */ - public static int lessExhaustivelyTestIndex(final SamReader reader) { - // look at all chunk offsets in a linear index to make sure they are valid - if (reader.indexing().hasBrowseableIndex()) { - if (SamIndexes.BAI.fileNameSuffix.endsWith(reader.type().indexExtension())) { - - // content is from an existing bai file - final CachingBAMFileIndex existingIndex = (CachingBAMFileIndex) reader.indexing().getBrowseableIndex(); - final int numRefs = existingIndex.getNumberOfReferences(); - - int 
chunkCount = 0; - int indexCount = 0; - for (int i = 0; i < numRefs; i++) { - - final BAMIndexContent content = existingIndex.getQueryResults(i); - - final List chunks = content.getAllChunks(); - final int numChunks = chunks.size(); - // We are looking only at the first and last chunks - for (final int chunkNo : Arrays.asList(0, numChunks - 1)) { - chunkCount++; - - final Chunk c = chunks.get(chunkNo); - final CloseableIterator iter = ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader).iterator(new BAMFileSpan(c)); - try { - final SAMRecord sam = iter.next(); - iter.close(); - } catch (final Exception e) { - throw new SAMException("Exception querying chunk " + chunkNo + " from reference index " + i, e); - } - } - - // also seek to first and last position in the linear index - final long linearIndexEntries[] = content.getLinearIndex().getIndexEntries(); - for (final int binNo : Arrays.asList(0, linearIndexEntries.length - 1)) { - indexCount++; - final long l = linearIndexEntries[binNo]; - try { - if (l != 0) { - final CloseableIterator iter = ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader).iterator(new BAMFileSpan(new Chunk(l, l + 1))); - final SAMRecord sam = iter.next(); // read the first record identified by the linear index - iter.close(); - } - } catch (final Exception e) { - throw new SAMException("Exception in BamIndexValidator. 
Linear index access failure " + l + " indexCount=" + indexCount, e); - } - } - } - return chunkCount; - } else if (SamIndexes.CSI.fileNameSuffix.endsWith(reader.type().indexExtension())) { - - final CSIIndex existingIndex = (CSIIndex) reader.indexing().getBrowseableIndex(); // new CachingBAMFileIndex(inputBai, null); - final int numRefs = existingIndex.getNumberOfReferences(); - - int chunkCount = 0; - for (int i = 0; i < numRefs; i++) { - - final BAMIndexContent content = existingIndex.getQueryResults(i); - - final List chunks = content.getAllChunks(); - final int numChunks = chunks.size(); - // We are looking only at the first and last chunks - for (final int chunkNo : Arrays.asList(0, numChunks - 1)) { - chunkCount++; - - final Chunk c = chunks.get(chunkNo); - final CloseableIterator iter = ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader).iterator(new BAMFileSpan(c)); - try { - final SAMRecord sam = iter.next(); - iter.close(); - } catch (final Exception e) { - throw new SAMException("Exception querying chunk " + chunkNo + " from reference index " + i, e); - } - } - } - return chunkCount; - } - } - // else it's not a bam file with a browseable index - return 0; - } -} +/* + * The MIT License + * + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools; + +import htsjdk.samtools.util.CloseableIterator; +import java.util.Arrays; +import java.util.List; + +/** + * Class to validate (at two different levels of thoroughness) the index for a BAM file. + * + * This class is [not] thread safe [because it is immutable]. + */ +public class BamIndexValidator { + + public enum IndexValidationStringency { + EXHAUSTIVE, + LESS_EXHAUSTIVE, + NONE + } + + public static int exhaustivelyTestIndex(final SamReader reader) { // throws Exception { + // look at all chunk offsets in a linear index to make sure they are valid + + if (reader.indexing().hasBrowseableIndex()) { + if (SamIndexes.BAI.fileNameSuffix.endsWith(reader.type().indexExtension())) { + + // content is from an existing bai file + final CachingBAMFileIndex existingIndex = (CachingBAMFileIndex) + reader.indexing().getBrowseableIndex(); // new CachingBAMFileIndex(inputBai, null); + final int numRefs = existingIndex.getNumberOfReferences(); + + int chunkCount = 0; + int indexCount = 0; + for (int i = 0; i < numRefs; i++) { + final BAMIndexContent content = existingIndex.getQueryResults(i); + for (final Chunk c : content.getAllChunks()) { + final CloseableIterator iter = + ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader).iterator(new BAMFileSpan(c)); + chunkCount++; + SAMRecord sam = null; + try { + sam = iter.next(); + iter.close(); + } catch (final Exception e) { + throw new SAMException( + "Exception in BamIndexValidator. 
Last good record " + sam + " in chunk " + c + + " chunkCount=" + chunkCount, + e); + } + } + // also seek to every position in the linear index + // final BAMRecordCodec bamRecordCodec = new BAMRecordCodec(reader.getFileHeader()); + // bamRecordCodec.setInputStream(reader.getInputStream()); + + final LinearIndex linearIndex = content.getLinearIndex(); + for (final long l : linearIndex.getIndexEntries()) { + try { + if (l != 0) { + final CloseableIterator iter = + ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader) + .iterator(new BAMFileSpan(new Chunk(l, l + 1))); + final SAMRecord sam = + iter.next(); // read the first record identified by the linear index + indexCount++; + iter.close(); + } + } catch (final Exception e) { + throw new SAMException( + "Exception in BamIndexValidator. Linear index access failure " + l + " indexCount=" + + indexCount, + e); + } + } + } + return chunkCount; + // System.out.println("Found " chunkCount + " chunks in test " + inputBai + + // " linearIndex positions = " + indexCount); + } else if (SamIndexes.CSI.fileNameSuffix.endsWith(reader.type().indexExtension())) { + + final CSIIndex existingIndex = + (CSIIndex) reader.indexing().getBrowseableIndex(); // new CachingBAMFileIndex(inputBai, null); + final int numRefs = existingIndex.getNumberOfReferences(); + + int chunkCount = 0; + for (int i = 0; i < numRefs; i++) { + final BAMIndexContent content = existingIndex.getQueryResults(i); + for (final Chunk c : content.getAllChunks()) { + final CloseableIterator iter = + ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader).iterator(new BAMFileSpan(c)); + chunkCount++; + SAMRecord sam = null; + try { + sam = iter.next(); + iter.close(); + } catch (final Exception e) { + throw new SAMException( + "Exception in BamIndexValidator. 
Last good record " + sam + " in chunk " + c + + " chunkCount=" + chunkCount, + e); + } + } + } + return chunkCount; + } + } + // else not a bam file with a browseable index + // System.err.println("No browseableIndex for reader"); + return 0; + } + + /** + * A less time-consuming index validation that only looks at the first and last references in the index + * and the first and last chunks in each of those + * + * @param reader + * @return # of chunks examined, or 0 if there is no browseable index for the reader + */ + public static int lessExhaustivelyTestIndex(final SamReader reader) { + // look at all chunk offsets in a linear index to make sure they are valid + if (reader.indexing().hasBrowseableIndex()) { + if (SamIndexes.BAI.fileNameSuffix.endsWith(reader.type().indexExtension())) { + + // content is from an existing bai file + final CachingBAMFileIndex existingIndex = + (CachingBAMFileIndex) reader.indexing().getBrowseableIndex(); + final int numRefs = existingIndex.getNumberOfReferences(); + + int chunkCount = 0; + int indexCount = 0; + for (int i = 0; i < numRefs; i++) { + + final BAMIndexContent content = existingIndex.getQueryResults(i); + + final List chunks = content.getAllChunks(); + final int numChunks = chunks.size(); + // We are looking only at the first and last chunks + for (final int chunkNo : Arrays.asList(0, numChunks - 1)) { + chunkCount++; + + final Chunk c = chunks.get(chunkNo); + final CloseableIterator iter = + ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader).iterator(new BAMFileSpan(c)); + try { + final SAMRecord sam = iter.next(); + iter.close(); + } catch (final Exception e) { + throw new SAMException( + "Exception querying chunk " + chunkNo + " from reference index " + i, e); + } + } + + // also seek to first and last position in the linear index + final long linearIndexEntries[] = content.getLinearIndex().getIndexEntries(); + for (final int binNo : Arrays.asList(0, linearIndexEntries.length - 1)) { + indexCount++; + final 
long l = linearIndexEntries[binNo]; + try { + if (l != 0) { + final CloseableIterator iter = + ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader) + .iterator(new BAMFileSpan(new Chunk(l, l + 1))); + final SAMRecord sam = + iter.next(); // read the first record identified by the linear index + iter.close(); + } + } catch (final Exception e) { + throw new SAMException( + "Exception in BamIndexValidator. Linear index access failure " + l + " indexCount=" + + indexCount, + e); + } + } + } + return chunkCount; + } else if (SamIndexes.CSI.fileNameSuffix.endsWith(reader.type().indexExtension())) { + + final CSIIndex existingIndex = + (CSIIndex) reader.indexing().getBrowseableIndex(); // new CachingBAMFileIndex(inputBai, null); + final int numRefs = existingIndex.getNumberOfReferences(); + + int chunkCount = 0; + for (int i = 0; i < numRefs; i++) { + + final BAMIndexContent content = existingIndex.getQueryResults(i); + + final List chunks = content.getAllChunks(); + final int numChunks = chunks.size(); + // We are looking only at the first and last chunks + for (final int chunkNo : Arrays.asList(0, numChunks - 1)) { + chunkCount++; + + final Chunk c = chunks.get(chunkNo); + final CloseableIterator iter = + ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader).iterator(new BAMFileSpan(c)); + try { + final SAMRecord sam = iter.next(); + iter.close(); + } catch (final Exception e) { + throw new SAMException( + "Exception querying chunk " + chunkNo + " from reference index " + i, e); + } + } + } + return chunkCount; + } + } + // else it's not a bam file with a browseable index + return 0; + } +} diff --git a/src/main/java/htsjdk/samtools/Bin.java b/src/main/java/htsjdk/samtools/Bin.java index 68d83d6a51..67e1bc07ec 100644 --- a/src/main/java/htsjdk/samtools/Bin.java +++ b/src/main/java/htsjdk/samtools/Bin.java @@ -24,7 +24,6 @@ package htsjdk.samtools; import htsjdk.samtools.util.BlockCompressedFilePointerUtil; - import java.util.ArrayList; import 
java.util.Collections; import java.util.List; @@ -78,10 +77,10 @@ public int getBinNumber() { */ @Override public boolean equals(final Object other) { - if(other == null) return false; - if(!(other instanceof Bin)) return false; + if (other == null) return false; + if (!(other instanceof Bin)) return false; - final Bin otherBin = (Bin)other; + final Bin otherBin = (Bin) other; return this.referenceSequence == otherBin.referenceSequence && this.binNumber == otherBin.binNumber; } @@ -91,7 +90,7 @@ public boolean equals(final Object other) { */ @Override public int hashCode() { - return ((Integer)referenceSequence).hashCode() ^ ((Integer)binNumber).hashCode(); + return ((Integer) referenceSequence).hashCode() ^ ((Integer) binNumber).hashCode(); } /** @@ -105,16 +104,14 @@ public boolean containsChunks() { /** * Compare two bins to see what ordering they should appear in. * @param other Other bin to which this bin should be compared. - * @return -1 if this < other, 0 if this == other, 1 if this > other. + * @return {@code -1 if this < other, 0 if this == other, 1 if this > other}. */ @Override public int compareTo(final Bin other) { - if(other == null) - throw new ClassCastException("Cannot compare to a null object"); + if (other == null) throw new ClassCastException("Cannot compare to a null object"); // Check the reference sequences first. - if(this.referenceSequence != other.referenceSequence) - return referenceSequence - other.referenceSequence; + if (this.referenceSequence != other.referenceSequence) return referenceSequence - other.referenceSequence; // Then check the bin ordering. 
return binNumber - other.binNumber; @@ -123,7 +120,7 @@ public int compareTo(final Bin other) { /** * Adds the first chunk to the bin */ - public void addInitialChunk(final Chunk newChunk){ + public void addInitialChunk(final Chunk newChunk) { final List oldChunks = new ArrayList(); setChunkList(oldChunks); setLastChunk(newChunk); @@ -146,8 +143,7 @@ public void addChunk(final Chunk newChunk) { // Coalesce chunks that are in the same or adjacent file blocks. // Similar to AbstractBAMFileIndex.optimizeChunkList, // but no need to copy the list, no minimumOffset, and maintain bin.lastChunk - if (BlockCompressedFilePointerUtil.areInSameOrAdjacentBlocks( - lastChunk.getChunkEnd(), chunkStart)) { + if (BlockCompressedFilePointerUtil.areInSameOrAdjacentBlocks(lastChunk.getChunkEnd(), chunkStart)) { lastChunk.setChunkEnd(chunkEnd); // coalesced } else { chunkList.add(newChunk); @@ -159,7 +155,7 @@ public void addChunk(final Chunk newChunk) { /** * Sets the chunks associated with this bin */ - public void setChunkList(final List list){ + public void setChunkList(final List list) { chunkList = list; } @@ -167,16 +163,15 @@ public void setChunkList(final List list){ * Gets the list of chunks associated with this bin. * @return the chunks in this bin. If no chunks are associated, an empty list will be returned. 
*/ - public List getChunkList(){ - if(chunkList == null) - return Collections.emptyList(); + public List getChunkList() { + if (chunkList == null) return Collections.emptyList(); return chunkList; } /** * Optimization to keep lastChunk instead of iterating over all chunks repeatedly */ - public void setLastChunk(final Chunk c){ + public void setLastChunk(final Chunk c) { lastChunk = c; } @@ -185,7 +180,7 @@ public void setLastChunk(final Chunk c){ * (AbstractBAMFileIndex.optimizeChunkList doesn't maintain this) * @return the last Chunk of the chunkList */ - public Chunk getLastChunk(){ + public Chunk getLastChunk() { return lastChunk; } diff --git a/src/main/java/htsjdk/samtools/BinList.java b/src/main/java/htsjdk/samtools/BinList.java index 2111ba403f..8c1b9f7f6c 100644 --- a/src/main/java/htsjdk/samtools/BinList.java +++ b/src/main/java/htsjdk/samtools/BinList.java @@ -107,11 +107,10 @@ public boolean hasNext() { */ @Override public Bin next() { - if(!hasNext()) - throw new NoSuchElementException("This BinIterator is currently empty"); + if (!hasNext()) throw new NoSuchElementException("This BinIterator is currently empty"); int currentBin = nextBin; - nextBin = bins.nextSetBit(nextBin+1); - return new Bin(referenceSequence,currentBin); + nextBin = bins.nextSetBit(nextBin + 1); + return new Bin(referenceSequence, currentBin); } @Override @@ -120,4 +119,3 @@ public void remove() { } } } - diff --git a/src/main/java/htsjdk/samtools/BinaryBAMIndexWriter.java b/src/main/java/htsjdk/samtools/BinaryBAMIndexWriter.java index ec9c7e5371..606f07de87 100644 --- a/src/main/java/htsjdk/samtools/BinaryBAMIndexWriter.java +++ b/src/main/java/htsjdk/samtools/BinaryBAMIndexWriter.java @@ -26,7 +26,6 @@ import htsjdk.samtools.util.BinaryCodec; import htsjdk.samtools.util.IOUtil; - import java.io.File; import java.io.IOException; import java.io.OutputStream; @@ -97,11 +96,11 @@ public void writeReference(final BAMIndexContent content) { return; } - if 
(content.getReferenceSequence() != count){ - throw new SAMException("Unexpectedly writing reference " + content.getReferenceSequence() + - ", expecting reference " + count); + if (content.getReferenceSequence() != count) { + throw new SAMException("Unexpectedly writing reference " + content.getReferenceSequence() + + ", expecting reference " + count); } - count ++; + count++; // write bins @@ -113,21 +112,19 @@ public void writeReference(final BAMIndexContent content) { return; } - //final List chunks = content.getMetaData() == null ? null + // final List chunks = content.getMetaData() == null ? null // : content.getMetaData().getMetaDataChunks(); final BAMIndexMetaData metaData = content.getMetaData(); - codec.writeInt(size + ((metaData != null)? 1 : 0 )); + codec.writeInt(size + ((metaData != null) ? 1 : 0)); // codec.writeInt(size); - for (final Bin bin : bins) { // note, bins will always be sorted - if (bin.getBinNumber() == GenomicIndexUtil.MAX_BINS) - continue; + for (final Bin bin : bins) { // note, bins will always be sorted + if (bin.getBinNumber() == GenomicIndexUtil.MAX_BINS) continue; writeBin(bin); } - // write metadata "bin" and chunks - if (metaData != null) - writeChunkMetaData(metaData); + // write metadata "bin" and chunks + if (metaData != null) writeChunkMetaData(metaData); // write linear index @@ -149,7 +146,8 @@ public void writeReference(final BAMIndexContent content) { try { codec.getOutputStream().flush(); } catch (final IOException e) { - throw new SAMException("IOException in BinaryBAMIndexWriter reference " + content.getReferenceSequence(), e); + throw new SAMException( + "IOException in BinaryBAMIndexWriter reference " + content.getReferenceSequence(), e); } } @@ -173,12 +171,12 @@ public void close() { private void writeBin(final Bin bin) { final int binNumber = bin.getBinNumber(); - if (binNumber >= GenomicIndexUtil.MAX_BINS){ + if (binNumber >= GenomicIndexUtil.MAX_BINS) { throw new SAMException("Unexpected bin number when writing 
bam index " + binNumber); } - + codec.writeInt(binNumber); - if (bin.getChunkList() == null){ + if (bin.getChunkList() == null) { codec.writeInt(0); return; } @@ -204,7 +202,6 @@ private void writeChunkMetaData(final BAMIndexMetaData metaData) { codec.writeLong(metaData.getLastOffset()); codec.writeLong(metaData.getAlignedRecordCount()); codec.writeLong(metaData.getUnalignedRecordCount()); - } private void writeHeader() { @@ -215,6 +212,6 @@ private void writeHeader() { } private void writeNullContent() { - codec.writeLong(0); // 0 bins , 0 intv + codec.writeLong(0); // 0 bins , 0 intv } } diff --git a/src/main/java/htsjdk/samtools/BinaryTagCodec.java b/src/main/java/htsjdk/samtools/BinaryTagCodec.java index 5603cfc071..94f5c8bdf0 100644 --- a/src/main/java/htsjdk/samtools/BinaryTagCodec.java +++ b/src/main/java/htsjdk/samtools/BinaryTagCodec.java @@ -25,7 +25,6 @@ import htsjdk.samtools.util.BinaryCodec; import htsjdk.samtools.util.StringUtil; - import java.lang.reflect.Array; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -69,7 +68,7 @@ public BinaryTagCodec(final BinaryCodec binaryCodec) { private static int getBinaryValueSize(final Object attributeValue) { switch (getTagValueType(attributeValue)) { case 'Z': - return ((String)attributeValue).length() + 1; + return ((String) attributeValue).length() + 1; case 'A': return 1; case 'I': @@ -84,26 +83,26 @@ private static int getBinaryValueSize(final Object attributeValue) { case 'f': return 4; case 'H': - final byte[] byteArray = (byte[])attributeValue; + final byte[] byteArray = (byte[]) attributeValue; return byteArray.length * 2 + 1; case 'B': final int numElements = Array.getLength(attributeValue); final int elementSize; - if(attributeValue instanceof byte[]) { + if (attributeValue instanceof byte[]) { elementSize = 1; - } else if(attributeValue instanceof short[]) { + } else if (attributeValue instanceof short[]) { elementSize = 2; - } else if(attributeValue instanceof int[]) { + } else if 
(attributeValue instanceof int[]) { elementSize = 4; - } else if(attributeValue instanceof float[]) { + } else if (attributeValue instanceof float[]) { elementSize = 4; } else { throw new IllegalArgumentException("Unsupported array type: " + attributeValue.getClass()); } return numElements * elementSize + FIXED_BINARY_ARRAY_TAG_SIZE; default: - throw new IllegalArgumentException("When writing BAM, unrecognized tag type " + - attributeValue.getClass().getName()); + throw new IllegalArgumentException("When writing BAM, unrecognized tag type " + + attributeValue.getClass().getName()); } } @@ -127,21 +126,27 @@ static char getTagValueType(final Object value) { } else if (value instanceof Float) { return 'f'; } else if (value instanceof Number) { - if (!(value instanceof Byte || value instanceof Short || value instanceof Integer || value instanceof Long)) { - throw new IllegalArgumentException("Unrecognized tag type " + value.getClass().getName()); + if (!(value instanceof Byte + || value instanceof Short + || value instanceof Integer + || value instanceof Long)) { + throw new IllegalArgumentException( + "Unrecognized tag type " + value.getClass().getName()); } - return getIntegerType(((Number)value).longValue()); + return getIntegerType(((Number) value).longValue()); } /* Note that H tag type is never written anymore, because B style is more compact. 
else if (value instanceof byte[]) { return 'H'; } - */ - else if (value instanceof byte[] || value instanceof short[] || value instanceof int[] || value instanceof float[]) { + */ else if (value instanceof byte[] + || value instanceof short[] + || value instanceof int[] + || value instanceof float[]) { return 'B'; } else { - throw new IllegalArgumentException("When writing BAM, unrecognized tag type " + - value.getClass().getName()); + throw new IllegalArgumentException("When writing BAM, unrecognized tag type " + + value.getClass().getName()); } } @@ -149,7 +154,7 @@ else if (value instanceof byte[] || value instanceof short[] || value instanceof * @param val Integer tag value. * @return Tag type corresponding to the smallest integer type that will hold the given value. */ - static private char getIntegerType(final long val) { + private static char getIntegerType(final long val) { if (val > MAX_UINT) { throw new IllegalArgumentException("Integer attribute value too large to be encoded in BAM"); } @@ -190,31 +195,31 @@ public void writeTag(final short tag, final Object value, final boolean isUnsign switch (tagValueType) { case 'Z': - binaryCodec.writeString((String)value, false, true); + binaryCodec.writeString((String) value, false, true); break; case 'A': - binaryCodec.writeByte(((Character)value)); + binaryCodec.writeByte(((Character) value)); break; case 'I': - binaryCodec.writeUInt((Long)value); + binaryCodec.writeUInt((Long) value); break; case 'i': - binaryCodec.writeInt(((Number)value).intValue()); + binaryCodec.writeInt(((Number) value).intValue()); break; case 's': - binaryCodec.writeShort(((Number)value).shortValue()); + binaryCodec.writeShort(((Number) value).shortValue()); break; case 'S': - binaryCodec.writeUShort(((Number)value).intValue()); + binaryCodec.writeUShort(((Number) value).intValue()); break; case 'c': - binaryCodec.writeByte(((Number)value).byteValue()); + binaryCodec.writeByte(((Number) value).byteValue()); break; case 'C': - 
binaryCodec.writeUByte(((Integer)value).shortValue()); + binaryCodec.writeUByte(((Integer) value).shortValue()); break; case 'f': - binaryCodec.writeFloat((Float)value); + binaryCodec.writeFloat((Float) value); break; /* Writing H is no longer supported @@ -227,35 +232,35 @@ public void writeTag(final short tag, final Object value, final boolean isUnsign writeArray(value, isUnsignedArray); break; default: - throw new IllegalArgumentException("When writing BAM, unrecognized tag type " + - value.getClass().getName()); + throw new IllegalArgumentException("When writing BAM, unrecognized tag type " + + value.getClass().getName()); } } private void writeArray(final Object value, final boolean isUnsignedArray) { if (value instanceof byte[]) { - binaryCodec.writeByte(isUnsignedArray? 'C': 'c'); + binaryCodec.writeByte(isUnsignedArray ? 'C' : 'c'); final byte[] array = (byte[]) value; binaryCodec.writeInt(array.length); - for (final byte element: array) binaryCodec.writeByte(element); + for (final byte element : array) binaryCodec.writeByte(element); } else if (value instanceof short[]) { - binaryCodec.writeByte(isUnsignedArray? 'S': 's'); + binaryCodec.writeByte(isUnsignedArray ? 'S' : 's'); final short[] array = (short[]) value; binaryCodec.writeInt(array.length); - for (final short element: array) binaryCodec.writeShort(element); + for (final short element : array) binaryCodec.writeShort(element); } else if (value instanceof int[]) { - binaryCodec.writeByte(isUnsignedArray? 'I': 'i'); + binaryCodec.writeByte(isUnsignedArray ? 
'I' : 'i'); final int[] array = (int[]) value; binaryCodec.writeInt(array.length); - for (final int element: array) binaryCodec.writeInt(element); + for (final int element : array) binaryCodec.writeInt(element); } else if (value instanceof float[]) { binaryCodec.writeByte('f'); final float[] array = (float[]) value; binaryCodec.writeInt(array.length); - for (final float element: array) binaryCodec.writeFloat(element); + for (final float element : array) binaryCodec.writeFloat(element); } else throw new SAMException("Unrecognized array value type: " + value.getClass()); } @@ -266,8 +271,11 @@ private void writeArray(final Object value, final boolean isUnsignedArray) { * @param offset Where in binaryRep tags start. * @param length How many bytes in binaryRep are tag storage. */ - public static SAMBinaryTagAndValue readTags(final byte[] binaryRep, final int offset, - final int length, final ValidationStringency validationStringency) { + public static SAMBinaryTagAndValue readTags( + final byte[] binaryRep, + final int offset, + final int length, + final ValidationStringency validationStringency) { final ByteBuffer byteBuffer = ByteBuffer.wrap(binaryRep, offset, length); byteBuffer.order(ByteOrder.LITTLE_ENDIAN); @@ -279,11 +287,12 @@ public static SAMBinaryTagAndValue readTags(final byte[] binaryRep, final int of final byte tagType = byteBuffer.get(); final SAMBinaryTagAndValue tmp; if (tagType != 'B') { - tmp = new SAMBinaryTagAndValue(tag, readSingleValue(tagType, byteBuffer, validationStringency)); + tmp = new SAMBinaryTagAndValue(tag, readSingleValue(tagType, byteBuffer, validationStringency), true); } else { final TagValueAndUnsignedArrayFlag valueAndFlag = readArray(byteBuffer, validationStringency); - if (valueAndFlag.isUnsignedArray) tmp = new SAMBinaryTagAndUnsignedArrayValue(tag, valueAndFlag.value); - else tmp = new SAMBinaryTagAndValue(tag, valueAndFlag.value); + if (valueAndFlag.isUnsignedArray) + tmp = new SAMBinaryTagAndUnsignedArrayValue(tag, 
valueAndFlag.value, true); + else tmp = new SAMBinaryTagAndValue(tag, valueAndFlag.value, true); } // If samjdk wrote the BAM then the attributes will be in lowest->highest tag order, to inserting at the @@ -292,12 +301,10 @@ public static SAMBinaryTagAndValue readTags(final byte[] binaryRep, final int of if (head == null) { head = tmp; tail = tmp; - } - else if (tmp.tag > tail.tag) { - tail.insert(tmp); + } else if (tmp.tag > tail.tag) { + tail.next = tmp; tail = tmp; - } - else { + } else { head = head.insert(tmp); } } @@ -311,57 +318,58 @@ else if (tmp.tag > tail.tag) { * @param byteBuffer Little-ending byte buffer to read value from. * @return Value in in-memory Object form. */ - private static Object readSingleValue(final byte tagType, final ByteBuffer byteBuffer, - final ValidationStringency validationStringency) { + private static Object readSingleValue( + final byte tagType, final ByteBuffer byteBuffer, final ValidationStringency validationStringency) { switch (tagType) { case 'Z': return readNullTerminatedString(byteBuffer); case 'A': - return (char)byteBuffer.get(); + return (char) byteBuffer.get(); case 'I': final long val = byteBuffer.getInt() & 0xffffffffL; - if ( val <= Integer.MAX_VALUE ) { - return (int)val; + if (val <= Integer.MAX_VALUE) { + return (int) val; } // If it won't fit into a signed integer, but is within range for an unsigned 32-bit integer, // return it directly as a long - if (! 
SAMUtils.isValidUnsignedIntegerAttribute(val)) { - SAMUtils.processValidationError(new SAMValidationError(SAMValidationError.Type.TAG_VALUE_TOO_LARGE, - "Unsigned integer is out of range for a 32-bit unsigned value: " + val, null), validationStringency); + if (!SAMUtils.isValidUnsignedIntegerAttribute(val)) { + SAMUtils.processValidationError( + new SAMValidationError( + SAMValidationError.Type.TAG_VALUE_TOO_LARGE, + "Unsigned integer is out of range for a 32-bit unsigned value: " + val, + null), + validationStringency); } return val; case 'i': return byteBuffer.getInt(); case 's': - return (int)byteBuffer.getShort(); + return (int) byteBuffer.getShort(); case 'S': // Convert to unsigned short stored in an int return byteBuffer.getShort() & 0xffff; case 'c': - return (int)byteBuffer.get(); + return (int) byteBuffer.get(); case 'C': // Convert to unsigned byte stored in an int - return (int)byteBuffer.get() & 0xff; + return (int) byteBuffer.get() & 0xff; case 'f': return byteBuffer.getFloat(); case 'H': final String hexRep = readNullTerminatedString(byteBuffer); return StringUtil.hexStringToBytes(hexRep); default: - throw new SAMFormatException("Unrecognized tag type: " + (char)tagType); + throw new SAMFormatException("Unrecognized tag type: " + (char) tagType); } } - - - /** * Read value of specified type. * @param byteBuffer Little-ending byte buffer to read value from. * @return CVO containing the value in in-memory Object form, and a flag indicating whether it is unsigned or not. 
*/ - private static TagValueAndUnsignedArrayFlag readArray(final ByteBuffer byteBuffer, - final ValidationStringency validationStringency) { + private static TagValueAndUnsignedArrayFlag readArray( + final ByteBuffer byteBuffer, final ValidationStringency validationStringency) { final byte arrayType = byteBuffer.get(); final boolean isUnsigned = Character.isUpperCase(arrayType); final int length = byteBuffer.getInt(); @@ -401,25 +409,24 @@ private static TagValueAndUnsignedArrayFlag readArray(final ByteBuffer byteBuffe } default: - throw new SAMFormatException("Unrecognized tag array type: " + (char)arrayType); + throw new SAMFormatException("Unrecognized tag array type: " + (char) arrayType); } return new TagValueAndUnsignedArrayFlag(value, isUnsigned); } private static String readNullTerminatedString(final ByteBuffer byteBuffer) { - // Count the number of bytes in the string - byteBuffer.mark(); - final int startPosition = byteBuffer.position(); - while (byteBuffer.get() != 0) {} - final int endPosition = byteBuffer.position(); - - // Don't count null terminator - final byte[] buf = new byte[endPosition - startPosition - 1]; - // Go back to the start of the string and read out the bytes - byteBuffer.reset(); - byteBuffer.get(buf); - // Skip over the null terminator - byteBuffer.get(); - return StringUtil.bytesToString(buf); + // Scan the backing array directly to avoid the double-pass of mark/reset/re-read + final byte[] array = byteBuffer.array(); + final int start = byteBuffer.arrayOffset() + byteBuffer.position(); + final int limit = byteBuffer.arrayOffset() + byteBuffer.limit(); + int end = start; + while (end < limit && array[end] != 0) { + end++; + } + if (end >= limit) { + throw new SAMFormatException("Null-terminated string tag value is not null terminated."); + } + byteBuffer.position(byteBuffer.position() + (end - start) + 1); // advance past null terminator + return StringUtil.bytesToString(array, start, end - start); } } diff --git 
a/src/main/java/htsjdk/samtools/BinningIndexBuilder.java b/src/main/java/htsjdk/samtools/BinningIndexBuilder.java index 3b329d2bc1..76046558fd 100644 --- a/src/main/java/htsjdk/samtools/BinningIndexBuilder.java +++ b/src/main/java/htsjdk/samtools/BinningIndexBuilder.java @@ -23,12 +23,9 @@ */ package htsjdk.samtools; -import htsjdk.samtools.util.BlockCompressedFilePointerUtil; +import static htsjdk.samtools.GenomicIndexUtil.MAX_BINS; import java.util.Arrays; -import java.util.List; - -import static htsjdk.samtools.GenomicIndexUtil.MAX_BINS; /** * Builder for a BinningIndexContent object. @@ -54,7 +51,8 @@ public class BinningIndexBuilder { * if false, leave uninitialized values as -1, which is required when merging index files * (see {@link BAMIndexMerger}) */ - public BinningIndexBuilder(final int referenceSequence, final int sequenceLength, final boolean fillInUninitializedValues) { + public BinningIndexBuilder( + final int referenceSequence, final int sequenceLength, final boolean fillInUninitializedValues) { this.referenceSequence = referenceSequence; this.fillInUninitializedValues = fillInUninitializedValues; // Initially set each window to -1 so we can distinguish between windows that have no overlapping @@ -84,8 +82,11 @@ public BinningIndexBuilder(final int referenceSequence) { */ public interface FeatureToBeIndexed { public int getStart(); + public int getEnd(); + public Integer getIndexingBin(); + public Chunk getChunk(); } @@ -96,7 +97,6 @@ public void processFeature(final FeatureToBeIndexed feature) { final Integer binNumber = feature.getIndexingBin(); final int binNum = binNumber == null ? computeIndexingBin(feature) : binNumber; - // is there a bin already represented for this index? 
if not, add one final Bin bin; if (bins[binNum] != null) { @@ -120,7 +120,7 @@ public void processFeature(final FeatureToBeIndexed feature) { int startWindow = LinearIndex.convertToLinearIndexOffset(feature.getStart()); // the 16k window final int endWindow; - if (featureEnd == GenomicIndexUtil.UNSET_GENOMIC_LOCATION) { // assume feature uses one position + if (featureEnd == GenomicIndexUtil.UNSET_GENOMIC_LOCATION) { // assume feature uses one position // Next line for C (samtools index) compatibility. Differs only when on a window boundary startWindow = LinearIndex.convertToLinearIndexOffset(feature.getStart() - 1); endWindow = startWindow; @@ -151,16 +151,16 @@ public void processFeature(final FeatureToBeIndexed feature) { */ public BinningIndexContent generateIndexContent() { - // process bins - if (binsSeen == 0) return null; // no bins for this reference + if (binsSeen == 0) return null; // no bins for this reference // process chunks // nothing needed // process linear index // linear index will only be as long as the largest index seen - final long[] newIndex = new long[largestIndexSeen + 1]; // in java1.6 Arrays.copyOf(index, largestIndexSeen + 1); + final long[] newIndex = + new long[largestIndexSeen + 1]; // in java1.6 Arrays.copyOf(index, largestIndexSeen + 1); // C (samtools index) also fills in intermediate 0's with values. This seems unnecessary, but safe long lastNonZeroOffset = 0; @@ -183,7 +183,7 @@ public BinningIndexContent generateIndexContent() { private int computeIndexingBin(final FeatureToBeIndexed feature) { // regionToBin has zero-based, half-open API - final int start = feature.getStart()-1; + final int start = feature.getStart() - 1; int end = feature.getEnd(); if (end <= 0) { // If feature end cannot be determined (e.g. 
because a read is not really aligned), diff --git a/src/main/java/htsjdk/samtools/BinningIndexContent.java b/src/main/java/htsjdk/samtools/BinningIndexContent.java index bc55ef693d..e2ad018566 100644 --- a/src/main/java/htsjdk/samtools/BinningIndexContent.java +++ b/src/main/java/htsjdk/samtools/BinningIndexContent.java @@ -51,7 +51,6 @@ public class BinningIndexContent { */ private final LinearIndex mLinearIndex; - /** * @param referenceSequence Content corresponds to this reference. * @param binList Array of bins represented by this content, possibly sparse @@ -110,7 +109,6 @@ public LinearIndex getLinearIndex() { return mLinearIndex; } - /** * * @param startPos 1-based, inclusive @@ -118,7 +116,7 @@ public LinearIndex getLinearIndex() { * @return List of Chunks overlapping the given region. May return null if there are none. */ public List getChunksOverlapping(final int startPos, final int endPos) { - final BitSet overlappingBins = GenomicIndexUtil.regionToBins(startPos,endPos); + final BitSet overlappingBins = GenomicIndexUtil.regionToBins(startPos, endPos); if (overlappingBins == null) return null; // System.out.println("# Sequence target TID: " + referenceIndex); @@ -147,7 +145,7 @@ public static class BinList implements Iterable { private final Bin[] mBinArray; public final int numberOfNonNullBins; - public final int maxBinNumber; // invariant: maxBinNumber = mBinArray.length -1 since array is 0 based + public final int maxBinNumber; // invariant: maxBinNumber = mBinArray.length -1 since array is 0 based /** * @param binArray a sparse array representation of the bins. The index into the array is the bin number. 
@@ -207,8 +205,7 @@ public boolean hasNext() { */ @Override public Bin next() { - if (!hasNext()) - throw new NoSuchElementException("This BinIterator is currently empty"); + if (!hasNext()) throw new NoSuchElementException("This BinIterator is currently empty"); final Bin result = getBin(nextBin); nextBin++; return result; diff --git a/src/main/java/htsjdk/samtools/BrowseableBAMIndex.java b/src/main/java/htsjdk/samtools/BrowseableBAMIndex.java index c6eca51e55..8ebe3d722e 100644 --- a/src/main/java/htsjdk/samtools/BrowseableBAMIndex.java +++ b/src/main/java/htsjdk/samtools/BrowseableBAMIndex.java @@ -21,7 +21,7 @@ public interface BrowseableBAMIndex extends BAMIndex { * @return the level associated with the given bin number. */ public int getLevelForBin(final Bin bin); - + /** * Gets the first locus that this bin can index into. * @param bin The bin to test. @@ -50,5 +50,5 @@ public interface BrowseableBAMIndex extends BAMIndex { * @param bin The bin over which to perform an overlapping query. * @return The file pointers */ - BAMFileSpan getSpanOverlapping(final Bin bin); + BAMFileSpan getSpanOverlapping(final Bin bin); } diff --git a/src/main/java/htsjdk/samtools/CRAMBAIIndexer.java b/src/main/java/htsjdk/samtools/CRAMBAIIndexer.java index 38bdc35b62..496d697ca6 100755 --- a/src/main/java/htsjdk/samtools/CRAMBAIIndexer.java +++ b/src/main/java/htsjdk/samtools/CRAMBAIIndexer.java @@ -1,452 +1,453 @@ -/******************************************************************************* - * Copyright 2013 EMBL-EBI - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - ******************************************************************************/ -/* - * The MIT License - * - * Copyright (c) 2014 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sub-license, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ -package htsjdk.samtools; - -import htsjdk.samtools.cram.BAIEntry; -import htsjdk.samtools.cram.CRAIEntry; -import htsjdk.samtools.cram.CRAIIndex; -import htsjdk.samtools.cram.build.CramIO; -import htsjdk.samtools.cram.ref.ReferenceContext; -import htsjdk.samtools.cram.structure.*; -import htsjdk.samtools.seekablestream.SeekableStream; -import htsjdk.samtools.util.BlockCompressedFilePointerUtil; -import htsjdk.samtools.util.Log; -import htsjdk.samtools.util.ProgressLogger; -import htsjdk.samtools.util.RuntimeIOException; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.*; - -/** - * Class for both constructing BAM index content and writing it out. - * - * There are two usage patterns: - * - * 1) Building a bam index (BAI) while building the CRAM file - * 2) Building a bam index (BAI) from an existing CRAI file - * - * 1) is driven by {@link CRAMContainerStreamWriter} and proceeds by calling {@link CRAMBAIIndexer#processContainer} - * after each {@link Container} is built, and {@link CRAMBAIIndexer#finish()} is called at the end. - * - * 2) is driven by {@link CRAIIndex#openCraiFileAsBaiStream(InputStream, SAMSequenceDictionary)} - * and proceeds by processing {@link CRAIEntry} elements obtained from - * {@link CRAMCRAIIndexer#readIndex(InputStream)}. {@link CRAMBAIIndexer#processBAIEntry(BAIEntry)} - * is called on each {@link CRAIEntry} and {@link CRAMBAIIndexer#finish()} is called at the end. - * - * NOTE: a third pattern of building a BAI from a CRAM file is also supported by this class, - * but it is unused. This would be accomplished via {@link #createIndex(SeekableStream, File, Log, ValidationStringency)}. 
- */ -public class CRAMBAIIndexer implements CRAMIndexer { - - // The number of references (chromosomes) in the BAM file - private final int numReferences; - - // output written as binary, or (for debugging) as text - private final BAMIndexWriter outputWriter; - - private int currentReference = 0; - - // content is built up from the input bam file using this - private final CRAMBAIIndexBuilder indexBuilder; - private final CompressorCache compressorCache = new CompressorCache(); - - /** - * Create a CRAM indexer that writes BAI to a file. - * - * @param output binary BAM Index (.bai) file - * @param fileHeader header for the corresponding bam file - */ - private CRAMBAIIndexer(final File output, final SAMFileHeader fileHeader) { - if (fileHeader.getSortOrder() != SAMFileHeader.SortOrder.coordinate) { - throw new SAMException("CRAM file must be coordinate-sorted for indexing."); - } - numReferences = fileHeader.getSequenceDictionary().size(); - indexBuilder = new CRAMBAIIndexBuilder(fileHeader); - outputWriter = new BinaryBAMIndexWriter(numReferences, output); - } - - /** - * Create a CRAM indexer that writes BAI to a stream. - * - * @param output Index will be written here. output will be closed when finish() method is called. - * @param fileHeader header for the corresponding bam file. - */ - public CRAMBAIIndexer(final OutputStream output, final SAMFileHeader fileHeader) { - if (fileHeader.getSortOrder() != SAMFileHeader.SortOrder.coordinate) { - throw new SAMException("CRAM file mut be coordinate-sorted for indexing."); - } - numReferences = fileHeader.getSequenceDictionary().size(); - indexBuilder = new CRAMBAIIndexBuilder(fileHeader); - outputWriter = new BinaryBAMIndexWriter(numReferences, output); - } - - /** - * Index a container, any of mapped, unmapped and multiple references are allowed. - * The only requirement is sort order by coordinate. - * For multiref containers the method reads the container through unpacking all reads. 
- * This is slower than single reference but should be faster than normal reading. - * - * @param container container to be indexed - */ - @Override - public void processContainer(final Container container, final ValidationStringency validationStringency) { - container.getBAIEntries(compressorCache).forEach(b -> processBAIEntry(b)); - } - - public final void processBAIEntry(final BAIEntry baiEntry) { - - final ReferenceContext entryContext = baiEntry.getReferenceContext(); - if (entryContext.isMultiRef()) { - throw new SAMException("Expecting a single reference or unmapped slice."); - } - - if (entryContext.isMappedSingleRef()) { - final int reference = entryContext.getReferenceSequenceID(); - if (reference != currentReference) { - // process any completed references - advanceToReference(reference); - } - - // check that it advanced properly - if (reference != currentReference) { - throw new SAMException( - String.format("Unexpected reference %s when constructing index for reference %d for slice", - reference, - currentReference)); - } - } - - indexBuilder.recordBAIEntryIndexMetadata(baiEntry); - - if (entryContext.isMappedSingleRef()) { - indexBuilder.processBAIEntry(baiEntry); - } - } - - /** - * After all the slices have been processed, finish is called. - * Writes any final information and closes the output file. 
- */ - @Override - public void finish() { - // process any remaining references - advanceToReference(numReferences); - outputWriter.writeNoCoordinateRecordCount(indexBuilder.getNoCoordinateRecordCount()); - outputWriter.close(); - } - - /** - * write out any references between the currentReference and the nextReference - */ - private void advanceToReference(final int nextReference) { - while (currentReference < nextReference) { - final BAMIndexContent content = indexBuilder.processCurrentReference(); - outputWriter.writeReference(content); - currentReference++; - indexBuilder.startNewReference(); - } - } - - /** - * Generates a BAI index file from an input CRAM stream - * - * @param stream CRAM stream to index - * @param output File for output index file - * @param log optional {@link htsjdk.samtools.util.Log} to output progress - */ - public static void createIndex(final SeekableStream stream, - final File output, - final Log log, - final ValidationStringency validationStringency) { - - final CramHeader cramHeader = CramIO.readCramHeader(stream); - final SAMFileHeader samFileHeader = Container.readSAMFileHeaderContainer(cramHeader.getCRAMVersion(), stream, null); - if (samFileHeader.getSortOrder() != SAMFileHeader.SortOrder.coordinate) { - throw new SAMException(String.format( - "Input must be coordinate sorted (found %s) to create an index.", - samFileHeader.getSortOrder())); - } - final CRAMBAIIndexer indexer = new CRAMBAIIndexer(output, samFileHeader); - - Container container = null; - final ProgressLogger progressLogger = new ProgressLogger(log, 1, "indexed", "slices"); - do { - try { - container = new Container(cramHeader.getCRAMVersion(), stream, stream.position()); - } catch (final IOException e) { - throw new RuntimeIOException("error getting stream position", e); - } - if (container == null || container.isEOF()) { - break; - } - - indexer.processContainer(container, validationStringency); - - if (null != log) { - String sequenceName; - final 
AlignmentContext alignmentContext = container.getAlignmentContext(); - final ReferenceContext containerReferenceContext = alignmentContext.getReferenceContext(); - switch (containerReferenceContext.getType()) { - case UNMAPPED_UNPLACED_TYPE: - sequenceName = "?"; - break; - case MULTIPLE_REFERENCE_TYPE: - sequenceName = "???"; - break; - default: - sequenceName = samFileHeader.getSequence( - containerReferenceContext.getReferenceSequenceID()).getSequenceName(); - break; - } - progressLogger.record(sequenceName, alignmentContext.getAlignmentStart()); - } - - } while (!container.isEOF()); - - indexer.finish(); - } - - /** - * Class for constructing BAM index files. - * One instance is used to construct an entire index. - * processAlignment is called for each alignment until a new reference is encountered, then - * processReference is called when all records for the reference have been processed. - */ - private class CRAMBAIIndexBuilder { - - private final SAMFileHeader bamHeader; - - // the bins for the current reference - private Bin[] bins; // made only as big as needed for each reference - private int binsSeen = 0; - - // linear index for the current reference - private final long[] index = new long[LinearIndex.MAX_LINEAR_INDEX_SIZE]; - private int largestIndexSeen = -1; - - // information in meta data - private final BAMIndexMetaData indexStats = new BAMIndexMetaData(); - - /** - * @param header SAMFileHeader used for reference name (in index stats) and for max bin number - */ - private CRAMBAIIndexBuilder(final SAMFileHeader header) { - this.bamHeader = header; - } - - private SAMFileHeader getBamHeader() { - return bamHeader; - } - - private void recordBAIEntryIndexMetadata(final BAIEntry baiEntry) { - indexStats.recordMetaData(baiEntry); - } - - private int computeIndexingBin(final BAIEntry baiEntry) { - // regionToBin has zero-based, half-open API - //final AlignmentContext sliceAlignmentContext = baiEntry.getAlignmentContext(); - final int alignmentStart = 
baiEntry.getAlignmentStart() - 1; - int alignmentEnd = baiEntry.getAlignmentStart() + baiEntry.getAlignmentSpan() - 1; - if (alignmentEnd <= alignmentStart) { - // If alignment end cannot be determined (e.g. because this read is not really aligned), - // then treat this as a one base alignment for indexing purposes. - alignmentEnd = alignmentStart + 1; - } - return GenomicIndexUtil.regionToBin(alignmentStart, alignmentEnd); - } - - /** - * Record any index information for a given CRAM slice - * - * Reads these Slice fields: - * sequenceId, alignmentStart, alignmentSpan, containerByteOffset, index - * - //* @param slice CRAM slice, single ref only. - */ - private void processBAIEntry(final BAIEntry baiEntry) { - final ReferenceContext sliceContext = baiEntry.getReferenceContext(); - if (! sliceContext.isMappedSingleRef()) { - return; // do nothing for records without coordinates, but count them - } - - // various checks - final int reference = sliceContext.getReferenceSequenceID(); - if (reference != currentReference) { - throw new SAMException( - String.format("Unexpected reference %s when constructing index for reference %d for slice", - reference, - currentReference)); - } - - // process bins - - final int binNum = computeIndexingBin(baiEntry); - - // has the bins array been allocated? If not, do so - if (bins == null) { - final SAMSequenceRecord seq = bamHeader.getSequence(reference); - if (seq == null) { - bins = new Bin[GenomicIndexUtil.MAX_BINS + 1]; - } else { - bins = new Bin[AbstractBAMFileIndex.getMaxBinNumberForSequenceLength(seq.getSequenceLength()) + 1]; - } - } - - // is there a bin already represented for this index? 
if not, add one - final Bin bin; - if (bins[binNum] != null) { - bin = bins[binNum]; - } else { - bin = new Bin(reference, binNum); - bins[binNum] = bin; - binsSeen++; - } - - // process chunks - - final long chunkStart = (baiEntry.getContainerStartByteOffset() << 16) | baiEntry.getLandmarkIndex(); - final long chunkEnd = ((baiEntry.getContainerStartByteOffset() << 16) | baiEntry.getLandmarkIndex()) + 1; - - final Chunk newChunk = new Chunk(chunkStart, chunkEnd); - - final List oldChunks = bin.getChunkList(); - if (!bin.containsChunks()) { - bin.addInitialChunk(newChunk); - - } else { - final Chunk lastChunk = bin.getLastChunk(); - - // Coalesce chunks that are in the same or adjacent file blocks. - // Similar to AbstractBAMFileIndex.optimizeChunkList, - // but no need to copy the list, no minimumOffset, and maintain bin.lastChunk - if (BlockCompressedFilePointerUtil.areInSameOrAdjacentBlocks(lastChunk.getChunkEnd(), chunkStart)) { - lastChunk.setChunkEnd(chunkEnd); // coalesced - } else { - oldChunks.add(newChunk); - bin.setLastChunk(newChunk); - } - } - - // process linear index - - // the smallest file offset that appears in the 16k window for this bin - final int alignmentStart = baiEntry.getAlignmentStart(); - final int alignmentEnd = baiEntry.getAlignmentStart() + baiEntry.getAlignmentSpan(); - int startWindow = LinearIndex.convertToLinearIndexOffset(alignmentStart); // the 16k window - final int endWindow; - - if (alignmentEnd == SAMRecord.NO_ALIGNMENT_START) { // assume alignment uses one position - // Next line for C (samtools index) compatibility. 
Differs only when on a window boundary - startWindow = LinearIndex.convertToLinearIndexOffset(alignmentStart - 1); - endWindow = startWindow; - } else { - endWindow = LinearIndex.convertToLinearIndexOffset(alignmentEnd); - } - - if (endWindow > largestIndexSeen) { - largestIndexSeen = endWindow; - } - - // set linear index at every 16K window that this alignment overlaps - for (int win = startWindow; win <= endWindow; win++) { - if (index[win] == 0 || chunkStart < index[win]) { - index[win] = chunkStart; - } - } - } - - /** - * Creates the BAMIndexContent for this reference. - * Requires all alignments of the reference have already been processed. - */ - private BAMIndexContent processCurrentReference() { - - // process bins - if (binsSeen == 0) { - return null; // no bins for this reference - } - - // process chunks - // nothing needed - - // process linear index - // linear index will only be as long as the largest index seen - final long[] newIndex = new long[largestIndexSeen + 1]; // in java1.6 Arrays.copyOf(index, largestIndexSeen + 1); - - // C (samtools index) also fills in intermediate 0's with values. 
This seems unnecessary, but safe - long lastNonZeroOffset = 0; - for (int i = 0; i <= largestIndexSeen; i++) { - if (index[i] == 0) { - index[i] = lastNonZeroOffset; // not necessary, but C (samtools index) does this - // note, if you remove the above line BAMIndexWriterTest.compareTextual and compareBinary will have to change - } else { - lastNonZeroOffset = index[i]; - } - newIndex[i] = index[i]; - } - - final LinearIndex linearIndex = new LinearIndex(currentReference, 0, newIndex); - - return new BAMIndexContent(currentReference, bins, binsSeen, indexStats, linearIndex); - } - - /** - * @return the count of records with no coordinate positions - */ - private long getNoCoordinateRecordCount() { - return indexStats.getNoCoordinateRecordCount(); - } - - /** - * reinitialize all data structures when the reference changes - */ - private void startNewReference() { - bins = null; - if (binsSeen > 0) { - Arrays.fill(index, 0); - } - binsSeen = 0; - largestIndexSeen = -1; - indexStats.newReference(); - } - } -} +/******************************************************************************* + * Copyright 2013 EMBL-EBI + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ******************************************************************************/ +/* + * The MIT License + * + * Copyright (c) 2014 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sub-license, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools; + +import htsjdk.samtools.cram.BAIEntry; +import htsjdk.samtools.cram.CRAIEntry; +import htsjdk.samtools.cram.CRAIIndex; +import htsjdk.samtools.cram.build.CramIO; +import htsjdk.samtools.cram.ref.ReferenceContext; +import htsjdk.samtools.cram.structure.*; +import htsjdk.samtools.seekablestream.SeekableStream; +import htsjdk.samtools.util.BlockCompressedFilePointerUtil; +import htsjdk.samtools.util.Log; +import htsjdk.samtools.util.ProgressLogger; +import htsjdk.samtools.util.RuntimeIOException; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.*; + +/** + * Class for both constructing BAM index content and writing it out. 
+ * + * There are two usage patterns: + * + * 1) Building a bam index (BAI) while building the CRAM file + * 2) Building a bam index (BAI) from an existing CRAI file + * + * 1) is driven by {@link CRAMContainerStreamWriter} and proceeds by calling {@link CRAMBAIIndexer#processContainer} + * after each {@link Container} is built, and {@link CRAMBAIIndexer#finish()} is called at the end. + * + * 2) is driven by {@link CRAIIndex#openCraiFileAsBaiStream(InputStream, SAMSequenceDictionary)} + * and proceeds by processing {@link CRAIEntry} elements obtained from + * {@link CRAMCRAIIndexer#readIndex(InputStream)}. {@link CRAMBAIIndexer#processBAIEntry(BAIEntry)} + * is called on each {@link CRAIEntry} and {@link CRAMBAIIndexer#finish()} is called at the end. + * + * NOTE: a third pattern of building a BAI from a CRAM file is also supported by this class, + * but it is unused. This would be accomplished via {@link #createIndex(SeekableStream, File, Log, ValidationStringency)}. + */ +public class CRAMBAIIndexer implements CRAMIndexer { + + // The number of references (chromosomes) in the BAM file + private final int numReferences; + + // output written as binary, or (for debugging) as text + private final BAMIndexWriter outputWriter; + + private int currentReference = 0; + + // content is built up from the input bam file using this + private final CRAMBAIIndexBuilder indexBuilder; + private final CompressorCache compressorCache = new CompressorCache(); + + /** + * Create a CRAM indexer that writes BAI to a file. 
+ * + * @param output binary BAM Index (.bai) file + * @param fileHeader header for the corresponding bam file + */ + private CRAMBAIIndexer(final File output, final SAMFileHeader fileHeader) { + if (fileHeader.getSortOrder() != SAMFileHeader.SortOrder.coordinate) { + throw new SAMException("CRAM file must be coordinate-sorted for indexing."); + } + numReferences = fileHeader.getSequenceDictionary().size(); + indexBuilder = new CRAMBAIIndexBuilder(fileHeader); + outputWriter = new BinaryBAMIndexWriter(numReferences, output); + } + + /** + * Create a CRAM indexer that writes BAI to a stream. + * + * @param output Index will be written here. output will be closed when finish() method is called. + * @param fileHeader header for the corresponding bam file. + */ + public CRAMBAIIndexer(final OutputStream output, final SAMFileHeader fileHeader) { + if (fileHeader.getSortOrder() != SAMFileHeader.SortOrder.coordinate) { + throw new SAMException("CRAM file mut be coordinate-sorted for indexing."); + } + numReferences = fileHeader.getSequenceDictionary().size(); + indexBuilder = new CRAMBAIIndexBuilder(fileHeader); + outputWriter = new BinaryBAMIndexWriter(numReferences, output); + } + + /** + * Index a container, any of mapped, unmapped and multiple references are allowed. + * The only requirement is sort order by coordinate. + * For multiref containers the method reads the container through unpacking all reads. + * This is slower than single reference but should be faster than normal reading. 
+ * + * @param container container to be indexed + */ + @Override + public void processContainer(final Container container, final ValidationStringency validationStringency) { + container.getBAIEntries(compressorCache).forEach(b -> processBAIEntry(b)); + } + + public final void processBAIEntry(final BAIEntry baiEntry) { + + final ReferenceContext entryContext = baiEntry.getReferenceContext(); + if (entryContext.isMultiRef()) { + throw new SAMException("Expecting a single reference or unmapped slice."); + } + + if (entryContext.isMappedSingleRef()) { + final int reference = entryContext.getReferenceSequenceID(); + if (reference != currentReference) { + // process any completed references + advanceToReference(reference); + } + + // check that it advanced properly + if (reference != currentReference) { + throw new SAMException(String.format( + "Unexpected reference %s when constructing index for reference %d for slice", + reference, currentReference)); + } + } + + indexBuilder.recordBAIEntryIndexMetadata(baiEntry); + + if (entryContext.isMappedSingleRef()) { + indexBuilder.processBAIEntry(baiEntry); + } + } + + /** + * After all the slices have been processed, finish is called. + * Writes any final information and closes the output file. 
+ */ + @Override + public void finish() { + // process any remaining references + advanceToReference(numReferences); + outputWriter.writeNoCoordinateRecordCount(indexBuilder.getNoCoordinateRecordCount()); + outputWriter.close(); + } + + /** + * write out any references between the currentReference and the nextReference + */ + private void advanceToReference(final int nextReference) { + while (currentReference < nextReference) { + final BAMIndexContent content = indexBuilder.processCurrentReference(); + outputWriter.writeReference(content); + currentReference++; + indexBuilder.startNewReference(); + } + } + + /** + * Generates a BAI index file from an input CRAM stream + * + * @param stream CRAM stream to index + * @param output File for output index file + * @param log optional {@link htsjdk.samtools.util.Log} to output progress + */ + public static void createIndex( + final SeekableStream stream, + final File output, + final Log log, + final ValidationStringency validationStringency) { + + final CramHeader cramHeader = CramIO.readCramHeader(stream); + final SAMFileHeader samFileHeader = + Container.readSAMFileHeaderContainer(cramHeader.getCRAMVersion(), stream, null); + if (samFileHeader.getSortOrder() != SAMFileHeader.SortOrder.coordinate) { + throw new SAMException(String.format( + "Input must be coordinate sorted (found %s) to create an index.", samFileHeader.getSortOrder())); + } + final CRAMBAIIndexer indexer = new CRAMBAIIndexer(output, samFileHeader); + + Container container = null; + final ProgressLogger progressLogger = new ProgressLogger(log, 1, "indexed", "slices"); + do { + try { + container = new Container(cramHeader.getCRAMVersion(), stream, stream.position()); + } catch (final IOException e) { + throw new RuntimeIOException("error getting stream position", e); + } + if (container == null || container.isEOF()) { + break; + } + + indexer.processContainer(container, validationStringency); + + if (null != log) { + String sequenceName; + final 
AlignmentContext alignmentContext = container.getAlignmentContext(); + final ReferenceContext containerReferenceContext = alignmentContext.getReferenceContext(); + switch (containerReferenceContext.getType()) { + case UNMAPPED_UNPLACED_TYPE: + sequenceName = "?"; + break; + case MULTIPLE_REFERENCE_TYPE: + sequenceName = "???"; + break; + default: + sequenceName = samFileHeader + .getSequence(containerReferenceContext.getReferenceSequenceID()) + .getSequenceName(); + break; + } + progressLogger.record(sequenceName, alignmentContext.getAlignmentStart()); + } + + } while (!container.isEOF()); + + indexer.finish(); + } + + /** + * Class for constructing BAM index files. + * One instance is used to construct an entire index. + * processBAIEntry is called for each BAI entry until a new reference is encountered, then + * processCurrentReference is called when all entries for the reference have been processed. + */ + private class CRAMBAIIndexBuilder { + + private final SAMFileHeader bamHeader; + + // the bins for the current reference + private Bin[] bins; // made only as big as needed for each reference + private int binsSeen = 0; + + // linear index for the current reference + private final long[] index = new long[LinearIndex.MAX_LINEAR_INDEX_SIZE]; + private int largestIndexSeen = -1; + + // information in meta data + private final BAMIndexMetaData indexStats = new BAMIndexMetaData(); + + /** + * @param header SAMFileHeader used for reference name (in index stats) and for max bin number + */ + private CRAMBAIIndexBuilder(final SAMFileHeader header) { + this.bamHeader = header; + } + + private SAMFileHeader getBamHeader() { + return bamHeader; + } + + private void recordBAIEntryIndexMetadata(final BAIEntry baiEntry) { + indexStats.recordMetaData(baiEntry); + } + + private int computeIndexingBin(final BAIEntry baiEntry) { + // regionToBin has zero-based, half-open API + // final AlignmentContext sliceAlignmentContext = baiEntry.getAlignmentContext(); + final int alignmentStart
= baiEntry.getAlignmentStart() - 1; + int alignmentEnd = baiEntry.getAlignmentStart() + baiEntry.getAlignmentSpan() - 1; + if (alignmentEnd <= alignmentStart) { + // If alignment end cannot be determined (e.g. because this read is not really aligned), + // then treat this as a one base alignment for indexing purposes. + alignmentEnd = alignmentStart + 1; + } + return GenomicIndexUtil.regionToBin(alignmentStart, alignmentEnd); + } + + /** + * Record any index information for a given CRAM slice + * + * Reads these Slice fields: + * sequenceId, alignmentStart, alignmentSpan, containerByteOffset, index + * + * //* @param slice CRAM slice, single ref only. + */ + private void processBAIEntry(final BAIEntry baiEntry) { + final ReferenceContext sliceContext = baiEntry.getReferenceContext(); + if (!sliceContext.isMappedSingleRef()) { + return; // do nothing for records without coordinates, but count them + } + + // various checks + final int reference = sliceContext.getReferenceSequenceID(); + if (reference != currentReference) { + throw new SAMException(String.format( + "Unexpected reference %s when constructing index for reference %d for slice", + reference, currentReference)); + } + + // process bins + + final int binNum = computeIndexingBin(baiEntry); + + // has the bins array been allocated? If not, do so + if (bins == null) { + final SAMSequenceRecord seq = bamHeader.getSequence(reference); + if (seq == null) { + bins = new Bin[GenomicIndexUtil.MAX_BINS + 1]; + } else { + bins = new Bin[AbstractBAMFileIndex.getMaxBinNumberForSequenceLength(seq.getSequenceLength()) + 1]; + } + } + + // is there a bin already represented for this index? 
if not, add one + final Bin bin; + if (bins[binNum] != null) { + bin = bins[binNum]; + } else { + bin = new Bin(reference, binNum); + bins[binNum] = bin; + binsSeen++; + } + + // process chunks + + final long chunkStart = (baiEntry.getContainerStartByteOffset() << 16) | baiEntry.getLandmarkIndex(); + final long chunkEnd = ((baiEntry.getContainerStartByteOffset() << 16) | baiEntry.getLandmarkIndex()) + 1; + + final Chunk newChunk = new Chunk(chunkStart, chunkEnd); + + final List<Chunk> oldChunks = bin.getChunkList(); + if (!bin.containsChunks()) { + bin.addInitialChunk(newChunk); + + } else { + final Chunk lastChunk = bin.getLastChunk(); + + // Coalesce chunks that are in the same or adjacent file blocks. + // Similar to AbstractBAMFileIndex.optimizeChunkList, + // but no need to copy the list, no minimumOffset, and maintain bin.lastChunk + if (BlockCompressedFilePointerUtil.areInSameOrAdjacentBlocks(lastChunk.getChunkEnd(), chunkStart)) { + lastChunk.setChunkEnd(chunkEnd); // coalesced + } else { + oldChunks.add(newChunk); + bin.setLastChunk(newChunk); + } + } + + // process linear index + + // the smallest file offset that appears in the 16k window for this bin + final int alignmentStart = baiEntry.getAlignmentStart(); + final int alignmentEnd = baiEntry.getAlignmentStart() + baiEntry.getAlignmentSpan(); + int startWindow = LinearIndex.convertToLinearIndexOffset(alignmentStart); // the 16k window + final int endWindow; + + if (alignmentEnd == SAMRecord.NO_ALIGNMENT_START) { // assume alignment uses one position + // Next line for C (samtools index) compatibility.
Differs only when on a window boundary + startWindow = LinearIndex.convertToLinearIndexOffset(alignmentStart - 1); + endWindow = startWindow; + } else { + endWindow = LinearIndex.convertToLinearIndexOffset(alignmentEnd); + } + + if (endWindow > largestIndexSeen) { + largestIndexSeen = endWindow; + } + + // set linear index at every 16K window that this alignment overlaps + for (int win = startWindow; win <= endWindow; win++) { + if (index[win] == 0 || chunkStart < index[win]) { + index[win] = chunkStart; + } + } + } + + /** + * Creates the BAMIndexContent for this reference. + * Requires all alignments of the reference have already been processed. + */ + private BAMIndexContent processCurrentReference() { + + // process bins + if (binsSeen == 0) { + return null; // no bins for this reference + } + + // process chunks + // nothing needed + + // process linear index + // linear index will only be as long as the largest index seen + final long[] newIndex = + new long[largestIndexSeen + 1]; // in java1.6 Arrays.copyOf(index, largestIndexSeen + 1); + + // C (samtools index) also fills in intermediate 0's with values. 
This seems unnecessary, but safe + long lastNonZeroOffset = 0; + for (int i = 0; i <= largestIndexSeen; i++) { + if (index[i] == 0) { + index[i] = lastNonZeroOffset; // not necessary, but C (samtools index) does this + // note, if you remove the above line BAMIndexWriterTest.compareTextual and compareBinary will have + // to change + } else { + lastNonZeroOffset = index[i]; + } + newIndex[i] = index[i]; + } + + final LinearIndex linearIndex = new LinearIndex(currentReference, 0, newIndex); + + return new BAMIndexContent(currentReference, bins, binsSeen, indexStats, linearIndex); + } + + /** + * @return the count of records with no coordinate positions + */ + private long getNoCoordinateRecordCount() { + return indexStats.getNoCoordinateRecordCount(); + } + + /** + * reinitialize all data structures when the reference changes + */ + private void startNewReference() { + bins = null; + if (binsSeen > 0) { + Arrays.fill(index, 0); + } + binsSeen = 0; + largestIndexSeen = -1; + indexStats.newReference(); + } + } +} diff --git a/src/main/java/htsjdk/samtools/CRAMCRAIIndexer.java b/src/main/java/htsjdk/samtools/CRAMCRAIIndexer.java index aaff4847fe..4b03425ee6 100644 --- a/src/main/java/htsjdk/samtools/CRAMCRAIIndexer.java +++ b/src/main/java/htsjdk/samtools/CRAMCRAIIndexer.java @@ -7,11 +7,10 @@ import htsjdk.samtools.cram.structure.*; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.RuntimeIOException; - import java.io.BufferedOutputStream; +import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.io.IOException; import java.util.Collection; import java.util.Scanner; import java.util.zip.GZIPInputStream; @@ -28,8 +27,8 @@ */ public class CRAMCRAIIndexer implements CRAMIndexer { - final private CRAIIndex craiIndex = new CRAIIndex(); - final private GZIPOutputStream os; + private final CRAIIndex craiIndex = new CRAIIndex(); + private final GZIPOutputStream os; /** * Create a CRAMCRAIIndexer that 
writes to the given output stream. @@ -43,8 +42,7 @@ public CRAMCRAIIndexer(final OutputStream os, final SAMFileHeader samHeader) { } try { this.os = new GZIPOutputStream(new BufferedOutputStream(os)); - } - catch (IOException e) { + } catch (IOException e) { throw new RuntimeIOException("Error opening CRAI index output stream"); } } @@ -57,9 +55,7 @@ public CRAMCRAIIndexer(final OutputStream os, final SAMFileHeader samHeader) { * @param samHeader SAMFileHeader - user to verify sort order * @param entries the CRAI entries to index */ - public CRAMCRAIIndexer(final OutputStream os, - final SAMFileHeader samHeader, - final Collection entries) { + public CRAMCRAIIndexer(final OutputStream os, final SAMFileHeader samHeader, final Collection entries) { this(os, samHeader); craiIndex.addEntries(entries); } @@ -73,9 +69,7 @@ public void processContainer(final Container container) { } @Override - public void processContainer( - final Container container, - final ValidationStringency validationStringency) { + public void processContainer(final Container container, final ValidationStringency validationStringency) { processContainer(container); } @@ -88,8 +82,7 @@ public void finish() { craiIndex.writeIndex(os); os.flush(); os.close(); - } - catch (IOException e) { + } catch (IOException e) { throw new RuntimeIOException("Error writing CRAI index to output stream"); } } @@ -102,11 +95,11 @@ public void finish() { */ public static void writeIndex(final SeekableStream cramStream, OutputStream craiStream) { final CramHeader cramHeader = CramIO.readCramHeader(cramStream); - final SAMFileHeader samFileHeader = Container.readSAMFileHeaderContainer(cramHeader.getCRAMVersion(), cramStream, null); + final SAMFileHeader samFileHeader = + Container.readSAMFileHeaderContainer(cramHeader.getCRAMVersion(), cramStream, null); if (samFileHeader.getSortOrder() != SAMFileHeader.SortOrder.coordinate) { throw new SAMException(String.format( - "Input must be coordinate sorted (found %s) to create 
an index.", - samFileHeader.getSortOrder())); + "Input must be coordinate sorted (found %s) to create an index.", samFileHeader.getSortOrder())); } final CRAMCRAIIndexer indexer = new CRAMCRAIIndexer(craiStream, samFileHeader); final CRAMVersion cramVersion = cramHeader.getCRAMVersion(); @@ -141,11 +134,9 @@ public static CRAIIndex readIndex(final InputStream is) { final String line = scanner.nextLine(); craiIndex.addEntry(new CRAIEntry(line)); } - } - catch (IOException e) { + } catch (IOException e) { throw new RuntimeIOException("Error reading CRAI index from output stream"); - } - finally { + } finally { if (null != scanner) { scanner.close(); } @@ -153,5 +144,4 @@ public static CRAIIndex readIndex(final InputStream is) { return craiIndex; } - -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java b/src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java index d8c1dd9568..85db82e02a 100644 --- a/src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java +++ b/src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java @@ -2,11 +2,10 @@ import htsjdk.samtools.cram.build.ContainerFactory; import htsjdk.samtools.cram.build.CramIO; -import htsjdk.samtools.cram.common.CramVersions; +import htsjdk.samtools.cram.common.CRAMVersion; import htsjdk.samtools.cram.ref.CRAMReferenceSource; import htsjdk.samtools.cram.structure.*; import htsjdk.samtools.util.RuntimeIOException; - import java.io.IOException; import java.io.OutputStream; @@ -19,6 +18,7 @@ public class CRAMContainerStreamWriter { private final SAMFileHeader samFileHeader; private final ContainerFactory containerFactory; private final CRAMIndexer cramIndexer; + private final CRAMVersion cramVersion; private long streamOffset = 0; @@ -38,13 +38,14 @@ public CRAMContainerStreamWriter( final CRAMReferenceSource source, final SAMFileHeader samFileHeader, final String outputIdentifier) { - this(recordOutputStream, + this( + recordOutputStream, source, 
samFileHeader, outputIdentifier, - indexOutputStream == null ? - null : - new CRAMBAIIndexer(indexOutputStream, samFileHeader)); // default to BAI index + indexOutputStream == null + ? null + : new CRAMBAIIndexer(indexOutputStream, samFileHeader)); // default to BAI index } /** @@ -70,7 +71,7 @@ public CRAMContainerStreamWriter( * Create a CRAMContainerStreamWriter for writing SAM records into a series of CRAM * containers on output stream, with an optional index. * - * @param encodingStrategy encoding strategy values + * @param encodingStrategy encoding strategy values (includes CRAM version) * @param referenceSource reference cramReferenceSource * @param samFileHeader {@link SAMFileHeader} to be used. Sort order is determined by the sortOrder property of this arg. * @param outputStream where to write the CRAM stream. @@ -88,6 +89,7 @@ public CRAMContainerStreamWriter( this.outputStream = outputStream; this.cramIndexer = indexer; this.outputStreamIdentifier = outputIdentifier; + this.cramVersion = encodingStrategy.getCramVersion(); this.containerFactory = new ContainerFactory(samFileHeader, encodingStrategy, referenceSource); } @@ -103,13 +105,14 @@ public void writeAlignment(final SAMRecord alignment) { } /** - * Write a CRAM file header and the previously provided SAM header to the stream. + * Write a CRAM file header and the provided SAM header to the stream. + * Retained for backward compatibility with external projects (disq, GATK). 
*/ - // TODO: retained for backward compatibility for disq in order to run GATK tests (remove before merging this branch) public void writeHeader(final SAMFileHeader requestedSAMFileHeader) { - final CramHeader cramHeader = new CramHeader(CramVersions.DEFAULT_CRAM_VERSION, outputStreamIdentifier); + final CramHeader cramHeader = new CramHeader(cramVersion, outputStreamIdentifier); streamOffset = CramIO.writeCramHeader(cramHeader, outputStream); - streamOffset += Container.writeSAMFileHeaderContainer(cramHeader.getCRAMVersion(), requestedSAMFileHeader, outputStream); + streamOffset += Container.writeSAMFileHeaderContainer( + cramHeader.getCRAMVersion(), requestedSAMFileHeader, outputStream); } /** @@ -131,7 +134,7 @@ public void finish(final boolean writeEOFContainer) { writeContainer(container); } if (writeEOFContainer) { - CramIO.writeCramEOF(CramVersions.DEFAULT_CRAM_VERSION, outputStream); + CramIO.writeCramEOF(cramVersion, outputStream); } outputStream.flush(); if (cramIndexer != null) { @@ -144,12 +147,11 @@ public void finish(final boolean writeEOFContainer) { } protected void writeContainer(final Container container) { - streamOffset += container.write(CramVersions.DEFAULT_CRAM_VERSION, outputStream); + streamOffset += container.write(cramVersion, outputStream); if (cramIndexer != null) { // using silent validation here because the reads have been through validation already or // they have been generated somehow through the htsjdk - cramIndexer.processContainer(container, ValidationStringency.SILENT); + cramIndexer.processContainer(container, ValidationStringency.SILENT); } } - } diff --git a/src/main/java/htsjdk/samtools/CRAMFileReader.java b/src/main/java/htsjdk/samtools/CRAMFileReader.java index 43e66711ff..730aa3f504 100644 --- a/src/main/java/htsjdk/samtools/CRAMFileReader.java +++ b/src/main/java/htsjdk/samtools/CRAMFileReader.java @@ -23,7 +23,6 @@ import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.*; import 
htsjdk.utils.ValidationUtils; - import java.io.*; import java.util.ArrayList; import java.util.Arrays; @@ -48,7 +47,7 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa private ValidationStringency validationStringency; - private final static Log log = Log.getInstance(CRAMFileReader.class); + private static final Log log = Log.getInstance(CRAMFileReader.class); /** * Create a CRAMFileReader from either a file or input stream using the reference source returned by @@ -76,11 +75,10 @@ public CRAMFileReader(final File cramFile, final InputStream inputStream) { * @throws IllegalArgumentException if the {@code cramFile} and the {@code inputStream} are both null * or if the {@code CRAMReferenceSource} is null */ - public CRAMFileReader(final File cramFile, - final InputStream inputStream, - final CRAMReferenceSource referenceSource) { - ValidationUtils.validateArg(cramFile != null || inputStream != null, - "Either file or input stream is required."); + public CRAMFileReader( + final File cramFile, final InputStream inputStream, final CRAMReferenceSource referenceSource) { + ValidationUtils.validateArg( + cramFile != null || inputStream != null, "Either file or input stream is required."); this.cramFile = cramFile; this.inputStream = new BufferedInputStream(inputStream); @@ -101,10 +99,8 @@ public CRAMFileReader(final File cramFile, * reference sequences. May not be null. 
* @throws IllegalArgumentException if the {@code cramFile} or the {@code CRAMReferenceSource} is null */ - public CRAMFileReader(final File cramFile, - final File indexFile, - final CRAMReferenceSource referenceSource) { - ValidationUtils.nonNull(cramFile,"File is required."); + public CRAMFileReader(final File cramFile, final File indexFile, final CRAMReferenceSource referenceSource) { + ValidationUtils.nonNull(cramFile, "File is required."); this.cramFile = cramFile; mIndexFile = findIndexForFile(indexFile, cramFile); @@ -122,7 +118,7 @@ public CRAMFileReader(final File cramFile, * @throws IllegalArgumentException if the {@code cramFile} or the {@code CRAMReferenceSource} is null */ public CRAMFileReader(final File cramFile, final CRAMReferenceSource referenceSource) { - ValidationUtils.nonNull(cramFile,"File is required."); + ValidationUtils.nonNull(cramFile, "File is required."); this.cramFile = cramFile; this.referenceSource = referenceSource; @@ -143,10 +139,12 @@ public CRAMFileReader(final File cramFile, final CRAMReferenceSource referenceSo * * @throws IllegalArgumentException if the {@code inputStream} or the {@code CRAMReferenceSource} is null */ - public CRAMFileReader(final InputStream inputStream, - final SeekableStream indexInputStream, - final CRAMReferenceSource referenceSource, - final ValidationStringency validationStringency) throws IOException { + public CRAMFileReader( + final InputStream inputStream, + final SeekableStream indexInputStream, + final CRAMReferenceSource referenceSource, + final ValidationStringency validationStringency) + throws IOException { ValidationUtils.nonNull(inputStream, "Input stream can not be null for CRAM reader"); this.referenceSource = referenceSource; initWithStreams(inputStream, indexInputStream, validationStringency); @@ -164,11 +162,17 @@ public CRAMFileReader(final InputStream inputStream, * * @throws IllegalArgumentException if the {@code inputStream} or the {@code CRAMReferenceSource} is null */ - public 
CRAMFileReader(final InputStream stream, - final File indexFile, - final CRAMReferenceSource referenceSource, - final ValidationStringency validationStringency) throws IOException { - this(stream, indexFile == null ? null : new SeekableFileStream(indexFile), referenceSource, validationStringency); + public CRAMFileReader( + final InputStream stream, + final File indexFile, + final CRAMReferenceSource referenceSource, + final ValidationStringency validationStringency) + throws IOException { + this( + stream, + indexFile == null ? null : new SeekableFileStream(indexFile), + referenceSource, + validationStringency); } /** @@ -183,8 +187,12 @@ public CRAMFileReader(final InputStream stream, * * @throws IllegalArgumentException if the {@code cramFile} or the {@code CRAMReferenceSource} is null */ - public CRAMFileReader(final File cramFile, final File indexFile, final CRAMReferenceSource referenceSource, - final ValidationStringency validationStringency) throws IOException { + public CRAMFileReader( + final File cramFile, + final File indexFile, + final CRAMReferenceSource referenceSource, + final ValidationStringency validationStringency) + throws IOException { ValidationUtils.nonNull(cramFile, "Input file can not be null for CRAM reader"); this.cramFile = cramFile; @@ -194,17 +202,21 @@ public CRAMFileReader(final File cramFile, final File indexFile, final CRAMRefer initWithStreams(new BufferedInputStream(new FileInputStream(cramFile)), indexStream, validationStringency); } - private void initWithStreams(final InputStream inputStream, final SeekableStream indexInputStream, - final ValidationStringency validationStringency) throws IOException { + private void initWithStreams( + final InputStream inputStream, + final SeekableStream indexInputStream, + final ValidationStringency validationStringency) + throws IOException { this.inputStream = inputStream; this.validationStringency = validationStringency; iterator = new CRAMIterator(inputStream, referenceSource, 
validationStringency); if (indexInputStream != null) { - SeekableStream baiStream = SamIndexes.asBaiSeekableStreamOrNull(indexInputStream, iterator.getSAMFileHeader().getSequenceDictionary()); - if (null != baiStream) { - mIndex = new CachingBAMFileIndex(baiStream, iterator.getSAMFileHeader().getSequenceDictionary()); - } - else { + SeekableStream baiStream = SamIndexes.asBaiSeekableStreamOrNull( + indexInputStream, iterator.getSAMFileHeader().getSequenceDictionary()); + if (null != baiStream) { + mIndex = new CachingBAMFileIndex( + baiStream, iterator.getSAMFileHeader().getSequenceDictionary()); + } else { throw new IllegalArgumentException("CRAM index must be a BAI or CRAI stream"); } } @@ -213,8 +225,8 @@ private void initWithStreams(final InputStream inputStream, final SeekableStream private File findIndexForFile(File indexFile, final File cramFile) { indexFile = indexFile == null ? SamFiles.findIndex(cramFile) : indexFile; if (indexFile != null && indexFile.lastModified() < cramFile.lastModified()) { - log.warn("CRAM index file " + indexFile.getAbsolutePath() + - " is older than CRAM " + cramFile.getAbsolutePath()); + log.warn("CRAM index file " + indexFile.getAbsolutePath() + " is older than CRAM " + + cramFile.getAbsolutePath()); } return indexFile; } @@ -237,8 +249,7 @@ void enableCrcChecking(final boolean enabled) { } @Override - void setSAMRecordFactory(final SAMRecordFactory factory) { - } + void setSAMRecordFactory(final SAMRecordFactory factory) {} @Override public boolean hasIndex() { @@ -253,9 +264,9 @@ public BAMIndex getIndex() { if (mIndex == null) { final SAMSequenceDictionary dictionary = getFileHeader().getSequenceDictionary(); if (mIndexFile.getName().endsWith(FileExtensions.BAI_INDEX)) { - mIndex = mEnableIndexCaching ? - new CachingBAMFileIndex(mIndexFile, dictionary, mEnableIndexMemoryMapping) : - new DiskBasedBAMFileIndex(mIndexFile, dictionary, mEnableIndexMemoryMapping); + mIndex = mEnableIndexCaching + ? 
new CachingBAMFileIndex(mIndexFile, dictionary, mEnableIndexMemoryMapping) + : new DiskBasedBAMFileIndex(mIndexFile, dictionary, mEnableIndexMemoryMapping); return mIndex; } @@ -270,18 +281,22 @@ public BAMIndex getIndex() { throw new RuntimeException(e); } - mIndex = mEnableIndexCaching ? - new CachingBAMFileIndex(baiStream, getFileHeader().getSequenceDictionary()) : - new DiskBasedBAMFileIndex(baiStream, getFileHeader().getSequenceDictionary()); + mIndex = mEnableIndexCaching + ? new CachingBAMFileIndex(baiStream, getFileHeader().getSequenceDictionary()) + : new DiskBasedBAMFileIndex(baiStream, getFileHeader().getSequenceDictionary()); } return mIndex; } @Override - public boolean hasBrowseableIndex() { return false; } + public boolean hasBrowseableIndex() { + return false; + } @Override - public BrowseableBAMIndex getBrowseableIndex() { return null; } + public BrowseableBAMIndex getBrowseableIndex() { + return null; + } @Override public SAMRecordIterator iterator(final SAMFileSpan fileSpan) { @@ -297,7 +312,9 @@ public SAMRecordIterator iterator(final SAMFileSpan fileSpan) { } @Override - public SAMFileHeader getFileHeader() { return iterator.getSAMFileHeader(); } + public SAMFileHeader getFileHeader() { + return iterator.getSAMFileHeader(); + } @Override public SAMRecordIterator getIterator() { @@ -306,7 +323,8 @@ public SAMRecordIterator getIterator() { } try { if (cramFile != null) { - iterator = new CRAMIterator(new BufferedInputStream(new FileInputStream(cramFile)), referenceSource, validationStringency); + iterator = new CRAMIterator( + new BufferedInputStream(new FileInputStream(cramFile)), referenceSource, validationStringency); } else { iterator = new CRAMIterator(inputStream, referenceSource, validationStringency); } @@ -348,8 +366,7 @@ public void remove() { } @Override - public void close() { - } + public void close() {} @Override public SAMRecordIterator assertSorted(final SortOrder sortOrder) { @@ -358,8 +375,7 @@ public SAMRecordIterator 
assertSorted(final SortOrder sortOrder) { }; @Override - public CloseableIterator queryAlignmentStart(final String sequence, - final int start) { + public CloseableIterator queryAlignmentStart(final String sequence, final int start) { final SAMFileHeader fileHeader = getFileHeader(); final int referenceIndex = fileHeader.getSequenceIndex(sequence); // alignment start requires a filtering iterator to ensure that records in the @@ -374,10 +390,15 @@ public CloseableIterator queryUnmapped() { try { seekableStream.seek(0); iterator = new CRAMIterator(seekableStream, referenceSource, validationStringency); - seekableStream.seek(startOfLastLinearBin >>> 16); + // When startOfLastLinearBin is -1, there are no mapped reads and the entire file is + // unmapped. In that case, iterate from the beginning (already at position 0). + if (startOfLastLinearBin != -1) { + seekableStream.seek(startOfLastLinearBin >>> 16); + } boolean atAlignments; do { - atAlignments = iterator.advanceToAlignmentInContainer(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX, SAMRecord.NO_ALIGNMENT_START); + atAlignments = iterator.advanceToAlignmentInContainer( + SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX, SAMRecord.NO_ALIGNMENT_START); } while (!atAlignments && iterator.hasNext()); } catch (final IOException e) { throw new RuntimeEOFException(e); @@ -425,19 +446,29 @@ public DeferredCloseSeekableStream(final SeekableStream delegateStream) { } } - public SeekableStream getDelegate() { return delegateStream; } + public SeekableStream getDelegate() { + return delegateStream; + } @Override - public long length() { return delegateStream.length(); } + public long length() { + return delegateStream.length(); + } @Override - public long position() throws IOException { return delegateStream.position(); } + public long position() throws IOException { + return delegateStream.position(); + } @Override - public void seek(long position) throws IOException { delegateStream.seek(position); } + public void seek(long position) 
throws IOException { + delegateStream.seek(position); + } @Override - public int read() throws IOException { return delegateStream.read(); } + public int read() throws IOException { + return delegateStream.read(); + } @Override public int read(byte[] buffer, int offset, int length) throws IOException { @@ -451,10 +482,14 @@ public void close() throws IOException { } @Override - public boolean eof() throws IOException { return delegateStream.eof(); } + public boolean eof() throws IOException { + return delegateStream.eof(); + } @Override - public String getSource() { return delegateStream.getSource(); } + public String getSource() { + return delegateStream.getSource(); + } } @Override @@ -482,8 +517,7 @@ public ValidationStringency getValidationStringency() { } @Override - public CloseableIterator query(final QueryInterval[] intervals, - final boolean contained) { + public CloseableIterator query(final QueryInterval[] intervals, final boolean contained) { return new CRAMIntervalIterator(intervals, contained); } @@ -507,16 +541,16 @@ void enableFileSource(final SamReader reader, final boolean enabled) { * @param filePointers file pointer pairs corresponding to chunk boundaries for the * intervals */ - public CloseableIterator createIndexIterator(final QueryInterval[] intervals, - final boolean contained, - final long[] filePointers) { + public CloseableIterator createIndexIterator( + final QueryInterval[] intervals, final boolean contained, final long[] filePointers) { return new CRAMIntervalIterator(intervals, contained, filePointers); } // convert queries -> merged BAMFileSpan -> coordinate array private static long[] coordinatesFromQueryIntervals(BAMIndex index, QueryInterval[] queries) { ArrayList spanList = new ArrayList<>(1); - Arrays.asList(queries).forEach(qi -> spanList.add(index.getSpanOverlapping(qi.referenceIndex, qi.start, qi.end))); + Arrays.asList(queries) + .forEach(qi -> spanList.add(index.getSpanOverlapping(qi.referenceIndex, qi.start, qi.end))); 
BAMFileSpan spanArray[] = new BAMFileSpan[spanList.size()]; for (int i = 0; i < spanList.size(); i++) { spanArray[i] = spanList.get(i); @@ -557,8 +591,7 @@ protected void initializeIterator(final QueryInterval[] queryIntervals, final lo referenceSource, validationStringency, queryIntervals, - coordinates - ); + coordinates); getNextRecord(); // advance to the first record that matches the filter criteria } } @@ -626,7 +659,7 @@ private class CRAMAlignmentStartIterator extends CRAMIntervalIteratorBase { final BAMStartingAtIteratorFilter startingAtIteratorFilter; public CRAMAlignmentStartIterator(final int referenceIndex, final int start) { - super(new QueryInterval[]{new QueryInterval(referenceIndex, start, -1)}, true); + super(new QueryInterval[] {new QueryInterval(referenceIndex, start, -1)}, true); startingAtIteratorFilter = new BAMStartingAtIteratorFilter(referenceIndex, start); initializeIterator(intervals, coordinatesFromQueryIntervals(getIndex(), intervals)); } diff --git a/src/main/java/htsjdk/samtools/CRAMFileWriter.java b/src/main/java/htsjdk/samtools/CRAMFileWriter.java index 15c5c20a84..5342a377d2 100644 --- a/src/main/java/htsjdk/samtools/CRAMFileWriter.java +++ b/src/main/java/htsjdk/samtools/CRAMFileWriter.java @@ -19,7 +19,6 @@ import htsjdk.samtools.cram.structure.CRAMEncodingStrategy; import htsjdk.samtools.util.BufferedLineReader; import htsjdk.samtools.util.Log; - import java.io.OutputStream; public class CRAMFileWriter extends SAMFileWriterImpl { @@ -44,8 +43,7 @@ public CRAMFileWriter( final OutputStream outputStream, final CRAMReferenceSource referenceSource, final SAMFileHeader samFileHeader, - final String fileName) - { + final String fileName) { this(outputStream, null, referenceSource, samFileHeader, fileName); // defaults to presorted == true } @@ -66,8 +64,7 @@ public CRAMFileWriter( final OutputStream indexOS, final CRAMReferenceSource referenceSource, final SAMFileHeader samFileHeader, - final String fileName) - { + final String 
fileName) { this(outputStream, indexOS, true, referenceSource, samFileHeader, fileName); // defaults to presorted==true } @@ -83,24 +80,29 @@ public CRAMFileWriter( * * @throws IllegalArgumentException if the {@code outputStream}, {@code referenceSource} or {@code samFileHeader} are null */ - public CRAMFileWriter(final OutputStream outputStream, final OutputStream indexOS, final boolean presorted, - final CRAMReferenceSource referenceSource, final SAMFileHeader samFileHeader, final String fileName) { - this( new CRAMEncodingStrategy(), outputStream, indexOS, presorted, referenceSource, samFileHeader, fileName); + public CRAMFileWriter( + final OutputStream outputStream, + final OutputStream indexOS, + final boolean presorted, + final CRAMReferenceSource referenceSource, + final SAMFileHeader samFileHeader, + final String fileName) { + this(new CRAMEncodingStrategy(), outputStream, indexOS, presorted, referenceSource, samFileHeader, fileName); } /** - * Create a CRAMFileWriter and optional index on output streams. - * - * @param encodingStrategy encoding strategy to use when writing - * @param outputStream where to write the output. Can not be null. - * @param indexOS where to write the output index. Can be null if no index is required. - * @param presorted if true records written to this writer must already be sorted in the order specified by the header - * @param referenceSource reference source - * @param samFileHeader {@link SAMFileHeader} to be used. Can not be null. Sort order is determined by the sortOrder property of this arg. - * @param fileName used for display in error message display - * - * @throws IllegalArgumentException if the {@code outputStream}, {@code referenceSource} or {@code samFileHeader} are null - */ + * Create a CRAMFileWriter and optional index on output streams. + * + * @param encodingStrategy encoding strategy to use when writing + * @param outputStream where to write the output. Can not be null. 
+ * @param indexOS where to write the output index. Can be null if no index is required. + * @param presorted if true records written to this writer must already be sorted in the order specified by the header + * @param referenceSource reference source + * @param samFileHeader {@link SAMFileHeader} to be used. Can not be null. Sort order is determined by the sortOrder property of this arg. + * @param fileName used for display in error message display + * + * @throws IllegalArgumentException if the {@code outputStream}, {@code referenceSource} or {@code samFileHeader} are null + */ public CRAMFileWriter( final CRAMEncodingStrategy encodingStrategy, final OutputStream outputStream, @@ -142,7 +144,8 @@ protected void writeAlignment(final SAMRecord alignment) { @Override protected void writeHeader(final String textHeader) { - writeHeader(new SAMTextHeaderCodec().decode(BufferedLineReader.fromString(textHeader),fileName != null ? fileName : null)); + writeHeader(new SAMTextHeaderCodec() + .decode(BufferedLineReader.fromString(textHeader), fileName != null ? 
fileName : null)); } @Override @@ -164,5 +167,4 @@ protected void finish() { protected String getFilename() { return fileName; } - } diff --git a/src/main/java/htsjdk/samtools/CRAMIndexer.java b/src/main/java/htsjdk/samtools/CRAMIndexer.java index 5e332a87ea..f58a2d8ce8 100644 --- a/src/main/java/htsjdk/samtools/CRAMIndexer.java +++ b/src/main/java/htsjdk/samtools/CRAMIndexer.java @@ -1,6 +1,5 @@ package htsjdk.samtools; -import htsjdk.samtools.cram.structure.CompressorCache; import htsjdk.samtools.cram.structure.Container; /** diff --git a/src/main/java/htsjdk/samtools/CRAMIterator.java b/src/main/java/htsjdk/samtools/CRAMIterator.java index d73c2b3021..2dc693c879 100644 --- a/src/main/java/htsjdk/samtools/CRAMIterator.java +++ b/src/main/java/htsjdk/samtools/CRAMIterator.java @@ -23,13 +23,11 @@ import htsjdk.samtools.cram.ref.CRAMReferenceSource; import htsjdk.samtools.cram.structure.*; import htsjdk.samtools.seekablestream.SeekableStream; - +import htsjdk.samtools.util.RuntimeIOException; import java.io.Closeable; import java.io.InputStream; import java.util.*; -import htsjdk.samtools.util.RuntimeIOException; - public class CRAMIterator implements SAMRecordIterator, Closeable { private final CountingInputStream countingInputStream; private final CramContainerIterator containerIterator; @@ -54,11 +52,13 @@ public class CRAMIterator implements SAMRecordIterator, Closeable { * (for identification by the validator which records are invalid) */ private long samRecordIndex; + private Iterator samRecordIterator = Collections.EMPTY_LIST.iterator(); - public CRAMIterator(final InputStream inputStream, - final CRAMReferenceSource referenceSource, - final ValidationStringency validationStringency) { + public CRAMIterator( + final InputStream inputStream, + final CRAMReferenceSource referenceSource, + final ValidationStringency validationStringency) { this.countingInputStream = new CountingInputStream(inputStream); this.containerIterator = new 
CramContainerIterator(this.countingInputStream); @@ -71,11 +71,12 @@ public CRAMIterator(final InputStream inputStream, this.queryIntervals = null; } - public CRAMIterator(final SeekableStream seekableStream, - final CRAMReferenceSource referenceSource, - final ValidationStringency validationStringency, - final QueryInterval[] queryIntervals, - final long[] coordinates) { + public CRAMIterator( + final SeekableStream seekableStream, + final CRAMReferenceSource referenceSource, + final ValidationStringency validationStringency, + final QueryInterval[] queryIntervals, + final long[] coordinates) { this.countingInputStream = new CountingInputStream(seekableStream); this.containerIterator = CramSpanContainerIterator.fromFileSpan(seekableStream, coordinates); @@ -110,12 +111,13 @@ private BAMIteratorFilter.FilteringIteratorState nextContainer() { if (containerMatchesQuery(container)) { samRecords = container.getSAMRecords( - validationStringency, - cramReferenceRegion, - compressorCache, - getSAMFileHeader()); + validationStringency, cramReferenceRegion, compressorCache, getSAMFileHeader()); samRecordIterator = samRecords.iterator(); - return BAMIteratorFilter.FilteringIteratorState.MATCHES_FILTER; + // A container may match the query but produce no records (e.g. a container with + // only a compression header and no slices). Skip to the next container in that case. + return samRecords.isEmpty() + ? 
BAMIteratorFilter.FilteringIteratorState.CONTINUE_ITERATION + : BAMIteratorFilter.FilteringIteratorState.MATCHES_FILTER; } else { return BAMIteratorFilter.FilteringIteratorState.CONTINUE_ITERATION; } @@ -128,21 +130,25 @@ private boolean containerMatchesQuery(final Container container) { // binary search our query intervals to see if the alignment span of this container // overlaps any query - it doesn't matter which one, we only care whether or not there is a match final AlignmentContext alignmentContext = container.getAlignmentContext(); - return (!alignmentContext.getReferenceContext().isMappedSingleRef() || - Arrays.binarySearch( - queryIntervals, - new QueryInterval( - alignmentContext.getReferenceContext().getReferenceContextID(), - alignmentContext.getAlignmentStart(), - alignmentContext.getAlignmentStart() + alignmentContext.getAlignmentSpan() - 1 - ), - overlapsContainerSpan) >= 0); + return (!alignmentContext.getReferenceContext().isMappedSingleRef() + || Arrays.binarySearch( + queryIntervals, + new QueryInterval( + alignmentContext + .getReferenceContext() + .getReferenceContextID(), + alignmentContext.getAlignmentStart(), + alignmentContext.getAlignmentStart() + + alignmentContext.getAlignmentSpan() + - 1), + overlapsContainerSpan) + >= 0); } } - //TODO: this should filter at the slice level! - //we don't actually care which QueryInterval overlaps with the container; we just want to know if there is one... - private final static Comparator overlapsContainerSpan = (queryInterval, containerInterval) -> { + // TODO: this should filter at the slice level! + // we don't actually care which QueryInterval overlaps with the container; we just want to know if there is one... 
+ private static final Comparator overlapsContainerSpan = (queryInterval, containerInterval) -> { int comp = queryInterval.referenceIndex - containerInterval.referenceIndex; if (comp != 0) { return comp; @@ -150,9 +156,7 @@ private boolean containerMatchesQuery(final Container container) { if (queryInterval.end <= 0) { // our query interval specifies a symbolic end, so call it a match if the container span // overlaps the start of the queryInterval - return containerInterval.end <= queryInterval.start ? - -1 : - 0; + return containerInterval.end <= queryInterval.start ? -1 : 0; } else if (containerInterval.overlaps(queryInterval)) { return 0; // there is overlap so call it a match } @@ -165,7 +169,7 @@ private boolean containerMatchesQuery(final Container container) { * @param refIndex reference sequence index * @param pos alignment start to skip to */ - //TODO: this should first select the correct slice so we don't decode all slices unnecessarily + // TODO: this should first select the correct slice so we don't decode all slices unnecessarily public boolean advanceToAlignmentInContainer(final int refIndex, final int pos) { if (!hasNext()) return false; int i = 0; @@ -200,7 +204,7 @@ public boolean hasNext() { if (!samRecordIterator.hasNext()) { BAMIteratorFilter.FilteringIteratorState nextContainerPasses = BAMIteratorFilter.FilteringIteratorState.CONTINUE_ITERATION; - while (nextContainerPasses == BAMIteratorFilter.FilteringIteratorState.CONTINUE_ITERATION){ + while (nextContainerPasses == BAMIteratorFilter.FilteringIteratorState.CONTINUE_ITERATION) { nextContainerPasses = nextContainer(); } return nextContainerPasses == BAMIteratorFilter.FilteringIteratorState.MATCHES_FILTER; @@ -234,7 +238,8 @@ public void close() { if (countingInputStream != null) { countingInputStream.close(); } - } catch (final RuntimeIOException e) { } + } catch (final RuntimeIOException e) { + } } public long getFirstContainerOffset() { @@ -269,5 +274,4 @@ public void setFileSource(final 
SamReader mReader) { public SAMFileHeader getSAMFileHeader() { return samFileHeader; } - } diff --git a/src/main/java/htsjdk/samtools/CSIIndex.java b/src/main/java/htsjdk/samtools/CSIIndex.java index 706eb450f8..4e34900e54 100644 --- a/src/main/java/htsjdk/samtools/CSIIndex.java +++ b/src/main/java/htsjdk/samtools/CSIIndex.java @@ -3,7 +3,6 @@ import htsjdk.samtools.seekablestream.SeekablePathStream; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.RuntimeIOException; - import java.io.File; import java.io.IOException; import java.nio.file.Path; @@ -23,6 +22,7 @@ public class CSIIndex extends AbstractBAMFileIndex implements BrowseableBAMIndex * the superclass constructor. */ private int binDepth; + private int minShift; private int maxBins; private int maxSpan; @@ -33,7 +33,6 @@ public class CSIIndex extends AbstractBAMFileIndex implements BrowseableBAMIndex /** * Constructors */ - public CSIIndex(final SeekableStream stream, final SAMSequenceDictionary dictionary) { this(IndexFileBufferFactory.getBuffer(stream), stream.getSource(), dictionary); } @@ -46,7 +45,8 @@ public CSIIndex(final File file, boolean enableMemoryMapping, final SAMSequenceD this(IndexFileBufferFactory.getBuffer(file, enableMemoryMapping), file.getName(), dictionary); } - private CSIIndex(final IndexFileBuffer indexFileBuffer, final String source, final SAMSequenceDictionary dictionary) { + private CSIIndex( + final IndexFileBuffer indexFileBuffer, final String source, final SAMSequenceDictionary dictionary) { super(indexFileBuffer, source, dictionary); } @@ -62,7 +62,9 @@ public int getBinDepth() { return binDepth; } - private void setBinDepth(int binDepth) { this.binDepth = binDepth; } + private void setBinDepth(int binDepth) { + this.binDepth = binDepth; + } /** * 2^(min shift) is the smallest width of a bin @@ -79,24 +81,34 @@ public int getMaxBins() { return maxBins; } - private void setMaxBins(int binDepth) { this.maxBins = ((1<<3*binDepth) - 1)/7; } + 
private void setMaxBins(int binDepth) { + this.maxBins = ((1 << 3 * binDepth) - 1) / 7; + } public int getMaxSpan() { return maxSpan; } private void setMaxSpan(int binDepth, int minShift) { - this.maxSpan = 1<<(minShift + 3*(binDepth - 1)); + this.maxSpan = 1 << (minShift + 3 * (binDepth - 1)); } - public byte[] getAuxData() { return auxData; } + public byte[] getAuxData() { + return auxData; + } - private void setAuxData(byte[] auxData) { this.auxData = auxData; } + private void setAuxData(byte[] auxData) { + this.auxData = auxData; + } @Override - public int getNumberOfReferences() { return nReferences; } + public int getNumberOfReferences() { + return nReferences; + } - private void setNumberOfReferences(int nReferences) { this.nReferences = nReferences; } + private void setNumberOfReferences(int nReferences) { + this.nReferences = nReferences; + } /** * Computes the number of bins on the given level. @@ -106,9 +118,10 @@ private void setMaxSpan(int binDepth, int minShift) { @Override public int getLevelSize(final int levelNumber) { if (levelNumber >= getBinDepth()) { - throw new SAMException("Level number (" + levelNumber + ") is greater than or equal to maximum (" + getBinDepth() + ")."); + throw new SAMException( + "Level number (" + levelNumber + ") is greater than or equal to maximum (" + getBinDepth() + ")."); } - return 1<<3*(levelNumber); + return 1 << 3 * (levelNumber); } /** @@ -117,46 +130,47 @@ public int getLevelSize(final int levelNumber) { */ public int getFirstBinInLevelForCSI(final int levelNumber) { if (levelNumber >= getBinDepth()) { - throw new SAMException("Level number (" + levelNumber + ") is greater than or equal to maximum (" + getBinDepth() + ")."); + throw new SAMException( + "Level number (" + levelNumber + ") is greater than or equal to maximum (" + getBinDepth() + ")."); } - return ((1<<3*levelNumber) - 1)/7; + return ((1 << 3 * levelNumber) - 1) / 7; } @Override public int getLevelForBin(Bin bin) { - if(bin == null || 
bin.getBinNumber() > getMaxBins()) { - throw new SAMException("Tried to get level for invalid bin: " + bin); + if (bin == null || bin.getBinNumber() > getMaxBins()) { + throw new SAMException("Tried to get level for invalid bin: " + bin); } - for (int i = getBinDepth()-1; i > -1 ; i--) { - if (bin.getBinNumber() >= getFirstBinInLevelForCSI(i)) { - return i; - } + for (int i = getBinDepth() - 1; i > -1; i--) { + if (bin.getBinNumber() >= getFirstBinInLevelForCSI(i)) { + return i; + } } throw new SAMException("Unable to find correct level for bin: " + bin); } @Override public int getFirstLocusInBin(Bin bin) { - if(bin == null || bin.getBinNumber() > getMaxBins()) { + if (bin == null || bin.getBinNumber() > getMaxBins()) { throw new SAMException("Tried to get first locus for invalid bin: " + bin); } int level = getLevelForBin(bin); int firstBinOnLevel = getFirstBinInLevelForCSI(level); int levelSize = getLevelSize(level); - return (bin.getBinNumber() - firstBinOnLevel)*(getMaxSpan()/levelSize) + 1; + return (bin.getBinNumber() - firstBinOnLevel) * (getMaxSpan() / levelSize) + 1; } @Override public int getLastLocusInBin(Bin bin) { - if(bin == null || bin.getBinNumber() > getMaxBins()) { + if (bin == null || bin.getBinNumber() > getMaxBins()) { throw new SAMException("Tried to get last locus for invalid bin: " + bin); } int level = getLevelForBin(bin); int firstBinOnLevel = getFirstBinInLevelForCSI(level); int levelSize = getLevelSize(level); - return (bin.getBinNumber() - firstBinOnLevel + 1)*(getMaxSpan()/levelSize); + return (bin.getBinNumber() - firstBinOnLevel + 1) * (getMaxSpan() / levelSize); } @Override @@ -165,7 +179,7 @@ public BinList getBinsOverlapping(int referenceIndex, int startPos, int endPos) if (regionBins == null) { return null; } - return new BinList(referenceIndex,regionBins); + return new BinList(referenceIndex, regionBins); } @Override @@ -175,28 +189,27 @@ public BAMFileSpan getSpanOverlapping(int referenceIndex, int startPos, int endP long 
minimumOffset = 0L; Bin targetBin; - if(queryResults == null) { + if (queryResults == null) { return null; } /** Compute 'minimumOffset' by searching the lowest level bin containing 'startPos'. - If the computed bin is not in the index, try the next bin to the left, belonging - to the same parent. If it is the first sibling bin, try the parent bin. + * If the computed bin is not in the index, try the next bin to the left, belonging + * to the same parent. If it is the first sibling bin, try the parent bin. */ - do { int firstBinNumber; targetBin = queryResults.getBins().getBin(initialBinNumber); if (targetBin != null) { break; } - firstBinNumber = (getParentBinNumber(initialBinNumber)<<3) + 1; + firstBinNumber = (getParentBinNumber(initialBinNumber) << 3) + 1; if (initialBinNumber > firstBinNumber) { initialBinNumber--; } else { initialBinNumber = getParentBinNumber(initialBinNumber); } - } while(initialBinNumber != 0); + } while (initialBinNumber != 0); if (initialBinNumber == 0) { targetBin = queryResults.getBins().getBin(initialBinNumber); @@ -207,7 +220,7 @@ public BAMFileSpan getSpanOverlapping(int referenceIndex, int startPos, int endP } List chunkList = new ArrayList(); - for(final Chunk chunk: queryResults.getAllChunks()) { + for (final Chunk chunk : queryResults.getAllChunks()) { chunkList.add(chunk.clone()); } @@ -217,42 +230,41 @@ public BAMFileSpan getSpanOverlapping(int referenceIndex, int startPos, int endP @Override public BAMFileSpan getSpanOverlapping(final Bin bin) { - if(bin == null) { + if (bin == null) { return null; } final int referenceSequence = bin.getReferenceSequence(); final BAMIndexContent queryResults = getQueryResults(referenceSequence); - if(queryResults == null) { + if (queryResults == null) { return null; } final int binLevel = getLevelForBin(bin); final int firstLocusInBin = getFirstLocusInBin(bin); - long minimumOffset = bin instanceof BinWithOffset ? 
((BinWithOffset)bin).getlOffset() : 0L; + long minimumOffset = bin instanceof BinWithOffset ? ((BinWithOffset) bin).getlOffset() : 0L; // Add the specified bin to the tree if it exists. final List binTree = new ArrayList(); - if(queryResults.containsBin(bin)) { + if (queryResults.containsBin(bin)) { binTree.add(queryResults.getBins().getBin(bin.getBinNumber())); } int currentBinLevel = binLevel; - while(--currentBinLevel >= 0) { + while (--currentBinLevel >= 0) { final int binStart = getFirstBinInLevelForCSI(currentBinLevel); - final int binWidth = getMaxSpan()/getLevelSize(currentBinLevel); - final int parentBinNumber = firstLocusInBin/binWidth + binStart; + final int binWidth = getMaxSpan() / getLevelSize(currentBinLevel); + final int parentBinNumber = firstLocusInBin / binWidth + binStart; final Bin parentBin = queryResults.getBins().getBin(parentBinNumber); - if(parentBin != null && queryResults.containsBin(parentBin)) { + if (parentBin != null && queryResults.containsBin(parentBin)) { binTree.add(parentBin); } } List chunkList = new ArrayList(); - for(final Bin coveringBin: binTree) { - for(final Chunk chunk: coveringBin.getChunkList()) - chunkList.add(chunk.clone()); + for (final Bin coveringBin : binTree) { + for (final Chunk chunk : coveringBin.getChunkList()) chunkList.add(chunk.clone()); } chunkList = Chunk.optimizeChunkList(chunkList, minimumOffset); @@ -291,8 +303,8 @@ protected void verifyIndexMagicNumber(final String sourceName) { final byte[] buffer = new byte[BAMFileConstants.CSI_MINSHIFT_OFFSET]; readBytes(buffer); // magic if (!Arrays.equals(buffer, BAMFileConstants.CSI_INDEX_MAGIC)) { - throw new RuntimeIOException("Invalid file header in BAM CSI index " + sourceName + - ": " + new String(buffer)); + throw new RuntimeIOException( + "Invalid file header in BAM CSI index " + sourceName + ": " + new String(buffer)); } } @@ -310,7 +322,7 @@ private void readAuxDataAndNRef() { if (BAMFileConstants.CSI_AUXDATA_OFFSET != position()) { 
seek(BAMFileConstants.CSI_AUXDATA_OFFSET); } - //set the aux data length first + // set the aux data length first byte[] auxData = new byte[readInteger()]; // l_aux readBytes(auxData); // aux setAuxData(auxData); @@ -379,11 +391,11 @@ protected BAMIndexContent query(final int referenceSequence, final int startPos, final int binCount = readInteger(); // n_bin boolean metaDataSeen = false; - final Bin[] bins = new BinWithOffset[getMaxBinNumberForReference(referenceSequence) +1]; + final Bin[] bins = new BinWithOffset[getMaxBinNumberForReference(referenceSequence) + 1]; for (int binNumber = 0; binNumber < binCount; binNumber++) { final int indexBin = readInteger(); // bin final long lOffset = readLong(); // l_offset - final int nChunks = readInteger(); // n_chunk + final int nChunks = readInteger(); // n_chunk List chunks; Chunk lastChunk = null; @@ -406,7 +418,8 @@ protected BAMIndexContent query(final int referenceSequence, final int startPos, bins[indexBin] = bin; } - return new BAMIndexContent(referenceSequence, bins, binCount - (metaDataSeen? 1 : 0), new BAMIndexMetaData(metaDataChunks), null); + return new BAMIndexContent( + referenceSequence, bins, binCount - (metaDataSeen ? 
1 : 0), new BAMIndexMetaData(metaDataChunks), null); } /** @@ -472,12 +485,13 @@ public BAMIndexContent getQueryResults(final int referenceSequence) { @Override protected void skipToSequence(final int sequenceIndex) { - if(sequenceIndex > getNumberOfReferences()) { - throw new SAMException("Sequence index (" + sequenceIndex + ") is greater than maximum (" + getNumberOfReferences() + ")."); + if (sequenceIndex > getNumberOfReferences()) { + throw new SAMException("Sequence index (" + sequenceIndex + ") is greater than maximum (" + + getNumberOfReferences() + ")."); } - //Use sequence position cache if available - if(sequenceIndexes[sequenceIndex] != -1){ + // Use sequence position cache if available + if (sequenceIndexes[sequenceIndex] != -1) { seek(sequenceIndexes[sequenceIndex]); return; } @@ -497,7 +511,7 @@ protected void skipToSequence(final int sequenceIndex) { } } - //Update sequence position cache + // Update sequence position cache sequenceIndexes[sequenceIndex] = position(); } } diff --git a/src/main/java/htsjdk/samtools/CachingBAMFileIndex.java b/src/main/java/htsjdk/samtools/CachingBAMFileIndex.java index 4a1a264d92..2bffd3f4d3 100644 --- a/src/main/java/htsjdk/samtools/CachingBAMFileIndex.java +++ b/src/main/java/htsjdk/samtools/CachingBAMFileIndex.java @@ -24,7 +24,6 @@ package htsjdk.samtools; import htsjdk.samtools.seekablestream.SeekableStream; - import java.io.File; import java.util.ArrayList; import java.util.BitSet; @@ -34,8 +33,7 @@ * Class for reading BAM file indices, caching each contig as it's loaded and * dropping values when the next contig is loaded. 
*/ -class CachingBAMFileIndex extends AbstractBAMFileIndex implements BrowseableBAMIndex -{ +class CachingBAMFileIndex extends AbstractBAMFileIndex implements BrowseableBAMIndex { // Since null is a valid return value for this index, it's possible to have lastReferenceIndex != null and // lastReference == null, this is effectively caching the return value null private Integer lastReferenceIndex = null; @@ -52,7 +50,8 @@ public CachingBAMFileIndex(final SeekableStream stream, final SAMSequenceDiction super(stream, dictionary); } - public CachingBAMFileIndex(final File file, final SAMSequenceDictionary dictionary, final boolean useMemoryMapping) { + public CachingBAMFileIndex( + final File file, final SAMSequenceDictionary dictionary, final boolean useMemoryMapping) { super(file, dictionary, useMemoryMapping); } @@ -69,8 +68,7 @@ public CachingBAMFileIndex(final File file, final SAMSequenceDictionary dictiona public BAMFileSpan getSpanOverlapping(final int referenceIndex, final int startPos, final int endPos) { final BAMIndexContent queryResults = getQueryResults(referenceIndex); - if(queryResults == null) - return null; + if (queryResults == null) return null; final List chunkList = queryResults.getChunksOverlapping(startPos, endPos); if (chunkList == null) return null; @@ -91,7 +89,7 @@ public BinList getBinsOverlapping(final int referenceIndex, final int startPos, if (regionBins == null) { return null; } - return new BinList(referenceIndex,regionBins); + return new BinList(referenceIndex, regionBins); } /** @@ -101,62 +99,59 @@ public BinList getBinsOverlapping(final int referenceIndex, final int startPos, */ @Override public BAMFileSpan getSpanOverlapping(final Bin bin) { - if(bin == null) - return null; + if (bin == null) return null; final int referenceSequence = bin.getReferenceSequence(); final BAMIndexContent indexQuery = getQueryResults(referenceSequence); - if(indexQuery == null) - return null; + if (indexQuery == null) return null; final int binLevel = 
getLevelForBin(bin); final int firstLocusInBin = getFirstLocusInBin(bin); // Add the specified bin to the tree if it exists. final List binTree = new ArrayList<>(); - if(indexQuery.containsBin(bin)) - binTree.add(indexQuery.getBins().getBin(bin.getBinNumber())); + if (indexQuery.containsBin(bin)) binTree.add(indexQuery.getBins().getBin(bin.getBinNumber())); int currentBinLevel = binLevel; - while(--currentBinLevel >= 0) { + while (--currentBinLevel >= 0) { final int binStart = getFirstBinInLevel(currentBinLevel); - final int binWidth = getMaxAddressibleGenomicLocation()/getLevelSize(currentBinLevel); - final int binNumber = firstLocusInBin/binWidth + binStart; + final int binWidth = getMaxAddressibleGenomicLocation() / getLevelSize(currentBinLevel); + final int binNumber = firstLocusInBin / binWidth + binStart; final Bin parentBin = indexQuery.getBins().getBin(binNumber); - if(parentBin != null && indexQuery.containsBin(parentBin)) - binTree.add(parentBin); + if (parentBin != null && indexQuery.containsBin(parentBin)) binTree.add(parentBin); } List chunkList = new ArrayList(); - for(final Bin coveringBin: binTree) { - for(final Chunk chunk: coveringBin.getChunkList()) - chunkList.add(chunk.clone()); + for (final Bin coveringBin : binTree) { + for (final Chunk chunk : coveringBin.getChunkList()) chunkList.add(chunk.clone()); } final int start = getFirstLocusInBin(bin); - chunkList = Chunk.optimizeChunkList(chunkList,indexQuery.getLinearIndex().getMinimumOffset(start)); + chunkList = + Chunk.optimizeChunkList(chunkList, indexQuery.getLinearIndex().getMinimumOffset(start)); return new BAMFileSpan(chunkList); } /** * Looks up the cached BAM query results if they're still in the cache and not expired. Otherwise, * retrieves the cache results from disk. - * @param referenceIndex The reference to load. CachingBAMFileIndex only stores index data for entire references. + * @param referenceIndex The reference to load. 
CachingBAMFileIndex only stores index data for entire references. * @return The index information for this reference or null if no index information is available for the given index. */ @Override protected BAMIndexContent getQueryResults(final int referenceIndex) { // If this query is for the same reference index as the last query, return it. - // This compares a boxed Integer to an int with == which is ok because the Integer will be unboxed to the primitive value - if(lastReferenceIndex!=null && lastReferenceIndex == referenceIndex){ + // This compares a boxed Integer to an int with == which is ok because the Integer will be unboxed to the + // primitive value + if (lastReferenceIndex != null && lastReferenceIndex == referenceIndex) { cacheHits++; return lastReference; } // If not attempt to load it from disk. - final BAMIndexContent queryResults = query(referenceIndex,1,-1); + final BAMIndexContent queryResults = query(referenceIndex, 1, -1); cacheMisses++; lastReferenceIndex = referenceIndex; lastReference = queryResults; diff --git a/src/main/java/htsjdk/samtools/CachingBamFileIndexOptimizedForMerging.java b/src/main/java/htsjdk/samtools/CachingBamFileIndexOptimizedForMerging.java index ae0ae98056..04bb820794 100644 --- a/src/main/java/htsjdk/samtools/CachingBamFileIndexOptimizedForMerging.java +++ b/src/main/java/htsjdk/samtools/CachingBamFileIndexOptimizedForMerging.java @@ -7,28 +7,28 @@ * null BAMIndexContent objects if all bins are empty. 
*/ class CachingBamFileIndexOptimizedForMerging extends CachingBAMFileIndex { - CachingBamFileIndexOptimizedForMerging(SeekableStream stream, SAMSequenceDictionary dictionary) { - super(stream, dictionary); - } + CachingBamFileIndexOptimizedForMerging(SeekableStream stream, SAMSequenceDictionary dictionary) { + super(stream, dictionary); + } - @Override - protected BAMIndexContent query(final int referenceSequence, final int startPos, final int endPos) { - seek(4); + @Override + protected BAMIndexContent query(final int referenceSequence, final int startPos, final int endPos) { + seek(4); - final int sequenceCount = readInteger(); + final int sequenceCount = readInteger(); - if (referenceSequence >= sequenceCount) { - return null; - } + if (referenceSequence >= sequenceCount) { + return null; + } - skipToSequence(referenceSequence); + skipToSequence(referenceSequence); - final int binCount = readInteger(); + final int binCount = readInteger(); - if (binCount == 0) { - return null; - } + if (binCount == 0) { + return null; + } - return super.query(referenceSequence, startPos, endPos); - } + return super.query(referenceSequence, startPos, endPos); + } } diff --git a/src/main/java/htsjdk/samtools/ChainedDownsamplingIterator.java b/src/main/java/htsjdk/samtools/ChainedDownsamplingIterator.java index 4fa3a7d86f..b400091ca2 100644 --- a/src/main/java/htsjdk/samtools/ChainedDownsamplingIterator.java +++ b/src/main/java/htsjdk/samtools/ChainedDownsamplingIterator.java @@ -56,16 +56,16 @@ class ChainedDownsamplingIterator extends HighAccuracyDownsamplingIterator { * Uses an assumed number of reads tested as this is often not known until after the fact. 
*/ private static double adjustProportion(final double p) { - final double ciAdjustment99_9 = 3.3 * Math.sqrt(p/MIN_ACCURATE_INPUT_READS); + final double ciAdjustment99_9 = 3.3 * Math.sqrt(p / MIN_ACCURATE_INPUT_READS); return Math.min(1, p + ciAdjustment99_9); } - /** * Resets statistics before reading from the underlying iterator. */ @Override - protected void readFromUnderlyingIterator(final List recs, final Set names, final int templatesToRead) { + protected void readFromUnderlyingIterator( + final List recs, final Set names, final int templatesToRead) { // Reset the stats on the underlying iterator ((ConstantMemoryDownsamplingIterator) getUnderlyingIterator()).resetStatistics(); @@ -79,7 +79,7 @@ protected int calculateTemplatesToKeep(final int templatesRead, final double ove final ConstantMemoryDownsamplingIterator iter = (ConstantMemoryDownsamplingIterator) getUnderlyingIterator(); final double priorProportion = iter.getAcceptedFraction(); final double p = Math.max(0, Math.min(1, overallProportion / priorProportion)); - final int retval = super.calculateTemplatesToKeep(templatesRead, p); + final int retval = super.calculateTemplatesToKeep(templatesRead, p); // Record all the discarded records to keep the overall statistics accurate, but do it after // the call to super() so it doesn't affect the proportion calculation. diff --git a/src/main/java/htsjdk/samtools/Chunk.java b/src/main/java/htsjdk/samtools/Chunk.java index 8a6d4bf802..cdc919bbd6 100644 --- a/src/main/java/htsjdk/samtools/Chunk.java +++ b/src/main/java/htsjdk/samtools/Chunk.java @@ -1,7 +1,6 @@ package htsjdk.samtools; import htsjdk.samtools.util.BlockCompressedFilePointerUtil; - import java.io.Serializable; import java.util.ArrayList; import java.util.Collections; @@ -17,7 +16,7 @@ * * See the SAM/BAM spec for more details. 
*/ -public class Chunk implements Cloneable, Serializable,Comparable { +public class Chunk implements Cloneable, Serializable, Comparable { private static final long serialVersionUID = 1L; /** @@ -40,7 +39,7 @@ public Chunk(final long start, final long end) { @Override public Chunk clone() { - return new Chunk(mChunkStart,mChunkEnd); + return new Chunk(mChunkStart, mChunkEnd); } public long getChunkStart() { @@ -88,12 +87,11 @@ public boolean equals(final Object o) { */ public boolean overlaps(final Chunk other) { final int comparison = this.compareTo(other); - if(comparison == 0) - return true; + if (comparison == 0) return true; // "sort" the two chunks using the comparator. - final Chunk leftMost = comparison==-1 ? this : other; - final Chunk rightMost = comparison==1 ? this : other; + final Chunk leftMost = comparison == -1 ? this : other; + final Chunk rightMost = comparison == 1 ? this : other; final long leftMostBlockAddress = BlockCompressedFilePointerUtil.getBlockAddress(leftMost.getChunkEnd()); final long rightMostBlockAddress = BlockCompressedFilePointerUtil.getBlockAddress(rightMost.getChunkStart()); @@ -101,15 +99,12 @@ public boolean overlaps(final Chunk other) { // If the left block's address is after the right block's address, compare the two blocks. // If the two blocks are identical, compare the block offsets. // If the right block is after the left block, no overlap is possible. 
- if(leftMostBlockAddress > rightMostBlockAddress) - return true; - else if(leftMostBlockAddress == rightMostBlockAddress) { + if (leftMostBlockAddress > rightMostBlockAddress) return true; + else if (leftMostBlockAddress == rightMostBlockAddress) { final int leftMostOffset = BlockCompressedFilePointerUtil.getBlockOffset(leftMost.getChunkEnd()); final int rightMostOffset = BlockCompressedFilePointerUtil.getBlockOffset(rightMost.getChunkStart()); return leftMostOffset > rightMostOffset; - } - else - return false; + } else return false; } /** @@ -118,12 +113,17 @@ else if(leftMostBlockAddress == rightMostBlockAddress) { * @return True if the two chunks are adjacent. Returns false if the chunks overlap or are discontinuous. */ public boolean isAdjacentTo(final Chunk other) { - // Simpler implementation would be to == the chunk end of one to the chunk start of the other. Chose this implementation to ensure that all chunk - // comparisons point directly to the - return (BlockCompressedFilePointerUtil.getBlockAddress(this.getChunkEnd()) == BlockCompressedFilePointerUtil.getBlockAddress(other.getChunkStart()) && - BlockCompressedFilePointerUtil.getBlockOffset(this.getChunkEnd()) == BlockCompressedFilePointerUtil.getBlockOffset(other.getChunkStart())) || - (BlockCompressedFilePointerUtil.getBlockAddress(this.getChunkStart()) == BlockCompressedFilePointerUtil.getBlockAddress(other.getChunkEnd()) && - BlockCompressedFilePointerUtil.getBlockOffset(this.getChunkStart()) == BlockCompressedFilePointerUtil.getBlockOffset(other.getChunkEnd())); + // Simpler implementation would be to == the chunk end of one to the chunk start of the other. 
Chose this + // implementation to ensure that all chunk + // comparisons point directly to the + return (BlockCompressedFilePointerUtil.getBlockAddress(this.getChunkEnd()) + == BlockCompressedFilePointerUtil.getBlockAddress(other.getChunkStart()) + && BlockCompressedFilePointerUtil.getBlockOffset(this.getChunkEnd()) + == BlockCompressedFilePointerUtil.getBlockOffset(other.getChunkStart())) + || (BlockCompressedFilePointerUtil.getBlockAddress(this.getChunkStart()) + == BlockCompressedFilePointerUtil.getBlockAddress(other.getChunkEnd()) + && BlockCompressedFilePointerUtil.getBlockOffset(this.getChunkStart()) + == BlockCompressedFilePointerUtil.getBlockOffset(other.getChunkEnd())); } /** @@ -148,7 +148,8 @@ public int hashCode() { @Override public String toString() { - return String.format("%d:%d-%d:%d",mChunkStart >> 16,mChunkStart & 0xFFFF,mChunkEnd >> 16,mChunkEnd & 0xFFFF); + return String.format( + "%d:%d-%d:%d", mChunkStart >> 16, mChunkStart & 0xFFFF, mChunkEnd >> 16, mChunkEnd & 0xFFFF); } /** @@ -161,7 +162,7 @@ public static List optimizeChunkList(final List chunks, final long final List result = new ArrayList(); for (final Chunk chunk : chunks) { if (chunk.getChunkEnd() <= minimumOffset) { - continue; // linear index optimization + continue; // linear index optimization } if (result.isEmpty()) { result.add(chunk); diff --git a/src/main/java/htsjdk/samtools/Cigar.java b/src/main/java/htsjdk/samtools/Cigar.java index 8fafd639f1..5eb3259cd8 100644 --- a/src/main/java/htsjdk/samtools/Cigar.java +++ b/src/main/java/htsjdk/samtools/Cigar.java @@ -43,8 +43,7 @@ public class Cigar implements Serializable, Iterable { private final List cigarElements = new ArrayList(); - public Cigar() { - } + public Cigar() {} public Cigar(final List cigarElements) { this.cigarElements.addAll(cigarElements); @@ -84,7 +83,8 @@ public int getReferenceLength() { case X: length += element.getLength(); break; - default: break; + default: + break; } } return length; @@ -105,7 +105,8 @@ 
public int getPaddedReferenceLength() { case P: length += element.getLength(); break; - default: break; + default: + break; } } return length; @@ -124,8 +125,8 @@ public int getReadLength() { public static int getReadLength(final List cigarElements) { int length = 0; for (final CigarElement element : cigarElements) { - if (element.getOperator().consumesReadBases()){ - length += element.getLength(); + if (element.getOperator().consumesReadBases()) { + length += element.getLength(); } } return length; @@ -149,8 +150,11 @@ public List isValid(final String readName, final long record final CigarElement element = cigarElements.get(i); if (element.getLength() == 0) { if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, - "CIGAR element with zero length", readName, recordNumber)); + ret.add(new SAMValidationError( + SAMValidationError.Type.INVALID_CIGAR, + "CIGAR element with zero length", + readName, + recordNumber)); } // clipping operator can only be at start or end of CIGAR final CigarOperator op = element.getOperator(); @@ -158,8 +162,11 @@ public List isValid(final String readName, final long record if (op == CigarOperator.H) { if (i != 0 && i != cigarElements.size() - 1) { if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, - "Hard clipping operator not at start or end of CIGAR", readName, recordNumber)); + ret.add(new SAMValidationError( + SAMValidationError.Type.INVALID_CIGAR, + "Hard clipping operator not at start or end of CIGAR", + readName, + recordNumber)); } } else { if (op != CigarOperator.S) throw new IllegalStateException("Should never happen: " + op.name()); @@ -171,40 +178,49 @@ public List isValid(final String readName, final long record // from the end. 
} else if (cigarElements.get(0).getOperator() != CigarOperator.H) { if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, - "Soft clipping CIGAR operator can only be inside of hard clipping operator", - readName, recordNumber)); + ret.add(new SAMValidationError( + SAMValidationError.Type.INVALID_CIGAR, + "Soft clipping CIGAR operator can only be inside of hard clipping operator", + readName, + recordNumber)); } } else if (i == cigarElements.size() - 2) { if (cigarElements.get(cigarElements.size() - 1).getOperator() != CigarOperator.H) { if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, - "Soft clipping CIGAR operator can only be inside of hard clipping operator", - readName, recordNumber)); + ret.add(new SAMValidationError( + SAMValidationError.Type.INVALID_CIGAR, + "Soft clipping CIGAR operator can only be inside of hard clipping operator", + readName, + recordNumber)); } } else { if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, - "Soft clipping CIGAR operator can at start or end of read, or be inside of hard clipping operator", - readName, recordNumber)); + ret.add(new SAMValidationError( + SAMValidationError.Type.INVALID_CIGAR, + "Soft clipping CIGAR operator can at start or end of read, or be inside of hard clipping operator", + readName, + recordNumber)); } - } } else if (isRealOperator(op)) { // Must be at least one real operator (MIDN) seenRealOperator = true; // There should be an M or P operator between any pair of IDN operators if (isInDelOperator(op)) { - for (int j = i+1; j < cigarElements.size(); ++j) { + for (int j = i + 1; j < cigarElements.size(); ++j) { final CigarOperator nextOperator = cigarElements.get(j).getOperator(); // Allow - if ((isRealOperator(nextOperator) && !isInDelOperator(nextOperator)) || isPaddingOperator(nextOperator)) { + if 
((isRealOperator(nextOperator) && !isInDelOperator(nextOperator)) + || isPaddingOperator(nextOperator)) { break; } if (isInDelOperator(nextOperator) && op == nextOperator) { if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.ADJACENT_INDEL_IN_CIGAR, - "No M or N operator between pair of " + op.name() + " operators in CIGAR", readName, recordNumber)); + ret.add(new SAMValidationError( + SAMValidationError.Type.ADJACENT_INDEL_IN_CIGAR, + "No M or N operator between pair of " + op.name() + " operators in CIGAR", + readName, + recordNumber)); } } } @@ -214,42 +230,55 @@ public List isValid(final String readName, final long record * Removed restriction that padding not be the first operator because if a read starts in the middle of a pad * in a padded reference, it is necessary to precede the read with padding so that alignment start refers to a * position on the unpadded reference. - */ + */ } else if (i == cigarElements.size() - 1) { if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, - "Padding operator not valid at end of CIGAR", readName, recordNumber)); - } else if (!isRealOperator(cigarElements.get(i-1).getOperator()) || - !isRealOperator(cigarElements.get(i+1).getOperator())) { + ret.add(new SAMValidationError( + SAMValidationError.Type.INVALID_CIGAR, + "Padding operator not valid at end of CIGAR", + readName, + recordNumber)); + } else if (!isRealOperator(cigarElements.get(i - 1).getOperator()) + || !isRealOperator(cigarElements.get(i + 1).getOperator())) { if (ret == null) ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, - "Padding operator not between real operators in CIGAR", readName, recordNumber)); + ret.add(new SAMValidationError( + SAMValidationError.Type.INVALID_CIGAR, + "Padding operator not between real operators in CIGAR", + readName, + recordNumber)); } } } if (!seenRealOperator) { if (ret == null) 
ret = new ArrayList(); - ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR, - "No real operator (M|I|D|N) in CIGAR", readName, recordNumber)); + ret.add(new SAMValidationError( + SAMValidationError.Type.INVALID_CIGAR, + "No real operator (M|I|D|N) in CIGAR", + readName, + recordNumber)); } return ret; } private static boolean isRealOperator(final CigarOperator op) { - return op == CigarOperator.M || op == CigarOperator.EQ || op == CigarOperator.X || - op == CigarOperator.I || op == CigarOperator.D || op == CigarOperator.N; + return op == CigarOperator.M + || op == CigarOperator.EQ + || op == CigarOperator.X + || op == CigarOperator.I + || op == CigarOperator.D + || op == CigarOperator.N; } private static boolean isInDelOperator(final CigarOperator op) { - return op !=null && op.isIndel(); + return op != null && op.isIndel(); } private static boolean isClippingOperator(final CigarOperator op) { - return op !=null && op.isClipping(); + return op != null && op.isClipping(); } private static boolean isPaddingOperator(final CigarOperator op) { - return op !=null && op.isPadding(); + return op != null && op.isPadding(); } @Override @@ -261,27 +290,27 @@ public boolean equals(final Object o) { return cigarElements.equals(cigar.cigarElements); } - + /** build a new Cigar object from a list of cigar operators. * This can be used if you have the operators associated to * each base in the read. 
- * + * * e.g: read length =10 with cigar= [M,M,M,M,M,M,M,M,M,M], here * fromCigarOperators would generate the cigar '10M' - * + * * later the user resolved the 'M' to '=' or 'X', the array is now - * + * * [=,=,=,=,=,X,X,=,=,=] - * + * * fromCigarOperators would generate the cigar '5M2X3M' - * + * * */ public static Cigar fromCigarOperators(final List cigarOperators) { if (cigarOperators == null) throw new IllegalArgumentException("cigarOperators is null"); final List cigarElementList = new ArrayList<>(); int i = 0; // find adjacent operators and build list of cigar elements - while (i < cigarOperators.size() ) { + while (i < cigarOperators.size()) { final CigarOperator currentOp = cigarOperators.get(i); int j = i + 1; while (j < cigarOperators.size() && cigarOperators.get(j).equals(currentOp)) { @@ -299,31 +328,31 @@ public static Cigar fromCigarOperators(final List cigarOperators) * @param cigarString A SAM formatted CIGAR string. * @return a new Cigar */ - public static Cigar fromCigarString(String cigarString){ + public static Cigar fromCigarString(String cigarString) { return TextCigarCodec.decode(cigarString); } - + /** shortcut to getCigarElements().iterator() */ @Override public Iterator iterator() { return this.getCigarElements().iterator(); } - + /** returns true if the cigar string contains the given operator */ public boolean containsOperator(final CigarOperator operator) { - return this.cigarElements.stream().anyMatch( element -> element.getOperator() == operator); + return this.cigarElements.stream().anyMatch(element -> element.getOperator() == operator); } - + /** returns the first cigar element */ public CigarElement getFirstCigarElement() { - return isEmpty() ? null : this.cigarElements.get(0); + return isEmpty() ? null : this.cigarElements.get(0); } - + /** returns the last cigar element */ public CigarElement getLastCigarElement() { - return isEmpty() ? null : this.cigarElements.get(this.numCigarElements() - 1 ); + return isEmpty() ? 
null : this.cigarElements.get(this.numCigarElements() - 1); } - + /** returns true if the cigar string starts With a clipping operator */ public boolean isLeftClipped() { return !isEmpty() && isClippingOperator(getFirstCigarElement().getOperator()); @@ -338,7 +367,7 @@ public boolean isRightClipped() { public boolean isClipped() { return isLeftClipped() || isRightClipped(); } - + @Override public int hashCode() { return cigarElements.hashCode(); diff --git a/src/main/java/htsjdk/samtools/CigarElement.java b/src/main/java/htsjdk/samtools/CigarElement.java index 016956c565..2dc761d54a 100644 --- a/src/main/java/htsjdk/samtools/CigarElement.java +++ b/src/main/java/htsjdk/samtools/CigarElement.java @@ -36,7 +36,10 @@ public class CigarElement implements Serializable { private final CigarOperator operator; public CigarElement(final int length, final CigarOperator operator) { - if (length < 0) throw new IllegalArgumentException(String.format("Cigar element being constructed with negative length: %d and operation: %s" , length, operator.name())); + if (length < 0) + throw new IllegalArgumentException(String.format( + "Cigar element being constructed with negative length: %d and operation: %s", + length, operator.name())); this.length = length; this.operator = operator; } @@ -68,9 +71,9 @@ public int hashCode() { result = 31 * result + (operator != null ? operator.hashCode() : 0); return result; } - + @Override public String toString() { - return String.valueOf(this.length)+this.operator; + return String.valueOf(this.length) + this.operator; } } diff --git a/src/main/java/htsjdk/samtools/CigarOperator.java b/src/main/java/htsjdk/samtools/CigarOperator.java index 46ea539d4a..af1d5c684e 100644 --- a/src/main/java/htsjdk/samtools/CigarOperator.java +++ b/src/main/java/htsjdk/samtools/CigarOperator.java @@ -28,24 +28,23 @@ */ public enum CigarOperator { /** Match or mismatch */ - M(true, true, 'M'), + M(true, true, 'M'), /** Insertion vs. the reference. 
*/ - I(true, false, 'I'), + I(true, false, 'I'), /** Deletion vs. the reference. */ - D(false, true, 'D'), + D(false, true, 'D'), /** Skipped region from the reference. */ - N(false, true, 'N'), + N(false, true, 'N'), /** Soft clip. */ - S(true, false, 'S'), + S(true, false, 'S'), /** Hard clip. */ H(false, false, 'H'), /** Padding. */ P(false, false, 'P'), /** Matches the reference. */ - EQ(true, true, '='), + EQ(true, true, '='), /** Mismatches the reference. */ - X(true, true, 'X') - ; + X(true, true, 'X'); // Representation of CigarOperator in BAM file private static final byte OP_M = 0; @@ -81,10 +80,14 @@ public enum CigarOperator { } /** If true, represents that this cigar operator "consumes" bases from the read bases. */ - public boolean consumesReadBases() { return consumesReadBases; } + public boolean consumesReadBases() { + return consumesReadBases; + } /** If true, represents that this cigar operator "consumes" bases from the reference sequence. */ - public boolean consumesReferenceBases() { return consumesReferenceBases; } + public boolean consumesReferenceBases() { + return consumesReferenceBases; + } /** * @param b CIGAR operator in character form as appears in a text CIGAR string @@ -92,26 +95,26 @@ public enum CigarOperator { */ public static CigarOperator characterToEnum(final int b) { switch (b) { - case 'M': - return M; - case 'I': - return I; - case 'D': - return D; - case 'N': - return N; - case 'S': - return S; - case 'H': - return H; - case 'P': - return P; - case '=': - return EQ; - case 'X': - return X; - default: - throw new IllegalArgumentException("Unrecognized CigarOperator: " + b); + case 'M': + return M; + case 'I': + return I; + case 'D': + return D; + case 'N': + return N; + case 'S': + return S; + case 'H': + return H; + case 'P': + return P; + case '=': + return EQ; + case 'X': + return X; + default: + throw new IllegalArgumentException("Unrecognized CigarOperator: " + b); } } @@ -120,7 +123,7 @@ public static CigarOperator 
characterToEnum(final int b) { * @return CigarOperator enum value corresponding to the given int value. */ public static CigarOperator binaryToEnum(final int i) { - switch(i) { + switch (i) { case OP_M: return M; case OP_I: @@ -150,7 +153,7 @@ public static CigarOperator binaryToEnum(final int i) { * @return CIGAR operator corresponding to the enum value in binary form as appears in a BAMRecord. */ public static int enumToBinary(final CigarOperator e) { - switch(e) { + switch (e) { case M: return OP_M; case I: @@ -198,14 +201,15 @@ public boolean isIndelOrSkippedRegion() { public boolean isAlignment() { return this == M || this == X || this == EQ; } - + /** Returns true if the operator is a Padding operator */ public boolean isPadding() { return this == P; } - + /** Returns the cigar operator as it would be seen in a SAM file. */ - @Override public String toString() { + @Override + public String toString() { return this.string; } } diff --git a/src/main/java/htsjdk/samtools/ComparableSamRecordIterator.java b/src/main/java/htsjdk/samtools/ComparableSamRecordIterator.java index cb2da892cb..e69c8f06aa 100644 --- a/src/main/java/htsjdk/samtools/ComparableSamRecordIterator.java +++ b/src/main/java/htsjdk/samtools/ComparableSamRecordIterator.java @@ -25,7 +25,6 @@ import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.PeekableIterator; - import java.util.Comparator; /** @@ -33,7 +32,8 @@ * The comparison is performed by comparing the next record in the iterator to the next * record in another iterator and returning the ordering between those SAM records. */ -class ComparableSamRecordIterator extends PeekableIterator implements Comparable { +class ComparableSamRecordIterator extends PeekableIterator + implements Comparable { private final Comparator comparator; private final SamReader reader; @@ -44,7 +44,8 @@ class ComparableSamRecordIterator extends PeekableIterator implements * @param iterator the wrapped iterator. 
* @param comparator the Comparator to use to provide ordering fo SAMRecords */ - public ComparableSamRecordIterator(final SamReader sam, final CloseableIterator iterator, final Comparator comparator) { + public ComparableSamRecordIterator( + final SamReader sam, final CloseableIterator iterator, final Comparator comparator) { super(iterator); this.reader = sam; this.comparator = comparator; @@ -66,8 +67,8 @@ public SamReader getReader() { @Override public int compareTo(final ComparableSamRecordIterator that) { if (this.comparator.getClass() != that.comparator.getClass()) { - throw new IllegalStateException("Attempt to compare two ComparableSAMRecordIterators that " + - "have different orderings internally"); + throw new IllegalStateException("Attempt to compare two ComparableSAMRecordIterators that " + + "have different orderings internally"); } final SAMRecord record = this.peek(); @@ -85,6 +86,7 @@ public boolean equals(final Object o) { @Override public int hashCode() { - throw new UnsupportedOperationException("ComparableSamRecordIterator should not be hashed because it can change value"); + throw new UnsupportedOperationException( + "ComparableSamRecordIterator should not be hashed because it can change value"); } } diff --git a/src/main/java/htsjdk/samtools/CompressedIndexFileBuffer.java b/src/main/java/htsjdk/samtools/CompressedIndexFileBuffer.java index 2c208b4f6c..c16f97c39a 100644 --- a/src/main/java/htsjdk/samtools/CompressedIndexFileBuffer.java +++ b/src/main/java/htsjdk/samtools/CompressedIndexFileBuffer.java @@ -4,7 +4,6 @@ import htsjdk.samtools.util.BinaryCodec; import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.RuntimeIOException; - import java.io.File; import java.io.IOException; @@ -22,7 +21,7 @@ class CompressedIndexFileBuffer implements IndexFileBuffer { mCompressedStream = new BlockCompressedInputStream(file); binaryCodec = new BinaryCodec(mCompressedStream); } catch (IOException ioe) { - throw(new 
RuntimeIOException("Construction error of CSI compressed stream: " + ioe)); + throw (new RuntimeIOException("Construction error of CSI compressed stream: " + ioe)); } } @@ -55,7 +54,7 @@ public void skipBytes(final int count) { try { mCompressedStream.skip(count); } catch (IOException ioe) { - throw(new RuntimeIOException("Skip error in CSI compressed stream: " + ioe)); + throw (new RuntimeIOException("Skip error in CSI compressed stream: " + ioe)); } } @@ -68,7 +67,7 @@ public void seek(final long position) { try { mCompressedStream.seek(position); } catch (IOException ioe) { - throw(new RuntimeIOException("Seek error in CSI compressed stream: " + ioe)); + throw (new RuntimeIOException("Seek error in CSI compressed stream: " + ioe)); } } @@ -90,8 +89,7 @@ public void close() { try { mCompressedStream.close(); } catch (IOException ioe) { - throw(new RuntimeIOException("Close error in CSI compressed stream: " + ioe)); + throw (new RuntimeIOException("Close error in CSI compressed stream: " + ioe)); } } - } diff --git a/src/main/java/htsjdk/samtools/ConstantMemoryDownsamplingIterator.java b/src/main/java/htsjdk/samtools/ConstantMemoryDownsamplingIterator.java index c6e0de49e0..047ea65550 100644 --- a/src/main/java/htsjdk/samtools/ConstantMemoryDownsamplingIterator.java +++ b/src/main/java/htsjdk/samtools/ConstantMemoryDownsamplingIterator.java @@ -25,7 +25,6 @@ import htsjdk.samtools.util.Murmur3; import htsjdk.samtools.util.PeekableIterator; - import java.util.Iterator; /** @@ -46,7 +45,6 @@ class ConstantMemoryDownsamplingIterator extends DownsamplingIterator { private final int maxHashValue; private final Murmur3 hasher; - /** Constructs a downsampling iterator upon the supplied iterator, using the Random as the source of randomness. 
*/ ConstantMemoryDownsamplingIterator(final Iterator iterator, final double proportion, final int seed) { super(proportion); @@ -60,7 +58,8 @@ class ConstantMemoryDownsamplingIterator extends DownsamplingIterator { } /** Returns true if there is another record available post-downsampling, false otherwise. */ - @Override public boolean hasNext() { + @Override + public boolean hasNext() { // The underlying iterator is always left at the next return-able read, so if it has a next read, so do we return this.underlyingIterator.hasNext(); } @@ -70,7 +69,8 @@ class ConstantMemoryDownsamplingIterator extends DownsamplingIterator { * @return true if there is at least one emittable record ready for emission, false otherwise */ private boolean advanceToNextAcceptedRead() { - while (this.underlyingIterator.hasNext() && this.hasher.hashUnencodedChars(this.underlyingIterator.peek().getReadName()) > this.maxHashValue) { + while (this.underlyingIterator.hasNext() + && this.hasher.hashUnencodedChars(this.underlyingIterator.peek().getReadName()) > this.maxHashValue) { this.underlyingIterator.next(); recordDiscardedRecord(); } @@ -79,7 +79,8 @@ private boolean advanceToNextAcceptedRead() { } /** Returns the next record from the iterator, or throws an exception if there is no next record. 
*/ - @Override public SAMRecord next() { + @Override + public SAMRecord next() { final SAMRecord rec = this.underlyingIterator.next(); recordAcceptedRecord(); advanceToNextAcceptedRead(); diff --git a/src/main/java/htsjdk/samtools/CoordinateSortedPairInfoMap.java b/src/main/java/htsjdk/samtools/CoordinateSortedPairInfoMap.java index f28b003e69..2440185798 100644 --- a/src/main/java/htsjdk/samtools/CoordinateSortedPairInfoMap.java +++ b/src/main/java/htsjdk/samtools/CoordinateSortedPairInfoMap.java @@ -27,7 +27,6 @@ import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.FileAppendStreamLRUCache; import htsjdk.samtools.util.IOUtil; - import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -57,6 +56,7 @@ public class CoordinateSortedPairInfoMap implements Iterable mapInRam = null; private final FileAppendStreamLRUCache outputStreams; @@ -120,8 +120,7 @@ private void ensureSequenceLoaded(final int sequenceIndex) { } final Integer numRecords = sizeOfMapOnDisk.remove(sequenceIndex); if (mapOnDisk.exists()) { - if (numRecords == null) - throw new IllegalStateException("null numRecords for " + mapOnDisk); + if (numRecords == null) throw new IllegalStateException("null numRecords for " + mapOnDisk); FileInputStream is = null; try { is = new FileInputStream(mapOnDisk); @@ -129,8 +128,8 @@ private void ensureSequenceLoaded(final int sequenceIndex) { for (int i = 0; i < numRecords; ++i) { final Map.Entry keyAndRecord = elementCodec.decode(); if (mapInRam.containsKey(keyAndRecord.getKey())) - throw new SAMException("Value was put into PairInfoMap more than once. " + - sequenceIndex + ": " + keyAndRecord.getKey()); + throw new SAMException("Value was put into PairInfoMap more than once. 
" + sequenceIndex + + ": " + keyAndRecord.getKey()); mapInRam.put(keyAndRecord.getKey(), keyAndRecord.getValue()); } } finally { @@ -156,8 +155,8 @@ public void put(final int sequenceIndex, final KEY key, final REC record) { if (sequenceIndex == sequenceIndexOfMapInRam) { // Store in RAM map if (mapInRam.containsKey(key)) - throw new IllegalArgumentException("Putting value into PairInfoMap that already existed. " + - sequenceIndex + ": " + key); + throw new IllegalArgumentException( + "Putting value into PairInfoMap that already existed. " + sequenceIndex + ": " + key); mapInRam.put(key, record); } else { // Append to file @@ -166,7 +165,7 @@ public void put(final int sequenceIndex, final KEY key, final REC record) { elementCodec.encode(key, record); Integer prevCount = sizeOfMapOnDisk.get(sequenceIndex); if (prevCount == null) prevCount = 0; - sizeOfMapOnDisk.put(sequenceIndex, prevCount + 1); + sizeOfMapOnDisk.put(sequenceIndex, prevCount + 1); } } @@ -191,10 +190,10 @@ public int size() { } /** - * @return number of elements stored in RAM. Always <= size() + * @return number of elements stored in RAM. Always {@code <= size()} */ public int sizeInRam() { - return mapInRam != null? mapInRam.size(): 0; + return mapInRam != null ? mapInRam.size() : 0; } /** @@ -216,8 +215,7 @@ private class MapIterator implements CloseableIterator> { private Iterator> currentReferenceIterator = null; private MapIterator() { - if (sequenceIndexOfMapInRam != INVALID_SEQUENCE_INDEX) - referenceIndices.add(sequenceIndexOfMapInRam); + if (sequenceIndexOfMapInRam != INVALID_SEQUENCE_INDEX) referenceIndices.add(sequenceIndexOfMapInRam); referenceIndexIterator = referenceIndices.iterator(); advanceToNextNonEmptyReferenceIndex(); } @@ -297,6 +295,5 @@ public interface Codec { * a record. 
*/ Map.Entry decode(); - } } diff --git a/src/main/java/htsjdk/samtools/CustomReaderFactory.java b/src/main/java/htsjdk/samtools/CustomReaderFactory.java index bb00da74e3..1f16c2f388 100644 --- a/src/main/java/htsjdk/samtools/CustomReaderFactory.java +++ b/src/main/java/htsjdk/samtools/CustomReaderFactory.java @@ -24,120 +24,117 @@ package htsjdk.samtools; import htsjdk.samtools.util.Log; - import java.net.URL; import java.net.URLClassLoader; - /** - * Factory for creating custom readers for accessing API based resources, + * Factory for creating custom readers for accessing API based resources, * e.g. ga4gh. * The configuration is controlled via custom_reader property (@see Defaults). * This allows injection of such readers from code bases outside HTSJDK. */ public class CustomReaderFactory { - private final static Log LOG = Log.getInstance(CustomReaderFactory.class); - /** - * Interface to be implemented by custom factory classes that register - * themselves with this factory and are loaded dynamically. - */ - public interface ICustomReaderFactory { - SamReader open(URL url); - } - - private static final CustomReaderFactory DEFAULT_FACTORY; - private static CustomReaderFactory currentFactory; - - private String urlPrefix = ""; - private String factoryClassName = ""; - private String jarFile = ""; - private ICustomReaderFactory factory; - - static { - DEFAULT_FACTORY = new CustomReaderFactory(); - currentFactory = DEFAULT_FACTORY; - } + private static final Log LOG = Log.getInstance(CustomReaderFactory.class); + /** + * Interface to be implemented by custom factory classes that register + * themselves with this factory and are loaded dynamically. 
+ */ + public interface ICustomReaderFactory { + SamReader open(URL url); + } + + private static final CustomReaderFactory DEFAULT_FACTORY; + private static CustomReaderFactory currentFactory; + + private String urlPrefix = ""; + private String factoryClassName = ""; + private String jarFile = ""; + private ICustomReaderFactory factory; - public static void setInstance(final CustomReaderFactory factory){ - currentFactory = factory; - } - - public static void resetToDefaultInstance() { - setInstance(DEFAULT_FACTORY); - } + static { + DEFAULT_FACTORY = new CustomReaderFactory(); + currentFactory = DEFAULT_FACTORY; + } + + public static void setInstance(final CustomReaderFactory factory) { + currentFactory = factory; + } + + public static void resetToDefaultInstance() { + setInstance(DEFAULT_FACTORY); + } - public static CustomReaderFactory getInstance(){ - return currentFactory; - } - - /** - * Initializes factory based on the custom_reader property specification. - */ - private CustomReaderFactory() { - this(Defaults.CUSTOM_READER_FACTORY); - } - - CustomReaderFactory(String cfg) { - final String[] cfgComponents = cfg.split(","); - if (cfgComponents.length < 2) { - return; + public static CustomReaderFactory getInstance() { + return currentFactory; } - urlPrefix = cfgComponents[0].toLowerCase(); - factoryClassName = cfgComponents[1]; - if (cfgComponents.length > 2) { - jarFile = cfgComponents[2]; + + /** + * Initializes factory based on the custom_reader property specification. + */ + private CustomReaderFactory() { + this(Defaults.CUSTOM_READER_FACTORY); } - } - - /** - * Lazily creates factory based on the configuration. - * @return null if creation fails, factory instance otherwise. 
- */ - private synchronized ICustomReaderFactory getFactory() { - if (factory == null) { - try { - Class clazz = null; - - if (!jarFile.isEmpty()) { - LOG.info("Attempting to load factory class " + factoryClassName + - " from " + jarFile); - final URL jarURL = new URL("file:///"+jarFile); - clazz = Class.forName(factoryClassName, true, - new URLClassLoader (new URL[] { jarURL }, - this.getClass().getClassLoader())); - } else { - LOG.info("Attempting to load factory class " + factoryClassName); - clazz = Class.forName(factoryClassName); + + CustomReaderFactory(String cfg) { + final String[] cfgComponents = cfg.split(","); + if (cfgComponents.length < 2) { + return; + } + urlPrefix = cfgComponents[0].toLowerCase(); + factoryClassName = cfgComponents[1]; + if (cfgComponents.length > 2) { + jarFile = cfgComponents[2]; } - - factory = (ICustomReaderFactory)clazz.newInstance(); - LOG.info("Created custom factory for " + urlPrefix + " from " + - factoryClassName + " loaded from " + (jarFile.isEmpty() ? - " this jar" : jarFile)); - } catch (Exception e) { - LOG.error(e); - return null; - } } - return factory; - } - - /** - * Check if the url is supposed to be handled by the custom factory and if so - * attempt to create reader via an instance of this custom factory. - * - * @return null if the url is not handled by this factory, SamReader otherwise. - */ - public SamReader maybeOpen(URL url) { - if (urlPrefix.isEmpty() || - !url.toString().toLowerCase().startsWith(urlPrefix)) { - return null; + + /** + * Lazily creates factory based on the configuration. + * @return null if creation fails, factory instance otherwise. 
+ */ + private synchronized ICustomReaderFactory getFactory() { + if (factory == null) { + try { + Class clazz = null; + + if (!jarFile.isEmpty()) { + LOG.info("Attempting to load factory class " + factoryClassName + " from " + jarFile); + final URL jarURL = new URL("file:///" + jarFile); + clazz = Class.forName( + factoryClassName, + true, + new URLClassLoader( + new URL[] {jarURL}, this.getClass().getClassLoader())); + } else { + LOG.info("Attempting to load factory class " + factoryClassName); + clazz = Class.forName(factoryClassName); + } + + factory = (ICustomReaderFactory) clazz.newInstance(); + LOG.info("Created custom factory for " + urlPrefix + " from " + factoryClassName + " loaded from " + + (jarFile.isEmpty() ? " this jar" : jarFile)); + } catch (Exception e) { + LOG.error(e); + return null; + } + } + return factory; } - LOG.info("Attempting to open " + url + " with custom factory"); - final ICustomReaderFactory factory = getFactory(); - if (factory == null) { - return null; + + /** + * Check if the url is supposed to be handled by the custom factory and if so + * attempt to create reader via an instance of this custom factory. + * + * @return null if the url is not handled by this factory, SamReader otherwise. 
+ */ + public SamReader maybeOpen(URL url) { + if (urlPrefix.isEmpty() || !url.toString().toLowerCase().startsWith(urlPrefix)) { + return null; + } + LOG.info("Attempting to open " + url + " with custom factory"); + final ICustomReaderFactory factory = getFactory(); + if (factory == null) { + return null; + } + return factory.open(url); } - return factory.open(url); - } } diff --git a/src/main/java/htsjdk/samtools/DefaultSAMRecordFactory.java b/src/main/java/htsjdk/samtools/DefaultSAMRecordFactory.java index 707cc6ec12..6df651db31 100644 --- a/src/main/java/htsjdk/samtools/DefaultSAMRecordFactory.java +++ b/src/main/java/htsjdk/samtools/DefaultSAMRecordFactory.java @@ -8,9 +8,9 @@ public class DefaultSAMRecordFactory implements SAMRecordFactory { private static final DefaultSAMRecordFactory INSTANCE = new DefaultSAMRecordFactory(); - + public static DefaultSAMRecordFactory getInstance() { - return INSTANCE; + return INSTANCE; } /** Create a new SAMRecord to be filled in */ @@ -25,32 +25,34 @@ public SAMRecord createSAMRecord(final SAMFileHeader header) { * dictionary in the header argument. 
*/ @Override - public BAMRecord createBAMRecord (final SAMFileHeader header, - final int referenceSequenceIndex, - final int alignmentStart, - final short readNameLength, - final short mappingQuality, - final int indexingBin, - final int cigarLen, - final int flags, - final int readLen, - final int mateReferenceSequenceIndex, - final int mateAlignmentStart, - final int insertSize, - final byte[] variableLengthBlock) { + public BAMRecord createBAMRecord( + final SAMFileHeader header, + final int referenceSequenceIndex, + final int alignmentStart, + final short readNameLength, + final short mappingQuality, + final int indexingBin, + final int cigarLen, + final int flags, + final int readLen, + final int mateReferenceSequenceIndex, + final int mateAlignmentStart, + final int insertSize, + final byte[] variableLengthBlock) { - return new BAMRecord(header, - referenceSequenceIndex, - alignmentStart, - readNameLength, - mappingQuality, - indexingBin, - cigarLen, - flags, - readLen, - mateReferenceSequenceIndex, - mateAlignmentStart, - insertSize, - variableLengthBlock); + return new BAMRecord( + header, + referenceSequenceIndex, + alignmentStart, + readNameLength, + mappingQuality, + indexingBin, + cigarLen, + flags, + readLen, + mateReferenceSequenceIndex, + mateAlignmentStart, + insertSize, + variableLengthBlock); } } diff --git a/src/main/java/htsjdk/samtools/Defaults.java b/src/main/java/htsjdk/samtools/Defaults.java index 4bf9d87729..948be53815 100644 --- a/src/main/java/htsjdk/samtools/Defaults.java +++ b/src/main/java/htsjdk/samtools/Defaults.java @@ -1,7 +1,6 @@ package htsjdk.samtools; import htsjdk.samtools.util.Log; - import java.io.File; import java.util.Collections; import java.util.Optional; @@ -15,8 +14,8 @@ * @author Tim Fennell */ public class Defaults { - private static final Log log = Log.getInstance(Defaults.class); - + private static final Log log = Log.getInstance(Defaults.class); + /** Should BAM index files be created when writing out coordinate 
sorted BAM files? Default = false. */ public static final boolean CREATE_INDEX; @@ -97,11 +96,11 @@ public class Defaults { public static final String EBI_REFERENCE_SERVICE_URL_MASK; /** - * Boolean describing whether downloading of SRA native libraries is allowed, - * in case such native libraries are not found locally. Default = false. + * Whether to attempt to use jlibdeflate (libdeflate via JNI) for DEFLATE compression and decompression. + * When true, the default deflater/inflater factories will try to load the native library and fall back + * to the JDK implementation if it is not available. Default = true. */ - public static final boolean SRA_LIBRARIES_DOWNLOAD; - + public static final boolean USE_LIBDEFLATE; /** * The name of the system property that disables snappy. Default = "snappy.disable". @@ -110,13 +109,13 @@ public class Defaults { public static final String OPTIMISTIC_VCF_4_4_PROPERTY = "optimistic_vcf_4_4"; - /** * Disable use of the Snappy compressor. Default = false. 
*/ public static final boolean DISABLE_SNAPPY_COMPRESSOR; public static final String SAMJDK_PREFIX = "samjdk."; + static { CREATE_INDEX = getBooleanProperty("create_index", false); CREATE_MD5 = getBooleanProperty("create_md5", false); @@ -136,8 +135,9 @@ public class Defaults { USE_CRAM_REF_DOWNLOAD = getBooleanProperty("use_cram_ref_download", false); EBI_REFERENCE_SERVICE_URL_MASK = "https://www.ebi.ac.uk/ena/cram/md5/%s"; CUSTOM_READER_FACTORY = getStringProperty("custom_reader", ""); - SAM_FLAG_FIELD_FORMAT = SamFlagField.valueOf(getStringProperty("sam_flag_field_format", SamFlagField.DECIMAL.name())); - SRA_LIBRARIES_DOWNLOAD = getBooleanProperty("sra_libraries_download", false); + SAM_FLAG_FIELD_FORMAT = + SamFlagField.valueOf(getStringProperty("sam_flag_field_format", SamFlagField.DECIMAL.name())); + USE_LIBDEFLATE = getBooleanProperty("use_libdeflate", true); DISABLE_SNAPPY_COMPRESSOR = getBooleanProperty(DISABLE_SNAPPY_PROPERTY_NAME, false); OPTIMISTIC_VCF_4_4 = getBooleanProperty(OPTIMISTIC_VCF_4_4_PROPERTY, false); } @@ -147,7 +147,7 @@ public class Defaults { * The returned map is unmodifiable. * This function is useful for example when logging all defaults. */ - public static SortedMap allDefaults(){ + public static SortedMap allDefaults() { final SortedMap result = new TreeMap<>(); result.put("CREATE_INDEX", CREATE_INDEX); result.put("CREATE_MD5", CREATE_MD5); @@ -162,18 +162,22 @@ public static SortedMap allDefaults(){ result.put("EBI_REFERENCE_SERVICE_URL_MASK", EBI_REFERENCE_SERVICE_URL_MASK); result.put("CUSTOM_READER_FACTORY", CUSTOM_READER_FACTORY); result.put("SAM_FLAG_FIELD_FORMAT", SAM_FLAG_FIELD_FORMAT); + result.put("USE_LIBDEFLATE", USE_LIBDEFLATE); result.put("DISABLE_SNAPPY_COMPRESSOR", DISABLE_SNAPPY_COMPRESSOR); return Collections.unmodifiableSortedMap(result); } - /** Gets a string system property, prefixed with "samjdk." using the default + /** Gets a string system property, prefixed with "samjdk." 
using the default * if the property does not exist or if the java.security manager raises an exception for * applications started with -Djava.security.manager . */ private static String getStringProperty(final String name, final String def) { try { return System.getProperty(Defaults.SAMJDK_PREFIX + name, def); } catch (final SecurityException error) { - log.warn(error,"java Security Manager forbids 'System.getProperty(\"" + name + "\")' , returning default value: " + def ); + log.warn( + error, + "java Security Manager forbids 'System.getProperty(\"" + name + "\")' , returning default value: " + + def); return def; } } @@ -181,11 +185,11 @@ private static String getStringProperty(final String name, final String def) { /** Checks whether a string system property, prefixed with "samjdk.", exists. * If the property does not exist or if the java.security manager raises an exception for * applications started with -Djava.security.manager this method returns false. */ - private static boolean hasProperty(final String name){ + private static boolean hasProperty(final String name) { try { return null != System.getProperty(Defaults.SAMJDK_PREFIX + name); } catch (final SecurityException error) { - log.warn(error,"java Security Manager forbids 'System.getProperty(\"" + name + "\")' , returning false"); + log.warn(error, "java Security Manager forbids 'System.getProperty(\"" + name + "\")' , returning false"); return false; } } @@ -208,7 +212,9 @@ private static File getFileProperty(final String name, final String def) { Optional maybeFile = Optional.ofNullable(value).map(File::new); maybeFile.ifPresent(f -> { if (!f.exists()) { - log.warn(String.format("File property for %s has value %s. However file %s doesn't exist.", SAMJDK_PREFIX + name, value, f.getAbsolutePath())); + log.warn(String.format( + "File property for %s has value %s. 
However file %s doesn't exist.", + SAMJDK_PREFIX + name, value, f.getAbsolutePath())); } else { log.info(String.format("Found file for property %s: %s ", SAMJDK_PREFIX + name, f.getAbsolutePath())); } diff --git a/src/main/java/htsjdk/samtools/DiskBasedBAMFileIndex.java b/src/main/java/htsjdk/samtools/DiskBasedBAMFileIndex.java index 1eddddde31..321b26dbe9 100644 --- a/src/main/java/htsjdk/samtools/DiskBasedBAMFileIndex.java +++ b/src/main/java/htsjdk/samtools/DiskBasedBAMFileIndex.java @@ -24,7 +24,6 @@ package htsjdk.samtools; import htsjdk.samtools.seekablestream.SeekableStream; - import java.io.File; import java.util.ArrayList; import java.util.List; @@ -32,8 +31,7 @@ /** * A class for reading BAM file indices, hitting the disk once per query. */ -public class DiskBasedBAMFileIndex extends AbstractBAMFileIndex -{ +public class DiskBasedBAMFileIndex extends AbstractBAMFileIndex { public DiskBasedBAMFileIndex(final File file, final SAMSequenceDictionary dictionary) { super(file, dictionary); } @@ -42,7 +40,8 @@ public DiskBasedBAMFileIndex(final SeekableStream stream, final SAMSequenceDicti super(stream, dictionary); } - public DiskBasedBAMFileIndex(final File file, final SAMSequenceDictionary dictionary, final boolean useMemoryMapping) { + public DiskBasedBAMFileIndex( + final File file, final SAMSequenceDictionary dictionary, final boolean useMemoryMapping) { super(file, dictionary, useMemoryMapping); } @@ -58,23 +57,23 @@ public DiskBasedBAMFileIndex(final File file, final SAMSequenceDictionary dictio */ @Override public BAMFileSpan getSpanOverlapping(final int referenceIndex, final int startPos, final int endPos) { - final BAMIndexContent queryResults = query(referenceIndex,startPos,endPos); + final BAMIndexContent queryResults = query(referenceIndex, startPos, endPos); - if(queryResults == null) - return null; + if (queryResults == null) return null; List chunkList = new ArrayList(); - for(final Chunk chunk: queryResults.getAllChunks()) - 
chunkList.add(chunk.clone()); - chunkList = Chunk.optimizeChunkList(chunkList,queryResults.getLinearIndex().getMinimumOffset(startPos)); + for (final Chunk chunk : queryResults.getAllChunks()) chunkList.add(chunk.clone()); + chunkList = + Chunk.optimizeChunkList(chunkList, queryResults.getLinearIndex().getMinimumOffset(startPos)); return new BAMFileSpan(chunkList); } - @Override - protected BAMIndexContent getQueryResults(final int reference){ - throw new UnsupportedOperationException(); - // todo: there ought to be a way to support this using the first startPos for the reference and the last - // return query(reference, 1, -1); - // If this were implemented, BAMIndexer.createAndWriteIndex could extend DiskBasedBAMFileIndex -or- CachingBAMFileIndex + @Override + protected BAMIndexContent getQueryResults(final int reference) { + throw new UnsupportedOperationException(); + // todo: there ought to be a way to support this using the first startPos for the reference and the last + // return query(reference, 1, -1); + // If this were implemented, BAMIndexer.createAndWriteIndex could extend DiskBasedBAMFileIndex -or- + // CachingBAMFileIndex } } diff --git a/src/main/java/htsjdk/samtools/DownsamplingIterator.java b/src/main/java/htsjdk/samtools/DownsamplingIterator.java index 8ca0d84cae..5056964f40 100644 --- a/src/main/java/htsjdk/samtools/DownsamplingIterator.java +++ b/src/main/java/htsjdk/samtools/DownsamplingIterator.java @@ -49,22 +49,35 @@ public DownsamplingIterator(final double targetProportion) { } /** Does nothing. */ - @Override public void close() { /** No Op. */ } + @Override + public void close() { + /** No Op. */ + } /** Returns the number of records seen, including accepted and discarded, since creation of the last call to resetStatistics. */ - public long getSeenCount() { return this.recordsSeen; } + public long getSeenCount() { + return this.recordsSeen; + } /** Returns the number of records returned since creation of the last call to resetStatistics. 
*/ - public long getAcceptedCount() { return this.recordsAccepted; } + public long getAcceptedCount() { + return this.recordsAccepted; + } /** Returns the number of records discarded since creation of the last call to resetStatistics. */ - public long getDiscardedCount() { return this.recordsSeen - this.recordsAccepted; } + public long getDiscardedCount() { + return this.recordsSeen - this.recordsAccepted; + } /** Gets the fraction of records discarded since creation or the last call to resetStatistics(). */ - public double getDiscardedFraction() { return getDiscardedCount() / (double) getSeenCount(); } + public double getDiscardedFraction() { + return getDiscardedCount() / (double) getSeenCount(); + } /** Gets the fraction of records accepted since creation or the last call to resetStatistics(). */ - public double getAcceptedFraction() { return getAcceptedCount() / (double) getSeenCount(); } + public double getAcceptedFraction() { + return getAcceptedCount() / (double) getSeenCount(); + } /** Resets the statistics for records seen/accepted/discarded. */ public void resetStatistics() { @@ -78,13 +91,18 @@ public double getTargetProportion() { } /** Method for subclasses to record a record as being discarded. */ - protected final void recordDiscardedRecord() { this.recordsSeen++; } + protected final void recordDiscardedRecord() { + this.recordsSeen++; + } /** * Method for subclasses to record a specific record as being accepted. Null may be passed if a record * was discarded but access to the object is no longer available. */ - protected final void recordAcceptedRecord() { this.recordsSeen++; this.recordsAccepted++; } + protected final void recordAcceptedRecord() { + this.recordsSeen++; + this.recordsAccepted++; + } /** Record one or more records as having been discarded. */ protected final void recordDiscardRecords(final long n) { @@ -107,7 +125,8 @@ public boolean isHigherAccuracy() { } /** Not supported. 
*/ - @Override public void remove() { + @Override + public void remove() { throw new UnsupportedOperationException("remove() not supported in DownsamplingIterators"); } } diff --git a/src/main/java/htsjdk/samtools/DownsamplingIteratorFactory.java b/src/main/java/htsjdk/samtools/DownsamplingIteratorFactory.java index 66debdaba9..6c7c0c7014 100644 --- a/src/main/java/htsjdk/samtools/DownsamplingIteratorFactory.java +++ b/src/main/java/htsjdk/samtools/DownsamplingIteratorFactory.java @@ -24,7 +24,6 @@ package htsjdk.samtools; import htsjdk.samtools.util.IOUtil; - import java.io.File; import java.util.Iterator; @@ -36,24 +35,24 @@ */ public class DownsamplingIteratorFactory { public static final String HIGH_ACCURACY_DESCRIPTION = - "Attempts (but does not guarantee) to provide accuracy up to a specified limit. Accuracy is defined as emitting " + - "a proportion of reads as close to the requested proportion as possible. In order to do so this strategy requires " + - "memory that is proportional to the number of template names in the incoming stream of reads, and will thus require " + - "large amounts of memory when running on large input files."; + "Attempts (but does not guarantee) to provide accuracy up to a specified limit. Accuracy is defined as emitting " + + "a proportion of reads as close to the requested proportion as possible. In order to do so this strategy requires " + + "memory that is proportional to the number of template names in the incoming stream of reads, and will thus require " + + "large amounts of memory when running on large input files."; public static final String CONSTANT_MEMORY_DESCRPTION = - "Downsamples a stream or file of SAMRecords using a hash-projection strategy such that it can run in constant memory. " + - "The downsampling is stochastic, and therefore the actual retained proportion will vary around the requested proportion. 
Due " + - "to working in fixed memory this strategy is good for large inputs, and due to the stochastic nature the accuracy of this strategy " + - "is highest with a high number of output records, and diminishes at low output volumes."; + "Downsamples a stream or file of SAMRecords using a hash-projection strategy such that it can run in constant memory. " + + "The downsampling is stochastic, and therefore the actual retained proportion will vary around the requested proportion. Due " + + "to working in fixed memory this strategy is good for large inputs, and due to the stochastic nature the accuracy of this strategy " + + "is highest with a high number of output records, and diminishes at low output volumes."; public static final String CHAINED_DESCRIPTION = - "Attempts to provide a compromise strategy that offers some of the advantages of both the ConstantMemory and HighAccuracy strategies. " + - "Uses a ConstantMemory strategy to downsample the incoming stream to approximately the desired proportion, and then a HighAccuracy " + - "strategy to finish. Works in a single pass, and will provide accuracy close to (but often not as good as) HighAccuracy while requiring " + - "memory proportional to the set of reads emitted from the ConstantMemory strategy to the HighAccuracy strategy. Works well when downsampling " + - "large inputs to small proportions (e.g. downsampling hundreds of millions of reads and retaining only 2%. Should be accurate 99.9% of the time " + - "when the input contains more than 50,000 templates (read names). For smaller inputs, HighAccuracy is recommended instead."; + "Attempts to provide a compromise strategy that offers some of the advantages of both the ConstantMemory and HighAccuracy strategies. " + + "Uses a ConstantMemory strategy to downsample the incoming stream to approximately the desired proportion, and then a HighAccuracy " + + "strategy to finish. 
Works in a single pass, and will provide accuracy close to (but often not as good as) HighAccuracy while requiring " + + "memory proportional to the set of reads emitted from the ConstantMemory strategy to the HighAccuracy strategy. Works well when downsampling " + + "large inputs to small proportions (e.g. downsampling hundreds of millions of reads and retaining only 2%. Should be accurate 99.9% of the time " + + "when the input contains more than 50,000 templates (read names). For smaller inputs, HighAccuracy is recommended instead."; /** Describes the available downsampling strategies. */ public enum Strategy { @@ -85,17 +84,26 @@ public String getDescription() { * is within proportion +/0 0.0001. * @param seed The seed value to use for any random process used in down-sampling. */ - public static DownsamplingIterator make(final Iterator iterator, final Strategy strategy, final double proportion, final double accuracy, final int seed) { + public static DownsamplingIterator make( + final Iterator iterator, + final Strategy strategy, + final double proportion, + final double accuracy, + final int seed) { if (strategy == null) throw new IllegalArgumentException("strategy may not be null"); if (iterator == null) throw new IllegalArgumentException("iterator may not be null"); if (proportion < 0) throw new IllegalArgumentException("proportion must be greater than 0"); if (proportion > 1) throw new IllegalArgumentException("proportion must be less than 1"); switch (strategy) { - case HighAccuracy: return new HighAccuracyDownsamplingIterator(iterator, proportion, seed).setTargetAccuracy(accuracy); - case ConstantMemory: return new ConstantMemoryDownsamplingIterator(iterator, proportion, seed); - case Chained: return new ChainedDownsamplingIterator(iterator, proportion, seed).setTargetAccuracy(accuracy); - default: throw new IllegalStateException("Unexpected value for Strategy enum in switch statement. 
Bug!!"); + case HighAccuracy: + return new HighAccuracyDownsamplingIterator(iterator, proportion, seed).setTargetAccuracy(accuracy); + case ConstantMemory: + return new ConstantMemoryDownsamplingIterator(iterator, proportion, seed); + case Chained: + return new ChainedDownsamplingIterator(iterator, proportion, seed).setTargetAccuracy(accuracy); + default: + throw new IllegalStateException("Unexpected value for Strategy enum in switch statement. Bug!!"); } } @@ -103,7 +111,12 @@ public static DownsamplingIterator make(final Iterator iterator, fina * Convenience method that constructs a downsampling iterator for all the reads in a SAM file. * See {@link DownsamplingIteratorFactory#make(Iterator, Strategy, double, double, int)} for detailed parameter information. */ - public static DownsamplingIterator make(final File samFile, final Strategy strategy, final double proportion, final double accuracy, final int seed) { + public static DownsamplingIterator make( + final File samFile, + final Strategy strategy, + final double proportion, + final double accuracy, + final int seed) { IOUtil.assertFileIsReadable(samFile); return make(SamReaderFactory.makeDefault().open(samFile), strategy, proportion, accuracy, seed); } @@ -112,7 +125,12 @@ public static DownsamplingIterator make(final File samFile, final Strategy strat * Convenience method that constructs a downsampling iterator for all the reads available from a SamReader. * See {@link DownsamplingIteratorFactory#make(Iterator, Strategy, double, double, int)} for detailed parameter information. 
*/ - public static DownsamplingIterator make(final SamReader reader, final Strategy strategy, final double proportion, final double accuracy, final int seed) { + public static DownsamplingIterator make( + final SamReader reader, + final Strategy strategy, + final double proportion, + final double accuracy, + final int seed) { return make(reader.iterator(), strategy, proportion, accuracy, seed); } } diff --git a/src/main/java/htsjdk/samtools/DuplicateScoringStrategy.java b/src/main/java/htsjdk/samtools/DuplicateScoringStrategy.java index 26c83a5842..e63009ab29 100644 --- a/src/main/java/htsjdk/samtools/DuplicateScoringStrategy.java +++ b/src/main/java/htsjdk/samtools/DuplicateScoringStrategy.java @@ -43,7 +43,9 @@ public enum ScoringStrategy { private static final Murmur3 hasher = new Murmur3(1); /** An enum to use for storing temporary attributes on SAMRecords. */ - private static enum Attr { DuplicateScore } + private static enum Attr { + DuplicateScore + } /** Calculates a score for the read which is the sum of scores over Q15. */ private static int getSumOfBaseQualities(final SAMRecord rec) { @@ -70,11 +72,12 @@ public static short computeDuplicateScore(final SAMRecord record, final ScoringS * If true is given to assumeMateCigar, then any score that can use the mate cigar to compute the mate's score will return the score * computed on both ends. 
*/ - public static short computeDuplicateScore(final SAMRecord record, final ScoringStrategy scoringStrategy, final boolean assumeMateCigar) { + public static short computeDuplicateScore( + final SAMRecord record, final ScoringStrategy scoringStrategy, final boolean assumeMateCigar) { Short storedScore = (Short) record.getTransientAttribute(Attr.DuplicateScore); if (storedScore == null) { - short score=0; + short score = 0; switch (scoringStrategy) { case SUM_OF_BASE_QUALITIES: // two (very) long reads worth of high-quality bases can go over Short.MAX_VALUE/2 @@ -87,7 +90,8 @@ public static short computeDuplicateScore(final SAMRecord record, final ScoringS score = (short) Math.min(record.getCigar().getReferenceLength(), Short.MAX_VALUE / 2); } if (assumeMateCigar && record.getReadPairedFlag() && !record.getMateUnmappedFlag()) { - score += (short) Math.min(SAMUtils.getMateCigar(record).getReferenceLength(), Short.MAX_VALUE / 2); + score += (short) + Math.min(SAMUtils.getMateCigar(record).getReferenceLength(), Short.MAX_VALUE / 2); } break; // The RANDOM score gives the same score to both reads so that they get filtered together. @@ -122,15 +126,20 @@ public static short computeDuplicateScore(final SAMRecord record, final ScoringS * If true is given to assumeMateCigar, then any score that can use the mate cigar to to compute the mate's score will return the score * computed on both ends. * - * We allow different scoring strategies. We return <0 if rec1 has a better strategy than rec2. + * We allow different scoring strategies. We return {@code <0} if rec1 has a better strategy than rec2. 
*/ - public static int compare(final SAMRecord rec1, final SAMRecord rec2, final ScoringStrategy scoringStrategy, final boolean assumeMateCigar) { + public static int compare( + final SAMRecord rec1, + final SAMRecord rec2, + final ScoringStrategy scoringStrategy, + final boolean assumeMateCigar) { int cmp; // always prefer paired over non-paired if (rec1.getReadPairedFlag() != rec2.getReadPairedFlag()) return rec1.getReadPairedFlag() ? -1 : 1; - cmp = computeDuplicateScore(rec2, scoringStrategy, assumeMateCigar) - computeDuplicateScore(rec1, scoringStrategy, assumeMateCigar); + cmp = computeDuplicateScore(rec2, scoringStrategy, assumeMateCigar) + - computeDuplicateScore(rec1, scoringStrategy, assumeMateCigar); /** * Finally, use library ID and read name @@ -147,10 +156,9 @@ public static int compare(final SAMRecord rec1, final SAMRecord rec2, final Scor * pre-computed by computeDuplicateScore and stored in the "DS" tag. If the scores are equal, we break * ties based on mapping quality (added to the mate's mapping quality if paired and mapped), then library/read name. * - * We allow different scoring strategies. We return <0 if rec1 has a better strategy than rec2. + * We allow different scoring strategies. We return {@code <0} if rec1 has a better strategy than rec2. */ public static int compare(final SAMRecord rec1, final SAMRecord rec2, final ScoringStrategy scoringStrategy) { return compare(rec1, rec2, scoringStrategy, false); } - } diff --git a/src/main/java/htsjdk/samtools/DuplicateSet.java b/src/main/java/htsjdk/samtools/DuplicateSet.java index 83330695fa..8af88c1b89 100644 --- a/src/main/java/htsjdk/samtools/DuplicateSet.java +++ b/src/main/java/htsjdk/samtools/DuplicateSet.java @@ -28,9 +28,9 @@ import java.util.List; /** - * Stores a set of records that are duplicates of each other. The first records in the list of records is - * considered the representative of the duplicate, and typically does not have it's duplicate flag set. 
- * The records' duplicate flag will be set appropriately as records are added. This behavior can be + * Stores a set of records that are duplicates of each other. The first records in the list of records is + * considered the representative of the duplicate, and typically does not have it's duplicate flag set. + * The records' duplicate flag will be set appropriately as records are added. This behavior can be * turned off. * * At this time, this set does not track optical duplicates. @@ -88,8 +88,7 @@ public int add(final SAMRecord record) { if (0 < this.comparator.compare(this.representative, record)) { this.representative = record; } - } - else { + } else { this.representative = record; } @@ -108,7 +107,9 @@ private void sort() { if (setDuplicateFlag) { // reset duplicate flags for (final SAMRecord record : records) { - if (!record.getReadUnmappedFlag() && !record.isSecondaryOrSupplementary() && !record.getReadName().equals(representative.getReadName())) { + if (!record.getReadUnmappedFlag() + && !record.isSecondaryOrSupplementary() + && !record.getReadName().equals(representative.getReadName())) { record.setDuplicateReadFlag(true); } } @@ -116,8 +117,8 @@ private void sort() { } if (!records.get(0).equals(this.representative)) { - throw new SAMException("BUG: the representative was not the first record after sorting." - + "\nFIRST: " + records.get(0).getSAMString() + "\nSECOND: " + this.representative.getSAMString()); + throw new SAMException("BUG: the representative was not the first record after sorting." + "\nFIRST: " + + records.get(0).getSAMString() + "\nSECOND: " + this.representative.getSAMString()); } } needsSorting = false; // this could be in the if above if you think hard about it @@ -180,5 +181,7 @@ public boolean isEmpty() { /** * Controls if we should update the duplicate flag of the records in this set. 
*/ - public void setDuplicateFlag(final boolean setDuplicateFlag) { this.setDuplicateFlag = setDuplicateFlag; } -} \ No newline at end of file + public void setDuplicateFlag(final boolean setDuplicateFlag) { + this.setDuplicateFlag = setDuplicateFlag; + } +} diff --git a/src/main/java/htsjdk/samtools/DuplicateSetIterator.java b/src/main/java/htsjdk/samtools/DuplicateSetIterator.java index becbea6734..02b34394fb 100644 --- a/src/main/java/htsjdk/samtools/DuplicateSetIterator.java +++ b/src/main/java/htsjdk/samtools/DuplicateSetIterator.java @@ -27,9 +27,7 @@ import htsjdk.samtools.util.Log; import htsjdk.samtools.util.ProgressLogger; import htsjdk.samtools.util.SortingCollection; - import java.io.File; -import java.util.Collections; /** * An iterator of sets of duplicates. Duplicates are defined currently by the ordering in @@ -52,16 +50,17 @@ public class DuplicateSetIterator implements CloseableIterator { public DuplicateSetIterator(final CloseableIterator iterator, final SAMFileHeader header) { this(iterator, header, false); } - public DuplicateSetIterator(final CloseableIterator iterator, - final SAMFileHeader header, - final boolean preSorted) { + + public DuplicateSetIterator( + final CloseableIterator iterator, final SAMFileHeader header, final boolean preSorted) { this(iterator, header, preSorted, null); } - public DuplicateSetIterator(final CloseableIterator iterator, - final SAMFileHeader header, - final boolean preSorted, - final SAMRecordDuplicateComparator comparator) { + public DuplicateSetIterator( + final CloseableIterator iterator, + final SAMFileHeader header, + final boolean preSorted, + final SAMRecordDuplicateComparator comparator) { this(iterator, header, preSorted, comparator, null); } @@ -70,11 +69,12 @@ public DuplicateSetIterator(final CloseableIterator iterator, * sorted but not actually sorted in the correct order, an exception during iteration will be thrown. 
Progress information will * be printed for sorting of the input if `log` is provided. */ - public DuplicateSetIterator(final CloseableIterator iterator, - final SAMFileHeader header, - final boolean preSorted, - final SAMRecordDuplicateComparator comparator, - final Log log) { + public DuplicateSetIterator( + final CloseableIterator iterator, + final SAMFileHeader header, + final boolean preSorted, + final SAMRecordDuplicateComparator comparator, + final Log log) { this.comparator = (comparator == null) ? new SAMRecordDuplicateComparator(header) : comparator; if (preSorted) { @@ -89,9 +89,8 @@ public DuplicateSetIterator(final CloseableIterator iterator, // Sort it! final int maxRecordsInRam = SAMFileWriterImpl.getDefaultMaxRecordsInRam(); final File tmpDir = new File(System.getProperty("java.io.tmpdir")); - final SortingCollection alignmentSorter = SortingCollection.newInstance(SAMRecord.class, - new BAMRecordCodec(header), this.comparator, - maxRecordsInRam, tmpDir); + final SortingCollection alignmentSorter = SortingCollection.newInstance( + SAMRecord.class, new BAMRecordCodec(header), this.comparator, maxRecordsInRam, tmpDir); while (iterator.hasNext()) { final SAMRecord record = iterator.next(); @@ -109,12 +108,11 @@ public DuplicateSetIterator(final CloseableIterator iterator, if (hasNext()) { this.duplicateSet.add(this.wrappedIterator.next()); } - } @Deprecated /** @deprecated Do not use this method as the first duplicate set will not be compared with this scoring strategy. - * Instead, provide a comparator to the constructor that has the scoring strategy set. */ + * Instead, provide a comparator to the constructor that has the scoring strategy set. 
*/ public void setScoringStrategy(final DuplicateScoringStrategy.ScoringStrategy scoringStrategy) { this.comparator.setScoringStrategy(scoringStrategy); } @@ -147,8 +145,8 @@ public DuplicateSet next() { cmp = this.duplicateSet.add(record); if (0 < cmp) { - throw new SAMException("The input records were not sorted in duplicate order:\n" + - representative.getSAMString() + record.getSAMString()); + throw new SAMException("The input records were not sorted in duplicate order:\n" + + representative.getSAMString() + "\n" + record.getSAMString()); } else if (cmp < 0) { duplicateSet = this.duplicateSet; this.duplicateSet = new DuplicateSet(this.comparator); @@ -162,7 +160,9 @@ public DuplicateSet next() { } @Override - public void close() { wrappedIterator.close(); } + public void close() { + wrappedIterator.close(); + } @Override public boolean hasNext() { @@ -171,5 +171,5 @@ public boolean hasNext() { // Does nothing! @Override - public void remove() { } + public void remove() {} } diff --git a/src/main/java/htsjdk/samtools/FileTruncatedException.java b/src/main/java/htsjdk/samtools/FileTruncatedException.java index 0adf799c5e..0a7f66802c 100644 --- a/src/main/java/htsjdk/samtools/FileTruncatedException.java +++ b/src/main/java/htsjdk/samtools/FileTruncatedException.java @@ -29,8 +29,7 @@ * @author alecw@broadinstitute.org */ public class FileTruncatedException extends SAMException { - public FileTruncatedException() { - } + public FileTruncatedException() {} public FileTruncatedException(final String s) { super(s); diff --git a/src/main/java/htsjdk/samtools/GenomicIndexUtil.java b/src/main/java/htsjdk/samtools/GenomicIndexUtil.java index 275444e113..cf1c52316f 100644 --- a/src/main/java/htsjdk/samtools/GenomicIndexUtil.java +++ b/src/main/java/htsjdk/samtools/GenomicIndexUtil.java @@ -24,7 +24,6 @@ package htsjdk.samtools; import htsjdk.utils.ValidationUtils; - import java.util.BitSet; /** @@ -34,20 +33,19 @@ public class GenomicIndexUtil { /** * Reports the total 
amount of genomic data that any bin can index. */ - public static final int BIN_GENOMIC_SPAN = 512*1024*1024; + public static final int BIN_GENOMIC_SPAN = 512 * 1024 * 1024; /** * What is the starting bin for each level? */ - public static final int[] LEVEL_STARTS = {0,1,9,73,585,4681}; + public static final int[] LEVEL_STARTS = {0, 1, 9, 73, 585, 4681}; /** * Reports the maximum number of bins that can appear in a binning index. */ - public static final int MAX_BINS = 37450; // =(8^6-1)/7+1 - - public static final int MAX_LINEAR_INDEX_SIZE = MAX_BINS+1-LEVEL_STARTS[LEVEL_STARTS.length-1]; + public static final int MAX_BINS = 37450; // =(8^6-1)/7+1 + public static final int MAX_LINEAR_INDEX_SIZE = MAX_BINS + 1 - LEVEL_STARTS[LEVEL_STARTS.length - 1]; /** * E.g. for a SAMRecord with no genomic coordinate. @@ -60,10 +58,10 @@ public class GenomicIndexUtil { * @return the binning index level for the given bin */ public static int binTolevel(int bin) { - ValidationUtils.validateArg(bin >=0 && bin <= MAX_BINS, "Bin number must be >=0 and <= 37450"); + ValidationUtils.validateArg(bin >= 0 && bin <= MAX_BINS, "Bin number must be >=0 and <= 37450"); // As described in Tabix: fast retrieval of sequence features from generic TAB-delimited files. // doi: 10.1093/bioinformatics/btq671 - return (int) Math.floor(((Math.log((7*bin) + 1) / Math.log(2)) / 3)); + return (int) Math.floor(((Math.log((7 * bin) + 1) / Math.log(2)) / 3)); } /** @@ -73,10 +71,10 @@ public static int binTolevel(int bin) { * @return the size for a bin at the given level */ public static int levelToSize(int level) { - ValidationUtils.validateArg(level >=0 && level <= 5, "Level number must be >=0 and <= 5"); + ValidationUtils.validateArg(level >= 0 && level <= 5, "Level number must be >=0 and <= 5"); // As described in Tabix: fast retrieval of sequence features from generic TAB-delimited files. 
// doi: 10.1093/bioinformatics/btq671 - return (int) Math.pow(2, 29-(3*level)); + return (int) Math.pow(2, 29 - (3 * level)); } /** @@ -91,54 +89,48 @@ public static String getBinSummaryString(int bin) { final int level = binTolevel(bin); final int levelStart = LEVEL_STARTS[level]; final int binSize = levelToSize(level); - final int binStart = (bin-levelStart) * binSize; - return String.format("bin=%d, level=%d, first bin=%d, bin size=%,d bin range=(%,d-%,d)", - bin, - level, - levelStart, - binSize, - binStart, - binStart + binSize); + final int binStart = (bin - levelStart) * binSize; + return String.format( + "bin=%d, level=%d, first bin=%d, bin size=%,d bin range=(%,d-%,d)", + bin, level, levelStart, binSize, binStart, binStart + binSize); } /** * calculate the bin given an alignment in [beg,end) - * Described in "The Human Genome Browser at UCSC. Kent & al. doi: 10.1101/gr.229102 " + * Described in "The Human Genome Browser at UCSC. Kent & al. doi: 10.1101/gr.229102 " * @param beg 0-based start of read (inclusive) * @param end 0-based end of read (exclusive) */ - public static int regionToBin(final int beg, int end) - { + public static int regionToBin(final int beg, int end) { --end; - if (beg>>14 == end>>14) return ((1<<15)-1)/7 + (beg>>14); - if (beg>>17 == end>>17) return ((1<<12)-1)/7 + (beg>>17); - if (beg>>20 == end>>20) return ((1<<9)-1)/7 + (beg>>20); - if (beg>>23 == end>>23) return ((1<<6)-1)/7 + (beg>>23); - if (beg>>26 == end>>26) return ((1<<3)-1)/7 + (beg>>26); + if (beg >> 14 == end >> 14) return ((1 << 15) - 1) / 7 + (beg >> 14); + if (beg >> 17 == end >> 17) return ((1 << 12) - 1) / 7 + (beg >> 17); + if (beg >> 20 == end >> 20) return ((1 << 9) - 1) / 7 + (beg >> 20); + if (beg >> 23 == end >> 23) return ((1 << 6) - 1) / 7 + (beg >> 23); + if (beg >> 26 == end >> 26) return ((1 << 3) - 1) / 7 + (beg >> 26); return 0; } /** * calculate the bin given an alignment in [beg,end) - * Described in "The Human Genome Browser at UCSC. Kent & al. 
doi: 10.1101/gr.229102 " + * Described in "The Human Genome Browser at UCSC. Kent & al. doi: 10.1101/gr.229102 " * @param beg 0-based start of read (inclusive) * @param end 0-based end of read (exclusive) * @param minShift minimum bin width (2^minShift) * @param binDepth number of levels in the binning scheme (including bin 0) */ - public static int regionToBin(final int beg, int end, final int minShift, final int binDepth) - { - final int maxShift = minShift + 3*(binDepth-1); + public static int regionToBin(final int beg, int end, final int minShift, final int binDepth) { + final int maxShift = minShift + 3 * (binDepth - 1); int binWidth = minShift; --end; while (binWidth < maxShift) { - if (beg>>binWidth == end>>binWidth) { - return ((1<< (maxShift - binWidth)) - 1)/7 + (beg>>binWidth); + if (beg >> binWidth == end >> binWidth) { + return ((1 << (maxShift - binWidth)) - 1) / 7 + (beg >> binWidth); } - binWidth+=3; + binWidth += 3; } return 0; @@ -155,19 +147,19 @@ public static int regionToBin(final int beg, int end, final int minShift, final */ public static BitSet regionToBins(final int startPos, final int endPos) { final int maxPos = 0x1FFFFFFF; - final int start = (startPos <= 0) ? 0 : (startPos-1) & maxPos; - final int end = (endPos <= 0) ? maxPos : (endPos-1) & maxPos; + final int start = (startPos <= 0) ? 0 : (startPos - 1) & maxPos; + final int end = (endPos <= 0) ? 
maxPos : (endPos - 1) & maxPos; if (start > end) { return null; } int k; final BitSet bins = new BitSet(GenomicIndexUtil.MAX_BINS); bins.set(0); - for (k = 1 + (start>>26); k <= 1 + (end>>26); ++k) bins.set(k); - for (k = 9 + (start>>23); k <= 9 + (end>>23); ++k) bins.set(k); - for (k = 73 + (start>>20); k <= 73 + (end>>20); ++k) bins.set(k); - for (k = 585 + (start>>17); k <= 585 + (end>>17); ++k) bins.set(k); - for (k = 4681 + (start>>14); k <= 4681 + (end>>14); ++k) bins.set(k); + for (k = 1 + (start >> 26); k <= 1 + (end >> 26); ++k) bins.set(k); + for (k = 9 + (start >> 23); k <= 9 + (end >> 23); ++k) bins.set(k); + for (k = 73 + (start >> 20); k <= 73 + (end >> 20); ++k) bins.set(k); + for (k = 585 + (start >> 17); k <= 585 + (end >> 17); ++k) bins.set(k); + for (k = 4681 + (start >> 14); k <= 4681 + (end >> 14); ++k) bins.set(k); return bins; } @@ -205,5 +197,4 @@ public static BitSet regionToBins(final int startPos, final int endPos, final in } return bins; } - } diff --git a/src/main/java/htsjdk/samtools/HighAccuracyDownsamplingIterator.java b/src/main/java/htsjdk/samtools/HighAccuracyDownsamplingIterator.java index 2dd3b6c5ee..447694733f 100644 --- a/src/main/java/htsjdk/samtools/HighAccuracyDownsamplingIterator.java +++ b/src/main/java/htsjdk/samtools/HighAccuracyDownsamplingIterator.java @@ -52,7 +52,8 @@ class HighAccuracyDownsamplingIterator extends DownsamplingIterator { private Set bufferedRecordsToKeep; /** Override method to make it clear that this iterator attempts to provide a higher accuracy of downsampling. */ - @Override public boolean isHigherAccuracy() { + @Override + public boolean isHigherAccuracy() { return true; } @@ -70,22 +71,24 @@ class HighAccuracyDownsamplingIterator extends DownsamplingIterator { * for 1/accuracy templates, so setting this to extremely small numbers is not advisable. 
*/ public DownsamplingIterator setTargetAccuracy(final double accuracy) { - if (accuracy >= 1 || accuracy <= 1d/Integer.MAX_VALUE) throw new IllegalArgumentException("Illegal value. Must be 1/MAX_INT < accuracy < 1"); + if (accuracy >= 1 || accuracy <= 1d / Integer.MAX_VALUE) + throw new IllegalArgumentException("Illegal value. Must be 1/MAX_INT < accuracy < 1"); this.targetAccuracy = accuracy; return this; } /** Returns true if there is another record available post-downsampling, false otherwise. */ - @Override public boolean hasNext() { + @Override + public boolean hasNext() { return this.nextRecord != null || advance(); } /** Returns the next record from the iterator, or throws an exception if there is no next record. */ - @Override public SAMRecord next() { + @Override + public SAMRecord next() { if (this.nextRecord == null) { throw new NoSuchElementException("Call to next() when hasNext() == false"); - } - else { + } else { final SAMRecord retval = this.nextRecord; advance(); return retval; @@ -107,7 +110,9 @@ protected Iterator getUnderlyingIterator() { protected boolean advance() { this.nextRecord = null; - while (this.nextRecord == null && (this.bufferedRecords.hasNext() || bufferNextChunkOfRecords(getTargetProportion(), this.targetAccuracy))) { + while (this.nextRecord == null + && (this.bufferedRecords.hasNext() + || bufferNextChunkOfRecords(getTargetProportion(), this.targetAccuracy))) { final SAMRecord rec = this.bufferedRecords.next(); final String key = rec.getReadName(); final Boolean previous = decisions.get(key); @@ -116,16 +121,14 @@ protected boolean advance() { if (previous == null) { keepThisRecord = this.bufferedRecordsToKeep.contains(rec.getReadName()); decisions.put(key, keepThisRecord); - } - else { + } else { keepThisRecord = previous; } if (keepThisRecord) { this.nextRecord = rec; recordAcceptedRecord(); - } - else { + } else { recordDiscardedRecord(); } } @@ -154,7 +157,7 @@ protected boolean bufferNextChunkOfRecords(final double 
proportion, final double // Randomly shuffle a list of all the template names, and then remove some from the set final int templatesToDiscard = templatesRead - templatesToKeep; - final List tmp = new ArrayList(names); + final List tmp = new ArrayList(names); Collections.shuffle(tmp, this.random); for (int i = 0; i < templatesToDiscard; ++i) names.remove(tmp.get(i)); @@ -162,7 +165,7 @@ protected boolean bufferNextChunkOfRecords(final double proportion, final double this.bufferedRecordsToKeep = names; this.bufferedRecords = recs.iterator(); this.totalTemplates += templatesRead; - this.keptTemplates += names.size(); + this.keptTemplates += names.size(); return !recs.isEmpty(); } @@ -176,7 +179,8 @@ protected boolean bufferNextChunkOfRecords(final double proportion, final double protected int calculateTemplatesToKeep(final int templatesRead, final double proportion) { final double rawTemplatesToKeep = templatesRead * proportion; return (keptTemplates / (double) totalTemplates < proportion) - ? (int) Math.ceil(rawTemplatesToKeep) : (int) Math.floor(rawTemplatesToKeep); + ? (int) Math.ceil(rawTemplatesToKeep) + : (int) Math.floor(rawTemplatesToKeep); } /** @@ -184,7 +188,8 @@ protected int calculateTemplatesToKeep(final int templatesRead, final double pro * observed, so that templatesToRead new keep/reject decisions can be made. The records that are read are placed into recs * and _novel_ template names are placed into names. 
*/ - protected void readFromUnderlyingIterator(final List recs, final Set names, final int templatesToRead) { + protected void readFromUnderlyingIterator( + final List recs, final Set names, final int templatesToRead) { while (this.underlyingIterator.hasNext() && names.size() < templatesToRead) { final SAMRecord rec = this.underlyingIterator.next(); recs.add(rec); diff --git a/src/main/java/htsjdk/samtools/HtsgetBAMFileReader.java b/src/main/java/htsjdk/samtools/HtsgetBAMFileReader.java index a161e00bc0..6aa5ee10c4 100644 --- a/src/main/java/htsjdk/samtools/HtsgetBAMFileReader.java +++ b/src/main/java/htsjdk/samtools/HtsgetBAMFileReader.java @@ -5,7 +5,6 @@ import htsjdk.samtools.util.*; import htsjdk.samtools.util.htsget.*; import htsjdk.samtools.util.zip.InflaterFactory; - import java.io.DataInputStream; import java.io.IOException; import java.io.InputStream; @@ -60,34 +59,24 @@ public class HtsgetBAMFileReader extends SamReader.ReaderImplementation { * @param useAsynchronousIO if true, use asynchronous I/O and prefetching * @param inflaterFactory InflaterFactory used by BlockCompressedInputStream */ - public static HtsgetBAMFileReader fromHtsgetURI(final HtsgetInputResource source, - final boolean eagerDecode, - final ValidationStringency validationStringency, - final SAMRecordFactory samRecordFactory, - final boolean useAsynchronousIO, - final InflaterFactory inflaterFactory) throws IOException, URISyntaxException { + public static HtsgetBAMFileReader fromHtsgetURI( + final HtsgetInputResource source, + final boolean eagerDecode, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory, + final boolean useAsynchronousIO, + final InflaterFactory inflaterFactory) + throws IOException, URISyntaxException { HtsgetBAMFileReader reader; try { final URI htsgetUri = HtsgetBAMFileReader.convertHtsgetUriToHttps(source.uri); reader = new HtsgetBAMFileReader( - htsgetUri, - eagerDecode, - validationStringency, - samRecordFactory, - 
useAsynchronousIO, - inflaterFactory - ); + htsgetUri, eagerDecode, validationStringency, samRecordFactory, useAsynchronousIO, inflaterFactory); } catch (final RuntimeIOException e) { // Fall back to http if htsget server does not support https final URI htsgetUri = HtsgetBAMFileReader.convertHtsgetUriToHttp(source.uri); reader = new HtsgetBAMFileReader( - htsgetUri, - eagerDecode, - validationStringency, - samRecordFactory, - useAsynchronousIO, - inflaterFactory - ); + htsgetUri, eagerDecode, validationStringency, samRecordFactory, useAsynchronousIO, inflaterFactory); } return reader; } @@ -101,12 +90,20 @@ public static HtsgetBAMFileReader fromHtsgetURI(final HtsgetInputResource source * @param samRecordFactory SAM record factory * @param useAsynchronousIO if true, use asynchronous I/O and prefetching */ - public HtsgetBAMFileReader(final URI source, - final boolean eagerDecode, - final ValidationStringency validationStringency, - final SAMRecordFactory samRecordFactory, - final boolean useAsynchronousIO) throws IOException { - this(source, eagerDecode, validationStringency, samRecordFactory, useAsynchronousIO, BlockGunzipper.getDefaultInflaterFactory()); + public HtsgetBAMFileReader( + final URI source, + final boolean eagerDecode, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory, + final boolean useAsynchronousIO) + throws IOException { + this( + source, + eagerDecode, + validationStringency, + samRecordFactory, + useAsynchronousIO, + BlockGunzipper.getDefaultInflaterFactory()); } /** @@ -119,12 +116,14 @@ public HtsgetBAMFileReader(final URI source, * @param useAsynchronousIO if true, use asynchronous I/O and prefetching * @param inflaterFactory InflaterFactory used by BlockCompressedInputStream */ - public HtsgetBAMFileReader(final URI source, - final boolean eagerDecode, - final ValidationStringency validationStringency, - final SAMRecordFactory samRecordFactory, - final boolean useAsynchronousIO, - final 
InflaterFactory inflaterFactory) throws IOException { + public HtsgetBAMFileReader( + final URI source, + final boolean eagerDecode, + final ValidationStringency validationStringency, + final SAMRecordFactory samRecordFactory, + final boolean useAsynchronousIO, + final InflaterFactory inflaterFactory) + throws IOException { this.mSource = source; this.mEagerDecode = eagerDecode; this.mValidationStringency = validationStringency; @@ -135,10 +134,10 @@ public HtsgetBAMFileReader(final URI source, final HtsgetRequest req = new HtsgetRequest(this.mSource).withDataClass(HtsgetClass.header); // Request only the header and use it to construct a SAMFileHeader for this reader try (final InputStream headerStream = req.getResponse().getDataStream()) { - final BinaryCodec headerCodec = new BinaryCodec( - new DataInputStream(this.mUseAsynchronousIO - ? new AsyncBlockCompressedInputStream(headerStream, this.mInflaterFactory) - : new BlockCompressedInputStream(headerStream, this.mInflaterFactory))); + final BinaryCodec headerCodec = new BinaryCodec(new DataInputStream( + this.mUseAsynchronousIO + ? 
new AsyncBlockCompressedInputStream(headerStream, this.mInflaterFactory) + : new BlockCompressedInputStream(headerStream, this.mInflaterFactory))); this.mFileHeader = BAMFileReader.readHeader(headerCodec, this.mValidationStringency, null); } @@ -310,15 +309,17 @@ public SAMFileSpan getFilePointerSpanningReads() { public CloseableIterator query(final QueryInterval[] intervals, final boolean contained) { QueryInterval.assertIntervalsOptimized(intervals); final List namedIntervals = Arrays.stream(intervals) - .map(i -> new Interval(this.mFileHeader.getSequence(i.referenceIndex).getSequenceName(), i.start, i.end)) - .collect(Collectors.toList()); + .map(i -> new Interval( + this.mFileHeader.getSequence(i.referenceIndex).getSequenceName(), i.start, i.end)) + .collect(Collectors.toList()); return this.query(namedIntervals, contained); } - public CloseableIterator query(final String sequence, final int start, final int end, final boolean contained) { + public CloseableIterator query( + final String sequence, final int start, final int end, final boolean contained) { return this.query( - Collections.singletonList(new Interval(sequence, start, end == -1 ? Integer.MAX_VALUE : end)), - contained); + Collections.singletonList(new Interval(sequence, start, end == -1 ? 
Integer.MAX_VALUE : end)), + contained); } /** @@ -337,19 +338,17 @@ public CloseableIterator query(final List intervals, final // POST request does not guarantee that all returned records are overlapped/contained // by requested intervals, so we still need to filter, but don't need to filter duplicates chainingIterator = new FilteringSamIterator( - new HtsgetBAMFileIterator( - new HtsgetPOSTRequest(this.mSource) - .withIntervals(intervals) - .withFormat(HtsgetFormat.BAM) - ), - new BAMQueryMultipleIntervalsIteratorFilter(intervals, contained)); + new HtsgetBAMFileIterator(new HtsgetPOSTRequest(this.mSource) + .withIntervals(intervals) + .withFormat(HtsgetFormat.BAM)), + new BAMQueryMultipleIntervalsIteratorFilter(intervals, contained)); } else { chainingIterator = new BAMQueryChainingIterator(intervals, contained); } final CloseableIterator queryIterator = this.mUseAsynchronousIO - ? new SAMRecordPrefetchingIterator(chainingIterator, READAHEAD_LIMIT) - : chainingIterator; + ? new SAMRecordPrefetchingIterator(chainingIterator, READAHEAD_LIMIT) + : chainingIterator; this.iterators.add(queryIterator); return queryIterator; } @@ -373,14 +372,14 @@ public CloseableIterator queryAlignmentStart(final String sequence, f return new EmptyBamIterator(); } else { final HtsgetRequest req = new HtsgetRequest(this.mSource) - .withFormat(HtsgetFormat.BAM) - .withInterval(new Interval(sequence, start, start + 1)); + .withFormat(HtsgetFormat.BAM) + .withInterval(new Interval(sequence, start, start + 1)); final CloseableIterator iterator = new HtsgetBAMFileIterator(req); final BAMStartingAtIteratorFilter filter = new BAMStartingAtIteratorFilter(referenceIndex, start); final CloseableIterator filteringIterator = new BAMQueryFilteringIterator(iterator, filter); final CloseableIterator queryIterator = this.mUseAsynchronousIO - ? new SAMRecordPrefetchingIterator(filteringIterator, READAHEAD_LIMIT) - : filteringIterator; + ? 
new SAMRecordPrefetchingIterator(filteringIterator, READAHEAD_LIMIT) + : filteringIterator; this.iterators.add(queryIterator); return queryIterator; } @@ -396,12 +395,12 @@ public CloseableIterator queryAlignmentStart(final String sequence, f @Override public CloseableIterator queryUnmapped() { final HtsgetRequest req = new HtsgetRequest(this.mSource) - .withFormat(HtsgetFormat.BAM) - .withInterval(HtsgetRequest.UNMAPPED_UNPLACED_INTERVAL); + .withFormat(HtsgetFormat.BAM) + .withInterval(HtsgetRequest.UNMAPPED_UNPLACED_INTERVAL); final CloseableIterator unmappedIterator = new HtsgetBAMFileIterator(req); final CloseableIterator queryIterator = this.mUseAsynchronousIO - ? new SAMRecordPrefetchingIterator(unmappedIterator, READAHEAD_LIMIT) - : unmappedIterator; + ? new SAMRecordPrefetchingIterator(unmappedIterator, READAHEAD_LIMIT) + : unmappedIterator; this.iterators.add(queryIterator); return queryIterator; } @@ -433,8 +432,8 @@ private BlockCompressedInputStream getRequestStream(final HtsgetRequest req) { final InputStream stream = resp.getDataStream(); final BlockCompressedInputStream compressedInputStream = this.mUseAsynchronousIO - ? new AsyncBlockCompressedInputStream(stream, this.mInflaterFactory) - : new BlockCompressedInputStream(stream, this.mInflaterFactory); + ? 
new AsyncBlockCompressedInputStream(stream, this.mInflaterFactory) + : new BlockCompressedInputStream(stream, this.mInflaterFactory); if (this.mCheckCRC) { compressedInputStream.setCheckCrcs(true); } @@ -449,11 +448,25 @@ private BlockCompressedInputStream getRequestStream(final HtsgetRequest req) { } public static URI convertHtsgetUriToHttps(final URI uri) throws URISyntaxException { - return new URI("https", uri.getUserInfo(), uri.getHost(), uri.getPort(), uri.getPath(), uri.getQuery(), uri.getFragment()); + return new URI( + "https", + uri.getUserInfo(), + uri.getHost(), + uri.getPort(), + uri.getPath(), + uri.getQuery(), + uri.getFragment()); } public static URI convertHtsgetUriToHttp(final URI uri) throws URISyntaxException { - return new URI("http", uri.getUserInfo(), uri.getHost(), uri.getPort(), uri.getPath(), uri.getQuery(), uri.getFragment()); + return new URI( + "http", + uri.getUserInfo(), + uri.getHost(), + uri.getPort(), + uri.getPath(), + uri.getQuery(), + uri.getFragment()); } private class HtsgetBAMFileIterator implements CloseableIterator { @@ -465,8 +478,7 @@ private class HtsgetBAMFileIterator implements CloseableIterator { public HtsgetBAMFileIterator(final HtsgetRequest req) { this.stream = HtsgetBAMFileReader.this.getRequestStream(req); this.bamRecordCodec = new BAMRecordCodec( - HtsgetBAMFileReader.this.mFileHeader, - HtsgetBAMFileReader.this.mSamRecordFactory); + HtsgetBAMFileReader.this.mFileHeader, HtsgetBAMFileReader.this.mSamRecordFactory); this.bamRecordCodec.setInputStream(new DataInputStream(this.stream)); this.advance(); } @@ -496,9 +508,7 @@ private SAMRecord getNextRecord() { final SAMRecord next = this.bamRecordCodec.decode(); if (HtsgetBAMFileReader.this.mReader != null && next != null) { - next.setFileSource(new SAMFileSource( - HtsgetBAMFileReader.this.mReader, - null)); + next.setFileSource(new SAMFileSource(HtsgetBAMFileReader.this.mReader, null)); } return next; } @@ -512,11 +522,12 @@ private void advance() { 
this.currentRecord.setValidationStringency(HtsgetBAMFileReader.this.mValidationStringency); if (HtsgetBAMFileReader.this.mValidationStringency != ValidationStringency.SILENT) { - final boolean firstErrorOnly = HtsgetBAMFileReader.this.mValidationStringency == ValidationStringency.STRICT; + final boolean firstErrorOnly = + HtsgetBAMFileReader.this.mValidationStringency == ValidationStringency.STRICT; SAMUtils.processValidationErrors( - this.currentRecord.isValid(firstErrorOnly), - this.samRecordIndex, - HtsgetBAMFileReader.this.mValidationStringency); + this.currentRecord.isValid(firstErrorOnly), + this.samRecordIndex, + HtsgetBAMFileReader.this.mValidationStringency); } } if (HtsgetBAMFileReader.this.mEagerDecode && this.currentRecord != null) { @@ -543,13 +554,13 @@ public BAMQueryChainingIterator(final List intervals, final boolean c this.intervals = intervals; this.contained = contained; this.iterators = intervals.stream() - .map(i -> new Lazy<>(() -> { - final HtsgetRequest req = new HtsgetRequest(HtsgetBAMFileReader.this.mSource) - .withFormat(HtsgetFormat.BAM) - .withInterval(i); - return new HtsgetBAMFileIterator(req); - })) - .iterator(); + .map(i -> new Lazy<>(() -> { + final HtsgetRequest req = new HtsgetRequest(HtsgetBAMFileReader.this.mSource) + .withFormat(HtsgetFormat.BAM) + .withInterval(i); + return new HtsgetBAMFileIterator(req); + })) + .iterator(); this.advanceIterator(); this.advance(); } @@ -591,10 +602,11 @@ private void advanceIterator() { return; } final Locatable currInterval = this.intervals.get(this.currentIntervalIndex); - final Locatable prevInterval = this.currentIntervalIndex == 0 ? null : this.intervals.get(this.currentIntervalIndex - 1); + final Locatable prevInterval = + this.currentIntervalIndex == 0 ? 
null : this.intervals.get(this.currentIntervalIndex - 1); this.currentIterator = new FilteringSamIterator( - this.iterators.next().get(), - new ConsecutiveDuplicateRecordFilter(currInterval, prevInterval, contained)); + this.iterators.next().get(), + new ConsecutiveDuplicateRecordFilter(currInterval, prevInterval, contained)); this.currentIntervalIndex++; } } @@ -611,7 +623,8 @@ private static class ConsecutiveDuplicateRecordFilter implements SamRecordFilter private final Locatable currInterval; private final boolean contained; - public ConsecutiveDuplicateRecordFilter(final Locatable currInterval, final Locatable prevInterval, final boolean contained) { + public ConsecutiveDuplicateRecordFilter( + final Locatable currInterval, final Locatable prevInterval, final boolean contained) { this.currInterval = currInterval; this.prevInterval = prevInterval; this.contained = contained; @@ -620,8 +633,8 @@ public ConsecutiveDuplicateRecordFilter(final Locatable currInterval, final Loca @Override public boolean filterOut(final SAMRecord record) { return record.getReadUnmappedFlag() && record.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START - ? !this.acceptUnmappedRecord(record) - : !this.acceptRecord(record); + ? !this.acceptUnmappedRecord(record) + : !this.acceptRecord(record); } @Override @@ -631,19 +644,17 @@ public boolean filterOut(final SAMRecord first, final SAMRecord second) { private boolean acceptRecord(final SAMRecord rec) { return this.contained - ? currInterval.contains(rec) && (prevInterval == null || !prevInterval.contains(rec)) - : currInterval.overlaps(rec) && (prevInterval == null || !prevInterval.overlaps(rec)); + ? 
currInterval.contains(rec) && (prevInterval == null || !prevInterval.contains(rec)) + : currInterval.overlaps(rec) && (prevInterval == null || !prevInterval.overlaps(rec)); } private boolean acceptUnmappedRecord(final SAMRecord rec) { final int start = rec.getStart(); - final boolean matchesCurrInterval = - rec.contigsMatch(currInterval) && - CoordMath.encloses(currInterval.getStart(), currInterval.getEnd(), start, start); - final boolean matchesPrevInterval = - prevInterval != null && - rec.contigsMatch(prevInterval) && - CoordMath.encloses(prevInterval.getStart(), prevInterval.getEnd(), start, start); + final boolean matchesCurrInterval = rec.contigsMatch(currInterval) + && CoordMath.encloses(currInterval.getStart(), currInterval.getEnd(), start, start); + final boolean matchesPrevInterval = prevInterval != null + && rec.contigsMatch(prevInterval) + && CoordMath.encloses(prevInterval.getStart(), prevInterval.getEnd(), start, start); return matchesCurrInterval && !matchesPrevInterval; } } @@ -656,8 +667,7 @@ public static class BAMQueryMultipleIntervalsIteratorFilter implements SamRecord ConsecutiveDuplicateRecordFilter filter; final boolean contained; - public BAMQueryMultipleIntervalsIteratorFilter(final List intervals, - final boolean contained) { + public BAMQueryMultipleIntervalsIteratorFilter(final List intervals, final boolean contained) { this.contained = contained; this.intervals = intervals.iterator(); this.filter = null; @@ -694,9 +704,11 @@ private static class BAMQueryFilteringIterator implements CloseableIterator iterator, final BAMIteratorFilter iteratorFilter) { + public BAMQueryFilteringIterator( + final CloseableIterator iterator, final BAMIteratorFilter iteratorFilter) { this.wrappedIterator = iterator; this.iteratorFilter = iteratorFilter; this.nextRecord = this.advance(); @@ -764,7 +776,6 @@ public SAMRecord next() { } @Override - public void close() { - } + public void close() {} } } diff --git 
a/src/main/java/htsjdk/samtools/IndexFileBuffer.java b/src/main/java/htsjdk/samtools/IndexFileBuffer.java index e8e421b759..f6b5dd29c9 100644 --- a/src/main/java/htsjdk/samtools/IndexFileBuffer.java +++ b/src/main/java/htsjdk/samtools/IndexFileBuffer.java @@ -8,10 +8,16 @@ */ interface IndexFileBuffer extends Closeable { void readBytes(final byte[] bytes); + int readInteger(); + long readLong(); + void skipBytes(final int count); + void seek(final long position); + long position(); + void close(); -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/samtools/IndexFileBufferFactory.java b/src/main/java/htsjdk/samtools/IndexFileBufferFactory.java index 4592ce060f..5dcfe086fb 100644 --- a/src/main/java/htsjdk/samtools/IndexFileBufferFactory.java +++ b/src/main/java/htsjdk/samtools/IndexFileBufferFactory.java @@ -3,7 +3,6 @@ import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.RuntimeIOException; - import java.io.File; import java.io.IOException; @@ -17,15 +16,15 @@ static IndexFileBuffer getBuffer(File file, boolean enableMemoryMapping) { throw (new RuntimeIOException(ioe)); } - return isCompressed ? new CompressedIndexFileBuffer(file) : (enableMemoryMapping ? new MemoryMappedFileBuffer(file) : new RandomAccessFileBuffer(file)); + return isCompressed + ? new CompressedIndexFileBuffer(file) + : (enableMemoryMapping ? new MemoryMappedFileBuffer(file) : new RandomAccessFileBuffer(file)); } static IndexFileBuffer getBuffer(SeekableStream seekableStream) { boolean isCompressed; isCompressed = IOUtil.isGZIPInputStream(seekableStream); - return isCompressed ? - new CompressedIndexFileBuffer(seekableStream) : - new IndexStreamBuffer(seekableStream); + return isCompressed ? 
new CompressedIndexFileBuffer(seekableStream) : new IndexStreamBuffer(seekableStream); } } diff --git a/src/main/java/htsjdk/samtools/IndexStreamBuffer.java b/src/main/java/htsjdk/samtools/IndexStreamBuffer.java index 5486dbe9c0..cd403c2250 100644 --- a/src/main/java/htsjdk/samtools/IndexStreamBuffer.java +++ b/src/main/java/htsjdk/samtools/IndexStreamBuffer.java @@ -2,7 +2,6 @@ import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.RuntimeIOException; - import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -25,7 +24,9 @@ private static void readFully(final SeekableStream in, final byte[] buffer, fina if (readThisLoop == -1) break; read += readThisLoop; } - if (read != length) throw new RuntimeIOException("Expected to read " + length + " bytes, but expired stream after " + read + "."); + if (read != length) + throw new RuntimeIOException( + "Expected to read " + length + " bytes, but expired stream after " + read + "."); } public IndexStreamBuffer(final SeekableStream s) { @@ -36,8 +37,11 @@ public IndexStreamBuffer(final SeekableStream s) { @Override public void close() { - try { in.close(); } - catch (final IOException e) { throw new RuntimeIOException(e); } + try { + in.close(); + } catch (final IOException e) { + throw new RuntimeIOException(e); + } } @Override @@ -47,8 +51,11 @@ public void readBytes(final byte[] bytes) { @Override public void seek(final long position) { - try { in.seek(position); } - catch (final IOException e) { throw new RuntimeIOException(e); } + try { + in.seek(position); + } catch (final IOException e) { + throw new RuntimeIOException(e); + } } @Override @@ -66,20 +73,24 @@ public long readLong() { @Override public void skipBytes(final int count) { try { - for (int s = count; s > 0;) { - final int skipped = (int)in.skip(s); + for (int s = count; s > 0; ) { + final int skipped = (int) in.skip(s); if (skipped <= 0) { throw new RuntimeIOException("Failed to skip " + s); } s -= 
skipped; } - } catch (final IOException e) { throw new RuntimeIOException(e); } + } catch (final IOException e) { + throw new RuntimeIOException(e); + } } @Override public long position() { try { return (int) in.position(); - } catch (final IOException e) { throw new RuntimeIOException(e); } + } catch (final IOException e) { + throw new RuntimeIOException(e); + } } } diff --git a/src/main/java/htsjdk/samtools/LinearIndex.java b/src/main/java/htsjdk/samtools/LinearIndex.java index 7ebd6bee57..0b72e7ff32 100644 --- a/src/main/java/htsjdk/samtools/LinearIndex.java +++ b/src/main/java/htsjdk/samtools/LinearIndex.java @@ -68,26 +68,26 @@ public int size() { } public long get(final int index) { - return mIndexEntries[index-mIndexStart]; + return mIndexEntries[index - mIndexStart]; } public static int convertToLinearIndexOffset(final int contigPos) { - final int indexPos = (contigPos <= 0) ? 0 : contigPos-1; + final int indexPos = (contigPos <= 0) ? 0 : contigPos - 1; return indexPos >> BAM_LIDX_SHIFT; } /** - * Gets the minimum offset of any alignment start appearing in this index, according to the linear index. + * Gets the minimum offset of any alignment start appearing in this index, according to the linear index. * @param startPos Starting position for this query. * @return The minimum offset, in chunk format, of any read appearing in this position. */ public long getMinimumOffset(final int startPos) { - final int start = (startPos <= 0) ? 0 : startPos-1; + final int start = (startPos <= 0) ? 
0 : startPos - 1; final int regionLinearBin = start >> BAM_LIDX_SHIFT; // System.out.println("# regionLinearBin: " + regionLinearBin); long minimumOffset = 0; - if (regionLinearBin-mIndexStart < mIndexEntries.length) - minimumOffset = mIndexEntries[regionLinearBin-mIndexStart]; + if (regionLinearBin - mIndexStart < mIndexEntries.length) + minimumOffset = mIndexEntries[regionLinearBin - mIndexStart]; return minimumOffset; } @@ -108,9 +108,9 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; LinearIndex that = (LinearIndex) o; - return mReferenceSequence == that.mReferenceSequence && - mIndexStart == that.mIndexStart && - Arrays.equals(mIndexEntries, that.mIndexEntries); + return mReferenceSequence == that.mReferenceSequence + && mIndexStart == that.mIndexStart + && Arrays.equals(mIndexEntries, that.mIndexEntries); } @Override diff --git a/src/main/java/htsjdk/samtools/MemoryMappedFileBuffer.java b/src/main/java/htsjdk/samtools/MemoryMappedFileBuffer.java index b26747df69..d53aec0555 100644 --- a/src/main/java/htsjdk/samtools/MemoryMappedFileBuffer.java +++ b/src/main/java/htsjdk/samtools/MemoryMappedFileBuffer.java @@ -1,9 +1,7 @@ package htsjdk.samtools; import htsjdk.samtools.util.RuntimeIOException; - import java.io.File; -import java.io.FileInputStream; import java.io.IOException; import java.nio.ByteOrder; import java.nio.MappedByteBuffer; @@ -17,7 +15,7 @@ class MemoryMappedFileBuffer implements IndexFileBuffer { private MappedByteBuffer mFileBuffer; MemoryMappedFileBuffer(final File file) { - try(final FileChannel fileChannel = FileChannel.open(file.toPath(), StandardOpenOption.READ);) { + try (final FileChannel fileChannel = FileChannel.open(file.toPath(), StandardOpenOption.READ); ) { mFileBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0L, fileChannel.size()); mFileBuffer.order(ByteOrder.LITTLE_ENDIAN); } catch (final IOException exc) { diff --git 
a/src/main/java/htsjdk/samtools/MergingSamRecordIterator.java b/src/main/java/htsjdk/samtools/MergingSamRecordIterator.java index ece5735f78..24058b3e9e 100644 --- a/src/main/java/htsjdk/samtools/MergingSamRecordIterator.java +++ b/src/main/java/htsjdk/samtools/MergingSamRecordIterator.java @@ -24,7 +24,6 @@ package htsjdk.samtools; import htsjdk.samtools.util.CloseableIterator; - import java.util.Collection; import java.util.Map; import java.util.PriorityQueue; @@ -63,7 +62,8 @@ public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, final bo * @param headerMerger The merged header and contents of readers. * @param assumeSorted false ensures that the iterator checks the headers of the readers for appropriate sort order. */ - public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, Collection readers, final boolean assumeSorted) { + public MergingSamRecordIterator( + final SamFileHeaderMerger headerMerger, Collection readers, final boolean assumeSorted) { this.samHeaderMerger = headerMerger; this.sortOrder = headerMerger.getMergedHeader().getSortOrder(); this.comparator = getComparator(); @@ -74,8 +74,9 @@ public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, Collecti for (final SamReader reader : readers) { if (!samHeaderMerger.getHeaders().contains(reader.getFileHeader())) throw new SAMException("All iterators to be merged must be accounted for in the SAM header merger"); - if (!assumeSorted && this.sortOrder != SAMFileHeader.SortOrder.unsorted && - reader.getFileHeader().getSortOrder() != this.sortOrder) { + if (!assumeSorted + && this.sortOrder != SAMFileHeader.SortOrder.unsorted + && reader.getFileHeader().getSortOrder() != this.sortOrder) { throw new SAMException("Files are not compatible with sort order"); } } @@ -88,7 +89,10 @@ public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, Collecti * @param headerMerger The merged header and contents of readers. 
* @param iterators Iterator traversing over reader contents. */ - public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, final Map> iterators, final boolean assumeSorted) { + public MergingSamRecordIterator( + final SamFileHeaderMerger headerMerger, + final Map> iterators, + final boolean assumeSorted) { this(headerMerger, iterators.keySet(), assumeSorted); for (final Map.Entry> mapping : iterators.entrySet()) addIfNotEmpty(new ComparableSamRecordIterator(mapping.getKey(), mapping.getValue(), comparator)); @@ -96,8 +100,7 @@ public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, final Ma } private void startIterationIfRequired() { - if (initialized) - return; + if (initialized) return; for (final SamReader reader : readers) addIfNotEmpty(new ComparableSamRecordIterator(reader, reader.iterator(), comparator)); initialized = true; @@ -108,9 +111,9 @@ private void startIterationIfRequired() { */ @Override public void close() { - // Iterators not in the priority queue have already been closed; only close down the iterators that are still in the priority queue. - for (CloseableIterator iterator : pq) - iterator.close(); + // Iterators not in the priority queue have already been closed; only close down the iterators that are still in + // the priority queue. + for (CloseableIterator iterator : pq) iterator.close(); } /** Returns true if any of the underlying iterators has more records, otherwise false. 
*/ @@ -135,7 +138,8 @@ public SAMRecord next() { if (this.samHeaderMerger.hasReadGroupCollisions()) { final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.READ_GROUP_ID); if (oldGroupId != null) { - final String newGroupId = this.samHeaderMerger.getReadGroupId(iterator.getReader().getFileHeader(), oldGroupId); + final String newGroupId = + this.samHeaderMerger.getReadGroupId(iterator.getReader().getFileHeader(), oldGroupId); record.setAttribute(ReservedTagConstants.READ_GROUP_ID, newGroupId); } } @@ -144,7 +148,8 @@ public SAMRecord next() { if (this.samHeaderMerger.hasProgramGroupCollisions()) { final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.PROGRAM_GROUP_ID); if (oldGroupId != null) { - final String newGroupId = this.samHeaderMerger.getProgramGroupId(iterator.getReader().getFileHeader(), oldGroupId); + final String newGroupId = this.samHeaderMerger.getProgramGroupId( + iterator.getReader().getFileHeader(), oldGroupId); record.setAttribute(ReservedTagConstants.PROGRAM_GROUP_ID, newGroupId); } } diff --git a/src/main/java/htsjdk/samtools/QueryInterval.java b/src/main/java/htsjdk/samtools/QueryInterval.java index a41819cbc0..4ba661f609 100644 --- a/src/main/java/htsjdk/samtools/QueryInterval.java +++ b/src/main/java/htsjdk/samtools/QueryInterval.java @@ -1,7 +1,6 @@ package htsjdk.samtools; import htsjdk.samtools.util.CoordMath; - import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -15,10 +14,9 @@ public class QueryInterval implements Comparable { public final int referenceIndex; /** 1-based, inclusive */ public final int start; - /** 1-based, inclusive. If <= 0, implies that the interval goes to the end of the reference sequence */ + /** 1-based, inclusive. 
If {@code <= 0}, implies that the interval goes to the end of the reference sequence */ public final int end; - public QueryInterval(final int referenceIndex, final int start, final int end) { if (referenceIndex < 0) { throw new IllegalArgumentException("Invalid reference index " + referenceIndex); @@ -28,7 +26,6 @@ public QueryInterval(final int referenceIndex, final int start, final int end) { this.end = end; } - @Override public int compareTo(final QueryInterval other) { int comp = this.referenceIndex - other.referenceIndex; @@ -78,7 +75,6 @@ public static QueryInterval[] optimizeIntervals(final QueryInterval[] inputInter final List unique = new ArrayList(); QueryInterval previous = inputIntervals[0]; - for (int i = 1; i < inputIntervals.length; ++i) { final QueryInterval next = inputIntervals[i]; if (previous.endsAtStartOf(next) || previous.overlaps(next)) { @@ -102,16 +98,19 @@ public static QueryInterval[] optimizeIntervals(final QueryInterval[] inputInter public static void assertIntervalsOptimized(final QueryInterval[] intervals) { if (intervals.length == 0) return; for (int i = 1; i < intervals.length; ++i) { - final QueryInterval prev = intervals[i-1]; + final QueryInterval prev = intervals[i - 1]; final QueryInterval thisInterval = intervals[i]; if (prev.compareTo(thisInterval) >= 0) { - throw new IllegalArgumentException(String.format("List of intervals is not sorted: %s >= %s", prev, thisInterval)); + throw new IllegalArgumentException( + String.format("List of intervals is not sorted: %s >= %s", prev, thisInterval)); } if (prev.overlaps(thisInterval)) { - throw new IllegalArgumentException(String.format("List of intervals is not optimized: %s intersects %s", prev, thisInterval)); + throw new IllegalArgumentException( + String.format("List of intervals is not optimized: %s intersects %s", prev, thisInterval)); } if (prev.endsAtStartOf(thisInterval)) { - throw new IllegalArgumentException(String.format("List of intervals is not optimized: %s abuts %s", 
prev, thisInterval)); + throw new IllegalArgumentException( + String.format("List of intervals is not optimized: %s abuts %s", prev, thisInterval)); } } } diff --git a/src/main/java/htsjdk/samtools/RandomAccessFileBuffer.java b/src/main/java/htsjdk/samtools/RandomAccessFileBuffer.java index 3eb271a0ff..1d5be658f1 100644 --- a/src/main/java/htsjdk/samtools/RandomAccessFileBuffer.java +++ b/src/main/java/htsjdk/samtools/RandomAccessFileBuffer.java @@ -1,7 +1,6 @@ package htsjdk.samtools; import htsjdk.samtools.util.RuntimeIOException; - import java.io.File; import java.io.IOException; import java.io.RandomAccessFile; @@ -20,7 +19,7 @@ */ class RandomAccessFileBuffer implements IndexFileBuffer { private static final int PAGE_SIZE = 4 * 1024; - private static final int PAGE_OFFSET_MASK = PAGE_SIZE-1; + private static final int PAGE_OFFSET_MASK = PAGE_SIZE - 1; private static final int PAGE_MASK = ~PAGE_OFFSET_MASK; private static final int INVALID_PAGE = 1; private final File mFile; @@ -53,7 +52,7 @@ public void readBytes(final byte[] bytes) { } while (resultLength > 0) { loadPage(mFilePointer); - final int pageOffset = (int)mFilePointer & PAGE_OFFSET_MASK; + final int pageOffset = (int) mFilePointer & PAGE_OFFSET_MASK; final int copyLength = Math.min(resultLength, PAGE_SIZE - pageOffset); System.arraycopy(mBuffer, pageOffset, bytes, resultOffset, copyLength); mFilePointer += copyLength; @@ -66,12 +65,12 @@ public void readBytes(final byte[] bytes) { public int readInteger() { // This takes advantage of the fact that integers in BAM index files are always 4-byte aligned. 
loadPage(mFilePointer); - final int pageOffset = (int)mFilePointer & PAGE_OFFSET_MASK; + final int pageOffset = (int) mFilePointer & PAGE_OFFSET_MASK; mFilePointer += 4; - return((mBuffer[pageOffset + 0] & 0xFF) | - ((mBuffer[pageOffset + 1] & 0xFF) << 8) | - ((mBuffer[pageOffset + 2] & 0xFF) << 16) | - ((mBuffer[pageOffset + 3] & 0xFF) << 24)); + return ((mBuffer[pageOffset + 0] & 0xFF) + | ((mBuffer[pageOffset + 1] & 0xFF) << 8) + | ((mBuffer[pageOffset + 2] & 0xFF) << 16) + | ((mBuffer[pageOffset + 3] & 0xFF) << 24)); } @Override @@ -113,7 +112,7 @@ public void close() { } private void loadPage(final long filePosition) { - final int page = (int)filePosition & PAGE_MASK; + final int page = (int) filePosition & PAGE_MASK; if (page == mCurrentPage) { return; } diff --git a/src/main/java/htsjdk/samtools/ReservedTagConstants.java b/src/main/java/htsjdk/samtools/ReservedTagConstants.java index c0d03b8c99..ba60ee5140 100644 --- a/src/main/java/htsjdk/samtools/ReservedTagConstants.java +++ b/src/main/java/htsjdk/samtools/ReservedTagConstants.java @@ -1,70 +1,69 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package htsjdk.samtools; - -/** - * Constants for tags used in our SAM/BAM files - */ -public class ReservedTagConstants { - public static final String READ_GROUP_ID = SAMTag.RG.name(); // Specified in the SAM spec doc - public static final String PROGRAM_GROUP_ID = SAMTag.PG.name(); // Specified in the SAM spec doc - - /** Present and set to 1 if a read is a noise read. */ - public static final String XN = "XN"; - - /** Number of nucleotide differences (Specified in the SAM spec doc) */ - public static final String NM = SAMTag.NM.name(); - - /** The sum of the mismatched qualities. */ - public static final String XQ = "XQ"; - - /** - * The name of an attribute which stores the 1-based index of the start of - * sequence within a read (in original orientation) that should be clipped - * or trimmed before alignment and downstream use. - * The region to be clipped extends from this position to the end of the read. - */ - public static final String XT = "XT"; - - /** The original sequence before 454 cafie and homopolymer correction */ - public static final String XS = "XS"; - - /** The Four54 edit string of 454 cafie and homopolymer corrections - *

    -     *   editString ::= {base operator position [- position]}* ;  // Cafie needs 2 positions
    -     *   base ::= A | T | G | C | N ;   // N only for undercall
    -     *   operator ::= o | u | c ;       // o = Overcall, u = Undercall, c = Cafie.
    -     *   position is 0 based position of the correction (assuming forward strand) .  Cafie positions are to-from.
    -     *   For example: XF :Z:Gc4-6Nu11Co15 means a cafie correction moved a G from position 6 to 4,
    -     *   an N was inserted for an undercall at position 11, and a C was removed as an overcall at position 15
    -     */
    -    public static final String XF = "XF";
    -
    -    /** The original pred quality scores before modifications such as 454 cafie and homopolymer correction */
    -    public static final String OQ = SAMTag.OQ.name();
    -
    -    /** The original cigar before indel cleaning, or 454 cafie and homopolymer correction */
    -    public static final String OC = "OC";
    -
    -}
    +/*
    + * The MIT License
    + *
    + * Copyright (c) 2009 The Broad Institute
    + *
    + * Permission is hereby granted, free of charge, to any person obtaining a copy
    + * of this software and associated documentation files (the "Software"), to deal
    + * in the Software without restriction, including without limitation the rights
    + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    + * copies of the Software, and to permit persons to whom the Software is
    + * furnished to do so, subject to the following conditions:
    + *
    + * The above copyright notice and this permission notice shall be included in
    + * all copies or substantial portions of the Software.
    + *
    + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    + * THE SOFTWARE.
    + */
    +package htsjdk.samtools;
    +
    +/**
    + * Constants for tags used in our SAM/BAM files
    + */
    +public class ReservedTagConstants {
    +    public static final String READ_GROUP_ID = SAMTag.RG.name(); // Specified in the SAM spec doc
    +    public static final String PROGRAM_GROUP_ID = SAMTag.PG.name(); // Specified in the SAM spec doc
    +
    +    /** Present and set to 1 if a read is a noise read. */
    +    public static final String XN = "XN";
    +
    +    /** Number of nucleotide differences (Specified in the SAM spec doc) */
    +    public static final String NM = SAMTag.NM.name();
    +
    +    /** The sum of the mismatched qualities. */
    +    public static final String XQ = "XQ";
    +
    +    /**
    +     * The name of an attribute which stores the 1-based index of the start of
    +     * sequence within a read (in original orientation) that should be clipped
    +     * or trimmed before alignment and downstream use.
    +     * The region to be clipped extends from this position to the end of the read.
    +     */
    +    public static final String XT = "XT";
    +
    +    /** The original sequence before 454 cafie and homopolymer correction */
    +    public static final String XS = "XS";
    +
    +    /** The Four54 edit string of 454 cafie and homopolymer corrections
    +     * <pre>
    +     *   editString ::= {base operator position [- position]}* ;  // Cafie needs 2 positions
    +     *   base ::= A | T | G | C | N ;   // N only for undercall
    +     *   operator ::= o | u | c ;       // o = Overcall, u = Undercall, c = Cafie.
    +     *   position is 0 based position of the correction (assuming forward strand) .  Cafie positions are to-from.
    +     *   For example: XF :Z:Gc4-6Nu11Co15 means a cafie correction moved a G from position 6 to 4,
    +     *   an N was inserted for an undercall at position 11, and a C was removed as an overcall at position 15</pre>
    +     */
    +    public static final String XF = "XF";
    +
    +    /** The original phred quality scores before modifications such as 454 cafie and homopolymer correction */
    +    public static final String OQ = SAMTag.OQ.name();
    +
    +    /** The original cigar before indel cleaning, or 454 cafie and homopolymer correction */
    +    public static final String OC = "OC";
    +}
    diff --git a/src/main/java/htsjdk/samtools/SAMBinaryTagAndUnsignedArrayValue.java b/src/main/java/htsjdk/samtools/SAMBinaryTagAndUnsignedArrayValue.java
    index 2eeb67fa75..b293256193 100644
    --- a/src/main/java/htsjdk/samtools/SAMBinaryTagAndUnsignedArrayValue.java
    +++ b/src/main/java/htsjdk/samtools/SAMBinaryTagAndUnsignedArrayValue.java
    @@ -33,12 +33,16 @@ public class SAMBinaryTagAndUnsignedArrayValue extends SAMBinaryTagAndValue {
         public SAMBinaryTagAndUnsignedArrayValue(final short tag, final Object value) {
             super(tag, value);
             if (!value.getClass().isArray() || value instanceof float[]) {
    -            throw new IllegalArgumentException("Attribute type " + value.getClass() +
    -                    " cannot be encoded as an unsigned array. Tag: " +
    -                    SAMTag.makeStringTag(tag));
    +            throw new IllegalArgumentException("Attribute type " + value.getClass()
    +                    + " cannot be encoded as an unsigned array. Tag: " + SAMTag.makeStringTag(tag));
             }
         }
     
    +    /** Package-private constructor that skips validation, for use in BinaryTagCodec.readTags(). */
    +    SAMBinaryTagAndUnsignedArrayValue(final short tag, final Object value, final boolean skipValidation) {
    +        super(tag, value, skipValidation);
    +    }
    +
         /** Creates and returns a shallow copy of the list of tag/values. */
         @Override
         public SAMBinaryTagAndValue copy() {
    @@ -57,7 +61,6 @@ public SAMBinaryTagAndValue deepCopy() {
             return retval;
         }
     
    -
         @Override
         public boolean isUnsignedArray() {
             return true;
    diff --git a/src/main/java/htsjdk/samtools/SAMBinaryTagAndValue.java b/src/main/java/htsjdk/samtools/SAMBinaryTagAndValue.java
    index 578c7e6785..c84bdc4752 100644
    --- a/src/main/java/htsjdk/samtools/SAMBinaryTagAndValue.java
    +++ b/src/main/java/htsjdk/samtools/SAMBinaryTagAndValue.java
    @@ -58,26 +58,35 @@ public SAMBinaryTagAndValue(final short tag, final Object value) {
                 throw new IllegalArgumentException("SAMBinaryTagAndValue value may not be null");
             }
             if (!isAllowedAttributeValue(value)) {
    -            throw new IllegalArgumentException("Attribute type " + value.getClass() + " not supported. Tag: " +
    -                    SAMTag.makeStringTag(tag));
    +            throw new IllegalArgumentException(
    +                    "Attribute type " + value.getClass() + " not supported. Tag: " + SAMTag.makeStringTag(tag));
             }
             this.tag = tag;
             this.value = value;
         }
     
    +    /**
    +     * Package-private constructor that bypasses the null and type checks, for performance-critical paths
    +     * (e.g. BinaryTagCodec.readTags) with known-valid values; {@code skipValidation} is ignored, even if false.
    +     */
    +    SAMBinaryTagAndValue(final short tag, final Object value, final boolean skipValidation) {
    +        this.tag = tag;
    +        this.value = value;
    +    }
    +
         // Inspect the proposed value to determine if it is an allowed value type,
         // and if the value is in range.
         protected static boolean isAllowedAttributeValue(final Object value) {
    -            if (value instanceof Byte ||
    -                value instanceof Short ||
    -                value instanceof Integer ||
    -                value instanceof String ||
    -                value instanceof Character ||
    -                value instanceof Float ||
    -                value instanceof byte[] ||
    -                value instanceof short[] ||
    -                value instanceof int[] ||
    -                value instanceof float[]) {
    +        if (value instanceof Byte
    +                || value instanceof Short
    +                || value instanceof Integer
    +                || value instanceof String
    +                || value instanceof Character
    +                || value instanceof Float
    +                || value instanceof byte[]
    +                || value instanceof short[]
    +                || value instanceof int[]
    +                || value instanceof float[]) {
                 return true;
             }
     
    @@ -90,7 +99,8 @@ protected static boolean isAllowedAttributeValue(final Object value) {
             return false;
         }
     
    -    @Override public boolean equals(final Object o) {
    +    @Override
    +    public boolean equals(final Object o) {
             if (this == o) return true;
             if (o == null || getClass() != o.getClass()) return false;
             return typeSafeEquals((SAMBinaryTagAndValue) o);
    @@ -102,30 +112,21 @@ private boolean typeSafeEquals(final SAMBinaryTagAndValue that) {
             if (this.valueEquals(that)) {
                 if (this.next == null) return that.next == null;
                 else return this.next.equals(that.next);
    -        }
    -        else {
    +        } else {
                 return false;
             }
         }
     
         private boolean valueEquals(SAMBinaryTagAndValue that) {
             if (this.value instanceof byte[]) {
    -            return that.value instanceof byte[] ?
    -                Arrays.equals((byte[])this.value, (byte[])that.value) : false;
    -        }
    -        else if (this.value instanceof short[]) {
    -            return that.value instanceof short[] ?
    -                    Arrays.equals((short[])this.value, (short[])that.value) : false;
    -        }
    -        else if (this.value instanceof int[]) {
    -            return that.value instanceof int[] ?
    -                    Arrays.equals((int[])this.value, (int[])that.value) : false;
    -        }
    -        else if (this.value instanceof float[]) {
    -            return that.value instanceof float[] ?
    -                    Arrays.equals((float[])this.value, (float[])that.value) : false;
    -        }
    -        else {
    +            return that.value instanceof byte[] ? Arrays.equals((byte[]) this.value, (byte[]) that.value) : false;
    +        } else if (this.value instanceof short[]) {
    +            return that.value instanceof short[] ? Arrays.equals((short[]) this.value, (short[]) that.value) : false;
    +        } else if (this.value instanceof int[]) {
    +            return that.value instanceof int[] ? Arrays.equals((int[]) this.value, (int[]) that.value) : false;
    +        } else if (this.value instanceof float[]) {
    +            return that.value instanceof float[] ? Arrays.equals((float[]) this.value, (float[]) that.value) : false;
    +        } else {
                 // otherwise, the api limits the remaining possible value types to
                 // immutable (String or boxed primitive) types
                 return this.value.equals(that.value);
    @@ -136,18 +137,14 @@ else if (this.value instanceof float[]) {
         public int hashCode() {
             int valueHash;
             if (this.value instanceof byte[]) {
    -            valueHash = Arrays.hashCode((byte[])this.value);
    -        }
    -        else if (this.value instanceof short[]) {
    -            valueHash = Arrays.hashCode((short[])this.value);
    -        }
    -        else if (this.value instanceof int[]) {
    -            valueHash = Arrays.hashCode((int[])this.value);
    -        }
    -        else if (this.value instanceof float[]) {
    -            valueHash = Arrays.hashCode((float[])this.value);
    -        }
    -        else {
    +            valueHash = Arrays.hashCode((byte[]) this.value);
    +        } else if (this.value instanceof short[]) {
    +            valueHash = Arrays.hashCode((short[]) this.value);
    +        } else if (this.value instanceof int[]) {
    +            valueHash = Arrays.hashCode((int[]) this.value);
    +        } else if (this.value instanceof float[]) {
    +            valueHash = Arrays.hashCode((float[]) this.value);
    +        } else {
                 // otherwise, the api limits the remaining possible value types to
                 // immutable (String or boxed primitive) types
                 valueHash = value.hashCode();
    @@ -180,17 +177,13 @@ protected Object cloneValue() {
     
             if (value instanceof byte[]) {
                 valueClone = ((byte[]) value).clone();
    -        }
    -        else if (value instanceof short[]) {
    +        } else if (value instanceof short[]) {
                 valueClone = ((short[]) value).clone();
    -        }
    -        else if (value instanceof int[]) {
    +        } else if (value instanceof int[]) {
                 valueClone = ((int[]) value).clone();
    -        }
    -        else if (value instanceof float[]) {
    +        } else if (value instanceof float[]) {
                 valueClone = ((float[]) value).clone();
    -        }
    -        else {
    +        } else {
                 // otherwise, the api limits the remaining possible value types to
                 // immutable (String or boxed primitive) types
                 valueClone = value;
    @@ -200,7 +193,9 @@ else if (value instanceof float[]) {
     
         // The methods below are for implementing a light-weight, single-direction linked list
     
    -    public SAMBinaryTagAndValue getNext() { return this.next; }
    +    public SAMBinaryTagAndValue getNext() {
    +        return this.next;
    +    }
     
         /** Inserts an item into the ordered list of attributes and returns the head of the list/sub-list */
         public SAMBinaryTagAndValue insert(final SAMBinaryTagAndValue attr) {
    @@ -211,18 +206,15 @@ public SAMBinaryTagAndValue insert(final SAMBinaryTagAndValue attr) {
                 // attr joins the list ahead of this element
                 attr.next = this;
                 return attr;
    -        }
    -        else if (this.tag == attr.tag) {
    +        } else if (this.tag == attr.tag) {
                 // attr replaces this in the list
                 attr.next = this.next;
                 return attr;
    -        }
    -        else if (this.next == null) {
    +        } else if (this.next == null) {
                 // attr gets stuck on the end
                 this.next = attr;
                 return this;
    -        }
    -        else {
    +        } else {
                 // attr gets inserted somewhere in the tail
                 this.next = this.next.insert(attr);
                 return this;
    @@ -242,7 +234,7 @@ public SAMBinaryTagAndValue remove(final short tag) {
         public SAMBinaryTagAndValue find(final short tag) {
             if (this.tag == tag) return this;
             else if (this.tag > tag || this.next == null) return null;
    -        else return this.next.find(tag); 
    +        else return this.next.find(tag);
         }
     
         public boolean isUnsignedArray() {
    diff --git a/src/main/java/htsjdk/samtools/SAMException.java b/src/main/java/htsjdk/samtools/SAMException.java
    index add07456de..00d7d8b975 100644
    --- a/src/main/java/htsjdk/samtools/SAMException.java
    +++ b/src/main/java/htsjdk/samtools/SAMException.java
    @@ -27,8 +27,7 @@
      * @author alecw@broadinstitute.org
      */
     public class SAMException extends RuntimeException {
    -    public SAMException() {
    -    }
    +    public SAMException() {}
     
         public SAMException(final String s) {
             super(s);
    diff --git a/src/main/java/htsjdk/samtools/SAMFileHeader.java b/src/main/java/htsjdk/samtools/SAMFileHeader.java
    index 4796683ab2..9d4f69e0a2 100644
    --- a/src/main/java/htsjdk/samtools/SAMFileHeader.java
    +++ b/src/main/java/htsjdk/samtools/SAMFileHeader.java
    @@ -23,12 +23,10 @@
      */
     package htsjdk.samtools;
     
    -
     import htsjdk.beta.plugin.HtsHeader;
     import htsjdk.samtools.util.BufferedLineReader;
     import htsjdk.samtools.util.CollectionUtil;
     import htsjdk.samtools.util.Log;
    -
     import java.io.StringWriter;
     import java.util.*;
     import java.util.function.Supplier;
    @@ -36,13 +34,13 @@
     /**
      * Header information from a SAM or BAM file.
      */
    -public class SAMFileHeader extends AbstractSAMHeaderRecord implements HtsHeader
    -{
    +public class SAMFileHeader extends AbstractSAMHeaderRecord implements HtsHeader {
         public static final String VERSION_TAG = "VN";
         public static final String SORT_ORDER_TAG = "SO";
         public static final String GROUP_ORDER_TAG = "GO";
         public static final String CURRENT_VERSION = "1.6";
    -    public static final Set ACCEPTABLE_VERSIONS = CollectionUtil.makeSet("1.0", "1.3", "1.4", "1.5", CURRENT_VERSION );
    +    public static final Set ACCEPTABLE_VERSIONS =
    +            CollectionUtil.makeSet("1.0", "1.3", "1.4", "1.5", CURRENT_VERSION);
     
         private SortOrder sortOrder = null;
         private GroupOrder groupOrder = null;
    @@ -93,7 +91,9 @@ public SAMRecordComparator getComparatorInstance() {
         }
     
         public enum GroupOrder {
    -        none, query, reference
    +        none,
    +        query,
    +        reference
         }
     
         private List mReadGroups = new ArrayList<>();
    @@ -101,7 +101,7 @@ public enum GroupOrder {
         private final Map mReadGroupMap = new HashMap<>();
         private final Map mProgramRecordMap = new HashMap<>();
         private SAMSequenceDictionary mSequenceDictionary = new SAMSequenceDictionary();
    -    final private List mComments = new ArrayList<>();
    +    private final List mComments = new ArrayList<>();
         private final List mValidationErrors = new ArrayList<>();
     
         public SAMFileHeader() {
    @@ -186,8 +186,8 @@ public void setReadGroups(final List readGroups) {
     
         public void addReadGroup(final SAMReadGroupRecord readGroup) {
             if (mReadGroupMap.containsKey(readGroup.getReadGroupId())) {
    -            throw new IllegalArgumentException("Read group with group id " +
    -                readGroup.getReadGroupId() + " already exists in SAMFileHeader!");
    +            throw new IllegalArgumentException(
    +                    "Read group with group id " + readGroup.getReadGroupId() + " already exists in SAMFileHeader!");
             }
             mReadGroups.add(readGroup);
             mReadGroupMap.put(readGroup.getReadGroupId(), readGroup);
    @@ -199,8 +199,8 @@ public List getProgramRecords() {
     
         public void addProgramRecord(final SAMProgramRecord programRecord) {
             if (mProgramRecordMap.containsKey(programRecord.getProgramGroupId())) {
    -            throw new IllegalArgumentException("Program record with group id " +
    -                programRecord.getProgramGroupId() + " already exists in SAMFileHeader!");
    +            throw new IllegalArgumentException("Program record with group id " + programRecord.getProgramGroupId()
    +                    + " already exists in SAMFileHeader!");
             }
             this.mProgramRecords.add(programRecord);
             this.mProgramRecordMap.put(programRecord.getProgramGroupId(), programRecord);
    @@ -281,14 +281,13 @@ public void setGroupOrder(final GroupOrder go) {
             super.setAttribute(GROUP_ORDER_TAG, go.name());
         }
     
    -
         /**
          * Set the given value for the attribute named 'key'.  Replaces an existing value, if any.
          * If value is null, the attribute is removed.
          * Otherwise, the value will be converted to a String with toString.
          * @param key attribute name
          * @param value attribute value
    -     * @deprecated Use {@link #setAttribute(String, String) instead
    +     * @deprecated Use {@link #setAttribute(String, String)} instead
          */
         @Deprecated
         @Override
    @@ -323,10 +322,14 @@ public void setAttribute(final String key, final String value) {
         }
     
         /** @deprecated since May 1st 2019 - text version of header is no longer stored. */
    -    @Deprecated public String getTextHeader() {  return null; }
    +    @Deprecated
    +    public String getTextHeader() {
    +        return null;
    +    }
     
         /** @deprecated since May 1st 2019 - text version of header is no longer stored. */
    -    @Deprecated public void setTextHeader(final String textHeader) { }
    +    @Deprecated
    +    public void setTextHeader(final String textHeader) {}
     
         public List getComments() {
             return Collections.unmodifiableList(mComments);
    @@ -339,7 +342,6 @@ public void addComment(String comment) {
             mComments.add(comment);
         }
     
    -
         /**
          * Replace existing comments with the contents of the given collection.
          */
    @@ -377,8 +379,9 @@ public boolean equals(final Object o) {
             if (mProgramRecords != null ? !mProgramRecords.equals(that.mProgramRecords) : that.mProgramRecords != null)
                 return false;
             if (mReadGroups != null ? !mReadGroups.equals(that.mReadGroups) : that.mReadGroups != null) return false;
    -        if (mSequenceDictionary != null ? !mSequenceDictionary.equals(that.mSequenceDictionary) : that.mSequenceDictionary != null)
    -            return false;
    +        if (mSequenceDictionary != null
    +                ? !mSequenceDictionary.equals(that.mSequenceDictionary)
    +                : that.mSequenceDictionary != null) return false;
     
             return true;
         }
    @@ -432,7 +435,8 @@ public String getNonCollidingId(final String recordId) {
                     // our old process of just counting from 0 upward and adding that to the previous id led to 1000s of
                     // calls idsThatAreAlreadyTaken.contains() just to resolve 1 collision when merging 1000s of similarly
                     // processed bams.
    -                while (idsThatAreAlreadyTaken.contains(newId = recordId + "." + SamFileHeaderMerger.positiveFourDigitBase36Str(recordCounter++)))
    +                while (idsThatAreAlreadyTaken.contains(
    +                        newId = recordId + "." + SamFileHeaderMerger.positiveFourDigitBase36Str(recordCounter++)))
                         ;
     
                     idsThatAreAlreadyTaken.add(newId);
    diff --git a/src/main/java/htsjdk/samtools/SAMFileWriter.java b/src/main/java/htsjdk/samtools/SAMFileWriter.java
    index 68c9cc1abc..d312ab1ced 100644
    --- a/src/main/java/htsjdk/samtools/SAMFileWriter.java
    +++ b/src/main/java/htsjdk/samtools/SAMFileWriter.java
    @@ -23,9 +23,8 @@
      */
     package htsjdk.samtools;
     
    -import java.io.Closeable;
    -
     import htsjdk.samtools.util.ProgressLoggerInterface;
    +import java.io.Closeable;
     
     /**
      * Interface for SAMText and BAM file writers.  Clients need not care which they write to,
    @@ -33,23 +32,24 @@
      */
     public interface SAMFileWriter extends Closeable {
     
    -	void addAlignment(SAMRecord alignment);
    +    void addAlignment(SAMRecord alignment);
     
         SAMFileHeader getFileHeader();
     
    -	/**
    -	 * Sets a ProgressLogger on this writer. This is useful when pulling, for instance, from a
    -	 * SortingCollection.
    -	 */
    -	void setProgressLogger(final ProgressLoggerInterface progress);
    +    /**
    +     * Sets a ProgressLogger on this writer. This is useful when pulling, for instance, from a
    +     * SortingCollection.
    +     */
    +    void setProgressLogger(final ProgressLoggerInterface progress);
     
    -	/** If true writers that are writing pre-sorted records should check the order during writing. */
    -	default void setSortOrderChecking(final boolean check) {
    -		throw new UnsupportedOperationException("Operation not supported on " + getClass().getName());
    -	}
    +    /** If true writers that are writing pre-sorted records should check the order during writing. */
    +    default void setSortOrderChecking(final boolean check) {
    +        throw new UnsupportedOperationException(
    +                "Operation not supported on " + getClass().getName());
    +    }
     
         /**
    -     * Must be called to flush or file will likely be defective. 
    +     * Must be called to flush or file will likely be defective.
          */
         @Override
         void close();
    diff --git a/src/main/java/htsjdk/samtools/SAMFileWriterFactory.java b/src/main/java/htsjdk/samtools/SAMFileWriterFactory.java
    index b9bd1dcb14..54d2f5690c 100644
    --- a/src/main/java/htsjdk/samtools/SAMFileWriterFactory.java
    +++ b/src/main/java/htsjdk/samtools/SAMFileWriterFactory.java
    @@ -23,6 +23,8 @@
      */
     package htsjdk.samtools;
     
    +import static htsjdk.samtools.SamReader.Type.*;
    +
     import htsjdk.samtools.cram.ref.CRAMReferenceSource;
     import htsjdk.samtools.cram.ref.ReferenceSource;
     import htsjdk.samtools.cram.structure.CRAMEncodingStrategy;
    @@ -33,20 +35,18 @@
     import htsjdk.samtools.util.Md5CalculatingOutputStream;
     import htsjdk.samtools.util.RuntimeIOException;
     import htsjdk.samtools.util.zip.DeflaterFactory;
    -
     import java.io.File;
     import java.io.IOException;
     import java.io.OutputStream;
     import java.nio.file.Files;
     import java.nio.file.Path;
     import java.util.zip.Deflater;
    -import static htsjdk.samtools.SamReader.Type.*;
     
     /**
      * Create a writer for writing SAM, BAM, or CRAM files.
      */
     public class SAMFileWriterFactory implements Cloneable {
    -    private final static Log log = Log.getInstance(SAMFileWriterFactory.class);
    +    private static final Log log = Log.getInstance(SAMFileWriterFactory.class);
         private static boolean defaultCreateIndexWhileWriting = Defaults.CREATE_INDEX;
         private boolean createIndex = defaultCreateIndexWhileWriting;
         private static boolean defaultCreateMd5File = Defaults.CREATE_MD5;
    @@ -57,17 +57,18 @@ public class SAMFileWriterFactory implements Cloneable {
         private File tmpDir;
         /** compression level 0: min 9:max */
         private int compressionLevel = BlockCompressedOutputStream.getDefaultCompressionLevel();
    +
         private SamFlagField samFlagFieldOutput = SamFlagField.NONE;
         private Integer maxRecordsInRam = null;
         private DeflaterFactory deflaterFactory = BlockCompressedOutputStream.getDefaultDeflaterFactory();
    +    private CRAMEncodingStrategy cramEncodingStrategy = new CRAMEncodingStrategy();
     
         /** simple constructor */
    -    public SAMFileWriterFactory() {
    -    }
    -    
    +    public SAMFileWriterFactory() {}
    +
         /** copy constructor */
    -    public SAMFileWriterFactory( final SAMFileWriterFactory other) {
    -        if( other == null ) throw new IllegalArgumentException("SAMFileWriterFactory(null)");
    +    public SAMFileWriterFactory(final SAMFileWriterFactory other) {
    +        if (other == null) throw new IllegalArgumentException("SAMFileWriterFactory(null)");
             this.createIndex = other.createIndex;
             this.createMd5File = other.createMd5File;
             this.useAsyncIo = other.useAsyncIo;
    @@ -76,8 +77,9 @@ public SAMFileWriterFactory( final SAMFileWriterFactory other) {
             this.tmpDir = other.tmpDir;
             this.compressionLevel = other.compressionLevel;
             this.maxRecordsInRam = other.maxRecordsInRam;
    +        this.cramEncodingStrategy = other.cramEncodingStrategy;
         }
    -    
    +
         @Override
         public SAMFileWriterFactory clone() {
             return new SAMFileWriterFactory(this);
    @@ -110,8 +112,8 @@ public SAMFileWriterFactory setCreateMd5File(final boolean createMd5File) {
          * Set the deflater factory used by BAM writers created by this writer factory. Must not be null.
          * If this method is not called, the default  {@link DeflaterFactory} is used which creates the default JDK {@link Deflater}.
          * This method returns the SAMFileWriterFactory itself. */
    -    public SAMFileWriterFactory setDeflaterFactory(final DeflaterFactory deflaterFactory){
    -        if (deflaterFactory == null){
    +    public SAMFileWriterFactory setDeflaterFactory(final DeflaterFactory deflaterFactory) {
    +        if (deflaterFactory == null) {
                 throw new IllegalArgumentException("null deflater factory");
             }
             this.deflaterFactory = deflaterFactory;
    @@ -123,11 +125,11 @@ public SAMFileWriterFactory setCompressionLevel(final int compressionLevel) {
             this.compressionLevel = Math.min(9, Math.max(0, compressionLevel));
             return this;
         }
    -    
    +
         public int getCompressionLevel() {
             return compressionLevel;
         }
    -    
    +
         /**
          * Sets the default for subsequent SAMFileWriterFactories
          * that do not specify whether to create an index.
    @@ -241,6 +243,30 @@ public SAMFileWriterFactory setSamFlagFieldOutput(final SamFlagField samFlagFiel
             return this;
         }
     
    +    /**
    +     * Set the {@link CRAMEncodingStrategy} to use when creating CRAM writers. Controls the CRAM version,
    +     * compression profile, and per-data-series codec selection.
    +     *
    +     * <p>The default strategy uses the {@link htsjdk.samtools.cram.structure.CRAMCompressionProfile#NORMAL} profile.
    +     * To use a specific profile:
    +     * <pre>
    +     *   factory.setCRAMEncodingStrategy(CRAMCompressionProfile.ARCHIVE.toStrategy());
    +     * </pre>
    + * + * @param cramEncodingStrategy the encoding strategy to use for CRAM output + * @return this factory for chaining + */ + public SAMFileWriterFactory setCRAMEncodingStrategy(final CRAMEncodingStrategy cramEncodingStrategy) { + if (cramEncodingStrategy == null) throw new IllegalArgumentException("CRAM encoding strategy was null"); + this.cramEncodingStrategy = cramEncodingStrategy; + return this; + } + + /** @return the current CRAM encoding strategy */ + public CRAMEncodingStrategy getCRAMEncodingStrategy() { + return cramEncodingStrategy; + } + /** * Create a BAMFileWriter that is ready to receive SAMRecords. Uses default compression level. * @@ -271,8 +297,8 @@ public SAMFileWriter makeBAMWriter(final SAMFileHeader header, final boolean pre * @param outputFile where to write the output. * @param compressionLevel Override default compression level with the given value, between 0 (fastest) and 9 (smallest). */ - public SAMFileWriter makeBAMWriter(final SAMFileHeader header, final boolean presorted, final File outputFile, - final int compressionLevel) { + public SAMFileWriter makeBAMWriter( + final SAMFileHeader header, final boolean presorted, final File outputFile, final int compressionLevel) { return makeBAMWriter(header, presorted, outputFile.toPath(), compressionLevel); } @@ -284,19 +310,22 @@ public SAMFileWriter makeBAMWriter(final SAMFileHeader header, final boolean pre * @param outputPath where to write the output. * @param compressionLevel Override default compression level with the given value, between 0 (fastest) and 9 (smallest). 
*/ - public SAMFileWriter makeBAMWriter(final SAMFileHeader header, final boolean presorted, final Path outputPath, - final int compressionLevel) { + public SAMFileWriter makeBAMWriter( + final SAMFileHeader header, final boolean presorted, final Path outputPath, final int compressionLevel) { try { final boolean createMd5File = this.createMd5File && IOUtil.isRegularPath(outputPath); if (this.createMd5File && !createMd5File) { - log.warn("Cannot create MD5 file for BAM because output file is not a regular file: " + outputPath.toUri()); + log.warn("Cannot create MD5 file for BAM because output file is not a regular file: " + + outputPath.toUri()); } OutputStream os = IOUtil.maybeBufferOutputStream(Files.newOutputStream(outputPath), bufferSize); - if (createMd5File) os = new Md5CalculatingOutputStream(os, IOUtil.addExtension(outputPath,".md5")); - final BAMFileWriter ret = new BAMFileWriter(os, outputPath.toUri().toString(), compressionLevel, deflaterFactory); + if (createMd5File) os = new Md5CalculatingOutputStream(os, IOUtil.addExtension(outputPath, ".md5")); + final BAMFileWriter ret = + new BAMFileWriter(os, outputPath.toUri().toString(), compressionLevel, deflaterFactory); final boolean createIndex = this.createIndex && IOUtil.isRegularPath(outputPath); if (this.createIndex && !createIndex) { - log.warn("Cannot create index for BAM because output file is not a regular file: " + outputPath.toUri()); + log.warn( + "Cannot create index for BAM because output file is not a regular file: " + outputPath.toUri()); } initializeBAMWriter(ret, header, presorted, createIndex); @@ -307,7 +336,11 @@ public SAMFileWriter makeBAMWriter(final SAMFileHeader header, final boolean pre } } - private void initializeBAMWriter(final BAMFileWriter writer, final SAMFileHeader header, final boolean presorted, final boolean createIndex) { + private void initializeBAMWriter( + final BAMFileWriter writer, + final SAMFileHeader header, + final boolean presorted, + final boolean createIndex) { 
writer.setSortOrder(header.getSortOrder(), presorted); if (maxRecordsInRam != null) { writer.setMaxRecordsInRam(maxRecordsInRam); @@ -347,12 +380,12 @@ public SAMFileWriter makeSAMWriter(final SAMFileHeader header, final boolean pre } try { final SAMTextWriter ret = this.createMd5File - ? new SAMTextWriter(new Md5CalculatingOutputStream(Files.newOutputStream(outputPath), - IOUtil.addExtension(outputPath, ".md5")), samFlagFieldOutput) - : new SAMTextWriter(null == outputPath - ? null - : Files.newOutputStream(outputPath), - samFlagFieldOutput); + ? new SAMTextWriter( + new Md5CalculatingOutputStream( + Files.newOutputStream(outputPath), IOUtil.addExtension(outputPath, ".md5")), + samFlagFieldOutput) + : new SAMTextWriter( + null == outputPath ? null : Files.newOutputStream(outputPath), samFlagFieldOutput); return initWriter(header, presorted, ret); } catch (final IOException ioe) { throw new RuntimeIOException("Error opening file: " + outputPath.toUri(), ioe); @@ -388,9 +421,11 @@ public SAMFileWriter makeSAMWriter(final SAMFileHeader header, final boolean pre * @param stream the stream to write records to. Note that this method does not buffer the stream, so the * caller must buffer if desired. Note that PrintStream is buffered. */ - public SAMFileWriter makeBAMWriter(final SAMFileHeader header, final boolean presorted, final OutputStream stream) { - return initWriter(header, presorted, new BAMFileWriter(stream, (File)null, this.getCompressionLevel(), this.deflaterFactory)); + return initWriter( + header, + presorted, + new BAMFileWriter(stream, (File) null, this.getCompressionLevel(), this.deflaterFactory)); } /** @@ -399,9 +434,8 @@ public SAMFileWriter makeBAMWriter(final SAMFileHeader header, final boolean pre * @param presorted if true, SAMRecords must be added to the SAMFileWriter in order that agrees with header.sortOrder. * @param writer SAM or BAM writer to initialize and maybe wrap. 
*/ - - private SAMFileWriter initWriter(final SAMFileHeader header, final boolean presorted, - final SAMFileWriterImpl writer) { + private SAMFileWriter initWriter( + final SAMFileHeader header, final boolean presorted, final SAMFileWriterImpl writer) { writer.setSortOrder(header.getSortOrder(), presorted); if (maxRecordsInRam != null) { writer.setMaxRecordsInRam(maxRecordsInRam); @@ -421,8 +455,9 @@ private SAMFileWriter initWriter(final SAMFileHeader header, final boolean preso * @param outputFile where to write the output. Must end with .sam or .bam. * @return SAM or BAM writer based on file extension of outputFile. */ - public SAMFileWriter makeSAMOrBAMWriter(final SAMFileHeader header, final boolean presorted, final File outputFile) { - return makeSAMOrBAMWriter(header, presorted, outputFile.toPath()); + public SAMFileWriter makeSAMOrBAMWriter( + final SAMFileHeader header, final boolean presorted, final File outputFile) { + return makeSAMOrBAMWriter(header, presorted, outputFile.toPath()); } /** @@ -433,13 +468,15 @@ public SAMFileWriter makeSAMOrBAMWriter(final SAMFileHeader header, final boolea * @param outputPath where to write the output. Must end with .sam or .bam. * @return SAM or BAM writer based on file extension of outputPath. 
*/ - public SAMFileWriter makeSAMOrBAMWriter(final SAMFileHeader header, final boolean presorted, final Path outputPath) { + public SAMFileWriter makeSAMOrBAMWriter( + final SAMFileHeader header, final boolean presorted, final Path outputPath) { final String filename = outputPath.getFileName().toString(); if (SAM_TYPE.hasValidFileExtension(filename)) { return makeSAMWriter(header, presorted, outputPath); } else { if (!BAM_TYPE.hasValidFileExtension(filename)) { - log.info("Unknown file extension, assuming BAM format when writing file: " + outputPath.toUri().toString()); + log.info("Unknown file extension, assuming BAM format when writing file: " + + outputPath.toUri().toString()); } return makeBAMWriter(header, presorted, outputPath); } @@ -457,8 +494,9 @@ public SAMFileWriter makeSAMOrBAMWriter(final SAMFileHeader header, final boolea * @return SAMFileWriter appropriate for SAM and CRAM file types specified in outputFile, or a BAM writer for all other types * */ - public SAMFileWriter makeWriter(final SAMFileHeader header, final boolean presorted, final File outputFile, final File referenceFasta) { - return makeWriter(header, presorted, IOUtil.toPath( outputFile ), IOUtil.toPath(referenceFasta)); + public SAMFileWriter makeWriter( + final SAMFileHeader header, final boolean presorted, final File outputFile, final File referenceFasta) { + return makeWriter(header, presorted, IOUtil.toPath(outputFile), IOUtil.toPath(referenceFasta)); } /** @@ -473,8 +511,9 @@ public SAMFileWriter makeWriter(final SAMFileHeader header, final boolean presor * @deprecated since 6/18, use {@link #makeWriter(SAMFileHeader, boolean, Path, Path)} instead */ @Deprecated - public SAMFileWriter makeWriter(final SAMFileHeader header, final boolean presorted, final Path outputPath, final File referenceFasta) { - return makeWriter(header, presorted, outputPath, IOUtil.toPath( referenceFasta )); + public SAMFileWriter makeWriter( + final SAMFileHeader header, final boolean presorted, final Path 
outputPath, final File referenceFasta) { + return makeWriter(header, presorted, outputPath, IOUtil.toPath(referenceFasta)); } /** @@ -488,12 +527,13 @@ public SAMFileWriter makeWriter(final SAMFileHeader header, final boolean presor * @return SAMFileWriter appropriate for the file type specified in outputPath * */ - public SAMFileWriter makeWriter(final SAMFileHeader header, final boolean presorted, final Path outputPath, final Path referenceFasta) { + public SAMFileWriter makeWriter( + final SAMFileHeader header, final boolean presorted, final Path outputPath, final Path referenceFasta) { final String filename = outputPath.getFileName().toString(); if (CRAM_TYPE.hasValidFileExtension(filename)) { return makeCRAMWriter(header, presorted, outputPath, referenceFasta); } else { - return makeSAMOrBAMWriter (header, presorted, outputPath); + return makeSAMOrBAMWriter(header, presorted, outputPath); } } @@ -508,8 +548,9 @@ public SAMFileWriter makeWriter(final SAMFileHeader header, final boolean presor * @param referenceFasta reference sequence file * @return CRAMFileWriter */ - public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final OutputStream stream, final File referenceFasta) { - return makeCRAMWriter(header, stream, IOUtil.toPath( referenceFasta )); + public CRAMFileWriter makeCRAMWriter( + final SAMFileHeader header, final OutputStream stream, final File referenceFasta) { + return makeCRAMWriter(header, stream, IOUtil.toPath(referenceFasta)); } /** @@ -523,9 +564,16 @@ public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final OutputStr * @param referenceFasta reference sequence file * @return CRAMFileWriter */ - public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final OutputStream stream, final Path referenceFasta) { - // create the CRAMFileWriter directly without propagating factory settings - return new CRAMFileWriter(stream, new ReferenceSource(referenceFasta), header, null); + public CRAMFileWriter makeCRAMWriter( + 
final SAMFileHeader header, final OutputStream stream, final Path referenceFasta) { + return new CRAMFileWriter( + cramEncodingStrategy, + stream, + null, // no index + true, // presorted + new ReferenceSource(referenceFasta), + header, + null); } /** @@ -541,7 +589,7 @@ public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final OutputStr * */ public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final File outputFile, final File referenceFasta) { - return createCRAMWriterWithSettings(header, true, outputFile.toPath(), IOUtil.toPath( referenceFasta )); + return createCRAMWriterWithSettings(header, true, outputFile.toPath(), IOUtil.toPath(referenceFasta)); } /** @@ -560,7 +608,7 @@ public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final File outp */ @Deprecated public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final Path outputPath, final File referenceFasta) { - return makeCRAMWriter(header, true, outputPath, IOUtil.toPath( referenceFasta )); + return makeCRAMWriter(header, true, outputPath, IOUtil.toPath(referenceFasta)); } /** @@ -575,11 +623,11 @@ public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final Path outp * @return CRAMFileWriter * */ - public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final boolean presorted, final File outputFile, final File referenceFasta) { - return makeCRAMWriter(header, presorted, outputFile.toPath(), IOUtil.toPath(referenceFasta)); + public CRAMFileWriter makeCRAMWriter( + final SAMFileHeader header, final boolean presorted, final File outputFile, final File referenceFasta) { + return makeCRAMWriter(header, presorted, outputFile.toPath(), IOUtil.toPath(referenceFasta)); } - /** * Create a CRAMFileWriter on an output file. 
* @@ -596,8 +644,9 @@ public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final boolean p * */ @Deprecated - public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final boolean presorted, final Path output, final File referenceFasta) { - return makeCRAMWriter(header, presorted, output, IOUtil.toPath( referenceFasta )); + public CRAMFileWriter makeCRAMWriter( + final SAMFileHeader header, final boolean presorted, final Path output, final File referenceFasta) { + return makeCRAMWriter(header, presorted, output, IOUtil.toPath(referenceFasta)); } /** @@ -612,7 +661,8 @@ public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final boolean p * @return CRAMFileWriter * */ - public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final boolean presorted, final Path output, final Path referenceFasta) { + public CRAMFileWriter makeCRAMWriter( + final SAMFileHeader header, final boolean presorted, final Path output, final Path referenceFasta) { return createCRAMWriterWithSettings(header, presorted, output, referenceFasta); } @@ -628,17 +678,16 @@ public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final boolean p * @return CRAMFileWriter */ private CRAMFileWriter createCRAMWriterWithSettings( - final SAMFileHeader header, - final boolean presorted, - final Path outputFile, - final Path referenceFasta) { + final SAMFileHeader header, final boolean presorted, final Path outputFile, final Path referenceFasta) { final CRAMReferenceSource referenceSource; if (referenceFasta == null) { - log.info("Reference fasta is not provided when writing CRAM file " + outputFile.toUri().toString()); + log.info("Reference fasta is not provided when writing CRAM file " + + outputFile.toUri().toString()); log.info("Will attempt to use a default reference or download as set by defaults:"); log.info("Default REFERENCE_FASTA (-Dsamjdk.reference_fasta): " + Defaults.REFERENCE_FASTA); - log.info("Default USE_CRAM_REF_DOWNLOAD 
(-Dsamjdk.use_cram_ref_download): " + Defaults.USE_CRAM_REF_DOWNLOAD); + log.info("Default USE_CRAM_REF_DOWNLOAD (-Dsamjdk.use_cram_ref_download): " + + Defaults.USE_CRAM_REF_DOWNLOAD); referenceSource = ReferenceSource.getDefaultCRAMReferenceSource(); } else { @@ -649,14 +698,14 @@ private CRAMFileWriter createCRAMWriterWithSettings( if (createIndex) { if (!IOUtil.isRegularPath(outputFile)) { - log.warn("Cannot create index for CRAM because output file is not a regular file: " + outputFile.toUri()); + log.warn("Cannot create index for CRAM because output file is not a regular file: " + + outputFile.toUri()); } else { final Path indexPath = IOUtil.addExtension(outputFile, FileExtensions.BAI_INDEX); try { - indexOS = Files.newOutputStream(indexPath) ; - } - catch (final IOException ioe) { + indexOS = Files.newOutputStream(indexPath); + } catch (final IOException ioe) { throw new RuntimeIOException("Error creating index file for: " + indexPath.toUri(), ioe); } } @@ -670,23 +719,17 @@ private CRAMFileWriter createCRAMWriterWithSettings( final Path md5Path = IOUtil.addExtension(outputFile, ".md5"); final CRAMFileWriter writer = new CRAMFileWriter( + cramEncodingStrategy, createMd5File ? 
new Md5CalculatingOutputStream(cramOS, md5Path) : cramOS, indexOS, presorted, referenceSource, header, outputFile.toUri().toString()); - setCRAMWriterDefaults(writer); return writer; } - // Set the default CRAM writer preservation parameters - private void setCRAMWriterDefaults(final CRAMFileWriter writer) { - //TODO: set encoding params - //writer.setEncodingParams(new CRAMEncodingStrategy()); - } - @Override public String toString() { return "SAMFileWriterFactory [createIndex=" + createIndex + ", createMd5File=" + createMd5File + ", useAsyncIo=" @@ -694,5 +737,4 @@ public String toString() { + ", tmpDir=" + tmpDir + ", compressionLevel=" + compressionLevel + ", maxRecordsInRam=" + maxRecordsInRam + "]"; } - } diff --git a/src/main/java/htsjdk/samtools/SAMFileWriterImpl.java b/src/main/java/htsjdk/samtools/SAMFileWriterImpl.java index 3b89e09ae9..80139675aa 100644 --- a/src/main/java/htsjdk/samtools/SAMFileWriterImpl.java +++ b/src/main/java/htsjdk/samtools/SAMFileWriterImpl.java @@ -23,22 +23,21 @@ */ package htsjdk.samtools; +import static htsjdk.samtools.SAMFileHeader.SortOrder; + import htsjdk.samtools.util.ProgressLoggerInterface; import htsjdk.samtools.util.SortingCollection; - import java.io.File; import java.io.StringWriter; -import static htsjdk.samtools.SAMFileHeader.SortOrder; /** * Base class for implementing SAM writer with any underlying format. - * Mostly this manages accumulation & sorting of SAMRecords when appropriate, + * Mostly this manages accumulation and sorting of SAMRecords when appropriate, * and produces the text version of the header, since that seems to be a popular item * in both text and binary file formats. 
*/ -public abstract class SAMFileWriterImpl implements SAMFileWriter -{ - private static int DEAFULT_MAX_RECORDS_IN_RAM = 500000; +public abstract class SAMFileWriterImpl implements SAMFileWriter { + private static int DEAFULT_MAX_RECORDS_IN_RAM = 500000; private int maxRecordsInRam = DEAFULT_MAX_RECORDS_IN_RAM; private SAMFileHeader.SortOrder sortOrder; private SAMFileHeader header; @@ -60,16 +59,16 @@ public abstract class SAMFileWriterImpl implements SAMFileWriter * @param maxRecordsInRam */ public static void setDefaultMaxRecordsInRam(final int maxRecordsInRam) { - DEAFULT_MAX_RECORDS_IN_RAM = maxRecordsInRam; + DEAFULT_MAX_RECORDS_IN_RAM = maxRecordsInRam; } - + /** - * When writing records that are not presorted, this number determines the + * When writing records that are not presorted, this number determines the * number of records stored in RAM before spilling to disk. - * @return DEAFULT_MAX_RECORDS_IN_RAM + * @return DEAFULT_MAX_RECORDS_IN_RAM */ public static int getDefaultMaxRecordsInRam() { - return DEAFULT_MAX_RECORDS_IN_RAM; + return DEAFULT_MAX_RECORDS_IN_RAM; } /** @@ -87,8 +86,8 @@ public void setProgressLogger(final ProgressLoggerInterface progress) { */ public void setSortOrder(final SAMFileHeader.SortOrder sortOrder, final boolean presorted) { if (header != null) { - throw new IllegalStateException("Cannot call SAMFileWriterImpl.setSortOrder after setHeader for " + - getFilename()); + throw new IllegalStateException( + "Cannot call SAMFileWriterImpl.setSortOrder after setHeader for " + getFilename()); } this.sortOrder = sortOrder; this.presorted = presorted; @@ -97,15 +96,14 @@ public void setSortOrder(final SAMFileHeader.SortOrder sortOrder, final boolean @Override public void setSortOrderChecking(boolean check) { - final boolean doCheck = check && - this.presorted && - this.sortOrder != SAMFileHeader.SortOrder.unsorted && - this.sortOrder != SortOrder.unknown; + final boolean doCheck = check + && this.presorted + && this.sortOrder != 
SAMFileHeader.SortOrder.unsorted + && this.sortOrder != SortOrder.unknown; if (doCheck) { this.sortOrderChecker = new SAMSortOrderChecker(this.sortOrder); - } - else { + } else { this.sortOrderChecker = null; } } @@ -134,12 +132,12 @@ protected int getMaxRecordsInRam() { } /** - * When writing records that are not presorted, specify the path of the temporary directory + * When writing records that are not presorted, specify the path of the temporary directory * for spilling to disk. Must be called before setHeader(). * @param tmpDir path to the temporary directory */ protected void setTempDirectory(final File tmpDir) { - if (tmpDir!=null) { + if (tmpDir != null) { this.tmpDir = tmpDir; } } @@ -151,14 +149,13 @@ protected File getTempDirectory() { /** * Must be called before addAlignment. Header cannot be null. */ - public void setHeader(final SAMFileHeader header) - { + public void setHeader(final SAMFileHeader header) { if (null == header) { throw new IllegalArgumentException("A non-null SAMFileHeader is required for a writer"); } this.header = header; if (this.sortOrder == null) { - this.sortOrder = SAMFileHeader.SortOrder.unsorted; + this.sortOrder = SAMFileHeader.SortOrder.unsorted; } header.setSortOrder(this.sortOrder); @@ -170,8 +167,12 @@ public void setHeader(final SAMFileHeader header) } setSortOrderChecking(true); } else if (!sortOrder.equals(SAMFileHeader.SortOrder.unsorted)) { - alignmentSorter = SortingCollection.newInstance(SAMRecord.class, - new BAMRecordCodec(header), sortOrder.getComparatorInstance(), maxRecordsInRam, tmpDir); + alignmentSorter = SortingCollection.newInstance( + SAMRecord.class, + new BAMRecordCodec(header), + sortOrder.getComparatorInstance(), + maxRecordsInRam, + tmpDir); } } @@ -190,8 +191,7 @@ public SAMFileHeader getFileHeader() { * resolved against the writer's header using the current reference and mate reference names */ @Override - public void addAlignment(final SAMRecord alignment) - { + public void addAlignment(final 
SAMRecord alignment) { alignment.setHeaderStrict(header); // re-establish the record header and resolve reference indices if (sortOrder.equals(SAMFileHeader.SortOrder.unsorted)) { writeAlignment(alignment); @@ -206,10 +206,11 @@ public void addAlignment(final SAMRecord alignment) private void assertPresorted(final SAMRecord alignment) { if (this.sortOrderChecker != null && !sortOrderChecker.isSorted(alignment)) { final SAMRecord prev = sortOrderChecker.getPreviousRecord(); - throw new IllegalArgumentException("Alignments added out of order in SAMFileWriterImpl.addAlignment for " + - getFilename() + ". Sort order is " + this.sortOrder + ". Offending records are at [" - + sortOrderChecker.getSortKey(prev) + "] and [" - + sortOrderChecker.getSortKey(alignment) + "]"); + throw new IllegalArgumentException( + "Alignments added out of order in SAMFileWriterImpl.addAlignment for " + getFilename() + + ". Sort order is " + this.sortOrder + ". Offending records are at [" + + sortOrderChecker.getSortKey(prev) + "] and [" + + sortOrderChecker.getSortKey(alignment) + "]"); } } @@ -217,16 +218,14 @@ private void assertPresorted(final SAMRecord alignment) { * Must be called or else file will likely be defective. */ @Override - public final void close() - { + public final void close() { try { if (!isClosed) { if (alignmentSorter != null) { try { for (final SAMRecord alignment : alignmentSorter) { writeAlignment(alignment); - if (progressLogger != null) - progressLogger.record(alignment); + if (progressLogger != null) progressLogger.record(alignment); } } finally { alignmentSorter.cleanup(); @@ -244,7 +243,7 @@ public final void close() * this method is called. The record must hava a non-null SAMFileHeader. * @param alignment */ - abstract protected void writeAlignment(SAMRecord alignment); + protected abstract void writeAlignment(SAMRecord alignment); /** * Write the header to disk. Header object is available via getHeader(). 
@@ -252,7 +251,7 @@ public final void close() * @deprecated since 06/2018. {@link #writeHeader(SAMFileHeader)} is preferred for avoid String construction if not need it. */ @Deprecated - abstract protected void writeHeader(String textHeader); + protected abstract void writeHeader(String textHeader); /** * Write the header to disk. Header object is available via getHeader(). @@ -273,11 +272,11 @@ protected void writeHeader(final SAMFileHeader header) { /** * Do any required flushing here. */ - abstract protected void finish(); + protected abstract void finish(); /** * For producing error messages. * @return Output filename, or null if there isn't one. */ - abstract protected String getFilename(); + protected abstract String getFilename(); } diff --git a/src/main/java/htsjdk/samtools/SAMFlag.java b/src/main/java/htsjdk/samtools/SAMFlag.java index 8451b898aa..c5bf7d2737 100644 --- a/src/main/java/htsjdk/samtools/SAMFlag.java +++ b/src/main/java/htsjdk/samtools/SAMFlag.java @@ -31,22 +31,21 @@ * SAM flags as enum, to be used in GUI, menu, etc... 
*/ public enum SAMFlag { - READ_PAIRED( 0x1, "Template having multiple segments in sequencing"), - PROPER_PAIR( 0x2, "Each segment properly aligned according to the aligner"), - READ_UNMAPPED( 0x4, "Segment unmapped"), - MATE_UNMAPPED( 0x8, "Next segment in the template unmapped"), - READ_REVERSE_STRAND( 0x10, "SEQ being reverse complemented"), - MATE_REVERSE_STRAND( 0x20, "SEQ of the next segment in the template being reverse complemented"), - FIRST_OF_PAIR( 0x40, "The first segment in the template"), - SECOND_OF_PAIR( 0x80, "The last segment in the template"), - SECONDARY_ALIGNMENT( 0x100, "Secondary alignment"), + READ_PAIRED(0x1, "Template having multiple segments in sequencing"), + PROPER_PAIR(0x2, "Each segment properly aligned according to the aligner"), + READ_UNMAPPED(0x4, "Segment unmapped"), + MATE_UNMAPPED(0x8, "Next segment in the template unmapped"), + READ_REVERSE_STRAND(0x10, "SEQ being reverse complemented"), + MATE_REVERSE_STRAND(0x20, "SEQ of the next segment in the template being reverse complemented"), + FIRST_OF_PAIR(0x40, "The first segment in the template"), + SECOND_OF_PAIR(0x80, "The last segment in the template"), + SECONDARY_ALIGNMENT(0x100, "Secondary alignment"), /** @deprecated use {@link #SECONDARY_ALIGNMENT} instead. 
*/ @Deprecated - NOT_PRIMARY_ALIGNMENT( 0x100, "Secondary alignment"), - READ_FAILS_VENDOR_QUALITY_CHECK(0x200, "Not passing quality controls"), - DUPLICATE_READ( 0x400, "PCR or optical duplicate"), - SUPPLEMENTARY_ALIGNMENT( 0x800, "Supplementary alignment") - ; + NOT_PRIMARY_ALIGNMENT(0x100, "Secondary alignment"), + READ_FAILS_VENDOR_QUALITY_CHECK(0x200, "Not passing quality controls"), + DUPLICATE_READ(0x400, "PCR or optical duplicate"), + SUPPLEMENTARY_ALIGNMENT(0x800, "Supplementary alignment"); /* visible for the package, to be used by SAMRecord */ final int flag; @@ -75,18 +74,15 @@ public String getDescription() { /** @return the SAMFlag for the value 'flag' or null if it was not found */ public static SAMFlag valueOf(int flag) { for (SAMFlag f : values()) { - if (flag == f.flag) - return f; + if (flag == f.flag) return f; } return null; } /** @return find SAMFlag the flag by name, or null if it was not found */ - public static SAMFlag findByName(String flag) - { + public static SAMFlag findByName(String flag) { for (SAMFlag f : values()) { - if (f.name().equals(flag)) - return f; + if (f.name().equals(flag)) return f; } return null; } @@ -105,8 +101,7 @@ public boolean isUnset(int flag) { public static Set getFlags(int flag) { Set set = new HashSet(); for (SAMFlag f : values()) { - if (f.isSet(flag)) - set.add(f); + if (f.isSet(flag)) set.add(f); } return set; } diff --git a/src/main/java/htsjdk/samtools/SAMFormatException.java b/src/main/java/htsjdk/samtools/SAMFormatException.java index bce82bc1de..62a51f3f4c 100644 --- a/src/main/java/htsjdk/samtools/SAMFormatException.java +++ b/src/main/java/htsjdk/samtools/SAMFormatException.java @@ -27,8 +27,7 @@ * Thrown when a SAM file being read or decoded (text or binary) looks bad. 
*/ public class SAMFormatException extends SAMException { - public SAMFormatException() { - } + public SAMFormatException() {} public SAMFormatException(final String s) { super(s); diff --git a/src/main/java/htsjdk/samtools/SAMHeaderRecordComparator.java b/src/main/java/htsjdk/samtools/SAMHeaderRecordComparator.java index f48df4d338..a9eb3e9c59 100644 --- a/src/main/java/htsjdk/samtools/SAMHeaderRecordComparator.java +++ b/src/main/java/htsjdk/samtools/SAMHeaderRecordComparator.java @@ -23,7 +23,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ - import java.io.Serializable; import java.util.Comparator; @@ -33,36 +32,36 @@ * record) sort behind those that have values. */ public class SAMHeaderRecordComparator implements Comparator, Serializable { - private static final long serialVersionUID = 1L; + private static final long serialVersionUID = 1L; - private final String[] attributes; + private final String[] attributes; - public SAMHeaderRecordComparator(final String... attributes) { - this.attributes = attributes; - } + public SAMHeaderRecordComparator(final String... 
attributes) { + this.attributes = attributes; + } - @Override - public int compare(final T left, final T right) { - for (final String attribute : attributes) { - final String leftValue = left.getAttribute(attribute); - final String rightValue = right.getAttribute(attribute); + @Override + public int compare(final T left, final T right) { + for (final String attribute : attributes) { + final String leftValue = left.getAttribute(attribute); + final String rightValue = right.getAttribute(attribute); - if (leftValue == null) { - // Fastest comparison possible; two empty values are - // equivalent, so move along to the next attribute - if (rightValue == null) continue; + if (leftValue == null) { + // Fastest comparison possible; two empty values are + // equivalent, so move along to the next attribute + if (rightValue == null) continue; - // Otherwise left < right, since right has a value - else return -1; - } + // Otherwise left < right, since right has a value + else return -1; + } - // left is not null; if right is, left > right - if (rightValue == null) return 1; + // left is not null; if right is, left > right + if (rightValue == null) return 1; - final int compare = leftValue.compareTo(rightValue); - if (compare != 0) return compare; - } + final int compare = leftValue.compareTo(rightValue); + if (compare != 0) return compare; + } - return 0; - } + return 0; + } } diff --git a/src/main/java/htsjdk/samtools/SAMLineParser.java b/src/main/java/htsjdk/samtools/SAMLineParser.java index c015aa0a5a..d7524f8560 100644 --- a/src/main/java/htsjdk/samtools/SAMLineParser.java +++ b/src/main/java/htsjdk/samtools/SAMLineParser.java @@ -24,7 +24,6 @@ package htsjdk.samtools; import htsjdk.samtools.util.StringUtil; - import java.io.File; import java.util.List; import java.util.Map; @@ -62,6 +61,7 @@ public class SAMLineParser { * Add information about the origin (reader and position) to SAM records. 
*/ private final SamReader mParentReader; + private final SAMRecordFactory samRecordFactory; private final ValidationStringency validationStringency; private final SAMFileHeader mFileHeader; @@ -84,9 +84,7 @@ public class SAMLineParser { */ public SAMLineParser(final SAMFileHeader samFileHeader) { - this(new DefaultSAMRecordFactory(), - ValidationStringency.DEFAULT_STRINGENCY, samFileHeader, - null, null); + this(new DefaultSAMRecordFactory(), ValidationStringency.DEFAULT_STRINGENCY, samFileHeader, null, null); } /** @@ -96,12 +94,14 @@ public SAMLineParser(final SAMFileHeader samFileHeader) { * @param samFileReader SAM file reader For passing to SAMRecord.setFileSource, may be null. * @param samFile SAM file being read (for error message only, may be null) */ - public SAMLineParser(final SAMFileHeader samFileHeader, - final SamReader samFileReader, final File samFile) { - - this(new DefaultSAMRecordFactory(), - ValidationStringency.DEFAULT_STRINGENCY, samFileHeader, - samFileReader, samFile); + public SAMLineParser(final SAMFileHeader samFileHeader, final SamReader samFileReader, final File samFile) { + + this( + new DefaultSAMRecordFactory(), + ValidationStringency.DEFAULT_STRINGENCY, + samFileHeader, + samFileReader, + samFile); } /** @@ -113,19 +113,18 @@ public SAMLineParser(final SAMFileHeader samFileHeader, * @param samFileReader SAM file reader For passing to SAMRecord.setFileSource, may be null. 
* @param samFile SAM file being read (for error message only, may be null) */ - public SAMLineParser(final SAMRecordFactory samRecordFactory, - final ValidationStringency validationStringency, - final SAMFileHeader samFileHeader, final SamReader samFileReader, - final File samFile) { + public SAMLineParser( + final SAMRecordFactory samRecordFactory, + final ValidationStringency validationStringency, + final SAMFileHeader samFileHeader, + final SamReader samFileReader, + final File samFile) { - if (samRecordFactory == null) - throw new NullPointerException("The SamRecordFactory must be set"); + if (samRecordFactory == null) throw new NullPointerException("The SamRecordFactory must be set"); - if (validationStringency == null) - throw new NullPointerException("The validationStringency must be set"); + if (validationStringency == null) throw new NullPointerException("The validationStringency must be set"); - if (samFileHeader == null) - throw new NullPointerException("The mFileHeader must be set"); + if (samFileHeader == null) throw new NullPointerException("The mFileHeader must be set"); this.samRecordFactory = samRecordFactory; this.validationStringency = validationStringency; @@ -175,7 +174,7 @@ private int parseInt(final String s, final String fieldName) { } return ret; } - + private int parseFlag(final String s, final String fieldName) { try { return samFlagField.isPresent() ? 
samFlagField.get().parse(s) : SamFlagField.parseDefault(s); @@ -191,13 +190,11 @@ private void validateReferenceName(final String rname, final String fieldName) { if (fieldName.equals("MRNM")) { return; } - reportErrorParsingLine("= is not a valid value for " - + fieldName + " field."); + reportErrorParsingLine("= is not a valid value for " + fieldName + " field."); } if (!this.mFileHeader.getSequenceDictionary().isEmpty()) { if (this.mFileHeader.getSequence(rname) == null) { - reportErrorParsingLine(fieldName - + " '" + rname + "' not found in any SQ record"); + reportErrorParsingLine(fieldName + " '" + rname + "' not found in any SQ record"); } } } @@ -218,7 +215,7 @@ public SAMRecord parseLine(final String line) { * * @param line line to parse * @param lineNumber line number in the file. If the line number is not known - * can be <=0. + * can be {@code <=0}. * @return a new SAMRecord object */ public SAMRecord parseLine(final String line, final int lineNumber) { @@ -238,11 +235,9 @@ public SAMRecord parseLine(final String line, final int lineNumber) { reportErrorParsingLine("Empty field at position " + i + " (zero-based)"); } } - final SAMRecord samRecord = - samRecordFactory.createSAMRecord(this.mFileHeader); + final SAMRecord samRecord = samRecordFactory.createSAMRecord(this.mFileHeader); samRecord.setValidationStringency(this.validationStringency); - if (mParentReader != null) - samRecord.setFileSource(new SAMFileSource(mParentReader, null)); + if (mParentReader != null) samRecord.setFileSource(new SAMFileSource(mParentReader, null)); samRecord.setHeader(this.mFileHeader); samRecord.setReadName(mFields[QNAME_COL]); @@ -261,8 +256,7 @@ public SAMRecord parseLine(final String line, final int lineNumber) { final int pos = parseInt(mFields[POS_COL], "POS"); final int mapq = parseInt(mFields[MAPQ_COL], "MAPQ"); final String cigar = mFields[CIGAR_COL]; - if (!SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(samRecord - .getReferenceName())) { + if 
(!SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(samRecord.getReferenceName())) { if (pos == 0) { reportErrorParsingLine("POS must be non-zero if RNAME is specified"); } @@ -309,8 +303,7 @@ public SAMRecord parseLine(final String line, final int lineNumber) { final int matePos = parseInt(mFields[MPOS_COL], "MPOS"); final int isize = parseInt(mFields[ISIZE_COL], "ISIZE"); - if (!samRecord.getMateReferenceName().equals( - SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) { + if (!samRecord.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) { if (matePos == 0) { reportErrorParsingLine("MPOS must be non-zero if MRNM is specified"); } @@ -362,11 +355,11 @@ public SAMRecord parseLine(final String line, final int lineNumber) { private void validateReadBases(final String bases) { /* - * Using regex is slow, so check for invalid characters via - * isValidReadBase(), which hopefully the JIT will optimize. if - * (!VALID_BASES.matcher(bases).matches()) { - * reportErrorParsingLine("Invalid character in read bases"); } - */ + * Using regex is slow, so check for invalid characters via + * isValidReadBase(), which hopefully the JIT will optimize. 
if + * (!VALID_BASES.matcher(bases).matches()) { + * reportErrorParsingLine("Invalid character in read bases"); } + */ for (int i = 0; i < bases.length(); ++i) { if (!isValidReadBase(bases.charAt(i))) { reportErrorParsingLine("Invalid character in read bases"); @@ -424,11 +417,9 @@ private void parseTag(final SAMRecord samRecord, final String tag) { } if (entry != null) { if (entry.getValue() instanceof TagValueAndUnsignedArrayFlag) { - final TagValueAndUnsignedArrayFlag valueAndFlag = - (TagValueAndUnsignedArrayFlag) entry.getValue(); + final TagValueAndUnsignedArrayFlag valueAndFlag = (TagValueAndUnsignedArrayFlag) entry.getValue(); if (valueAndFlag.isUnsignedArray) { - samRecord.setUnsignedArrayAttribute(entry.getKey(), - valueAndFlag.value); + samRecord.setUnsignedArrayAttribute(entry.getKey(), valueAndFlag.value); } else { samRecord.setAttribute(entry.getKey(), valueAndFlag.value); } @@ -456,8 +447,7 @@ private void reportErrorParsingLine(final String reason) { if (validationStringency == ValidationStringency.STRICT) { throw new SAMFormatException(errorMessage); } else if (validationStringency == ValidationStringency.LENIENT) { - System.err - .println("Ignoring SAM validation error due to lenient parsing:"); + System.err.println("Ignoring SAM validation error due to lenient parsing:"); System.err.println(errorMessage); } } @@ -482,5 +472,4 @@ private String makeErrorString(final String reason) { + (this.currentLineNumber <= 0 ? 
"unknown" : this.currentLineNumber) + "\nLine: " + this.currentLine; } - } diff --git a/src/main/java/htsjdk/samtools/SAMProgramRecord.java b/src/main/java/htsjdk/samtools/SAMProgramRecord.java index f5ddd964a6..86cabf661c 100644 --- a/src/main/java/htsjdk/samtools/SAMProgramRecord.java +++ b/src/main/java/htsjdk/samtools/SAMProgramRecord.java @@ -39,12 +39,12 @@ public class SAMProgramRecord extends AbstractSAMHeaderRecord { public static final String COMMAND_LINE_TAG = "CL"; public static final String PREVIOUS_PROGRAM_GROUP_ID_TAG = "PP"; private String mProgramGroupId; - public static final Set STANDARD_TAGS = Collections.unmodifiableSet( - new HashSet(Arrays.asList(PROGRAM_GROUP_ID_TAG, - PROGRAM_NAME_TAG, - PROGRAM_VERSION_TAG, - COMMAND_LINE_TAG, - PREVIOUS_PROGRAM_GROUP_ID_TAG)) ); + public static final Set STANDARD_TAGS = Collections.unmodifiableSet(new HashSet(Arrays.asList( + PROGRAM_GROUP_ID_TAG, + PROGRAM_NAME_TAG, + PROGRAM_VERSION_TAG, + COMMAND_LINE_TAG, + PREVIOUS_PROGRAM_GROUP_ID_TAG))); public SAMProgramRecord(final String programGroupId) { this.mProgramGroupId = programGroupId; @@ -67,7 +67,7 @@ public String getProgramGroupId() { } public String getProgramName() { - return (String)getAttribute(PROGRAM_NAME_TAG); + return (String) getAttribute(PROGRAM_NAME_TAG); } public void setProgramName(final String name) { @@ -75,7 +75,7 @@ public void setProgramName(final String name) { } public String getProgramVersion() { - return (String)getAttribute(PROGRAM_VERSION_TAG); + return (String) getAttribute(PROGRAM_VERSION_TAG); } public void setProgramVersion(final String version) { @@ -83,7 +83,7 @@ public void setProgramVersion(final String version) { } public String getCommandLine() { - return (String)getAttribute(COMMAND_LINE_TAG); + return (String) getAttribute(COMMAND_LINE_TAG); } public void setCommandLine(final String commandLine) { @@ -91,15 +91,13 @@ public void setCommandLine(final String commandLine) { } public String getPreviousProgramGroupId() 
{ - return (String)getAttribute(PREVIOUS_PROGRAM_GROUP_ID_TAG); + return (String) getAttribute(PREVIOUS_PROGRAM_GROUP_ID_TAG); } public void setPreviousProgramGroupId(final String id) { setAttribute(PREVIOUS_PROGRAM_GROUP_ID_TAG, id); } - - /** * @return true if this == that except for the program group ID, which is arbitrary */ @@ -115,7 +113,8 @@ public boolean equals(final Object o) { final SAMProgramRecord that = (SAMProgramRecord) o; if (!attributesEqual(that)) return false; - if (mProgramGroupId != null ? !mProgramGroupId.equals(that.mProgramGroupId) : that.mProgramGroupId != null) return false; + if (mProgramGroupId != null ? !mProgramGroupId.equals(that.mProgramGroupId) : that.mProgramGroupId != null) + return false; return true; } @@ -132,7 +131,6 @@ Set getStandardTags() { return STANDARD_TAGS; } - @Override public String getSAMString() { return new SAMTextHeaderCodec().getPGLine(this); diff --git a/src/main/java/htsjdk/samtools/SAMReadGroupRecord.java b/src/main/java/htsjdk/samtools/SAMReadGroupRecord.java index 7a7c14e24e..4eb8b36687 100644 --- a/src/main/java/htsjdk/samtools/SAMReadGroupRecord.java +++ b/src/main/java/htsjdk/samtools/SAMReadGroupRecord.java @@ -23,10 +23,8 @@ */ package htsjdk.samtools; - import htsjdk.samtools.util.Iso8601Date; import htsjdk.samtools.util.SamConstants; - import java.util.Arrays; import java.util.Collections; import java.util.Date; @@ -38,8 +36,7 @@ /** * Header information about a read group. 
*/ -public class SAMReadGroupRecord extends AbstractSAMHeaderRecord -{ +public class SAMReadGroupRecord extends AbstractSAMHeaderRecord { private String mReadGroupId = null; public static final String READ_GROUP_ID_TAG = "ID"; public static final String SEQUENCING_CENTER_TAG = "CN"; @@ -56,15 +53,15 @@ public class SAMReadGroupRecord extends AbstractSAMHeaderRecord public static final String READ_GROUP_SAMPLE_TAG = "SM"; public static final String BARCODE_TAG = "BC"; - /* Platform values for the @RG-PL tag */ public enum PlatformValue { /** @deprecated Use {@linkplain PlatformValue#DNBSEQ} instead. */ - @Deprecated BGI, + @Deprecated + BGI, /** Capillary */ CAPILLARY, - + /** MGI/BGI */ DNBSEQ, @@ -87,11 +84,12 @@ public enum PlatformValue { ONT, /** @deprecated OTHER is not an official value. It is recommended to omit PL if it is not in this list or is unknown. */ - @Deprecated OTHER, + @Deprecated + OTHER, /** Pacific Biotechnology */ PACBIO, - + /** Singular Genomics */ SINGULAR, @@ -102,13 +100,25 @@ public enum PlatformValue { ULTIMA } - public static final Set STANDARD_TAGS = - new HashSet<>(Arrays.asList(READ_GROUP_ID_TAG, SEQUENCING_CENTER_TAG, DESCRIPTION_TAG, - DATE_RUN_PRODUCED_TAG, FLOW_ORDER_TAG, KEY_SEQUENCE_TAG, LIBRARY_TAG, - PROGRAM_GROUP_TAG, PREDICTED_MEDIAN_INSERT_SIZE_TAG, PLATFORM_TAG, PLATFORM_MODEL_TAG, - PLATFORM_UNIT_TAG, READ_GROUP_SAMPLE_TAG, BARCODE_TAG)); - - public SAMReadGroupRecord(final String id) { mReadGroupId = id; } + public static final Set STANDARD_TAGS = new HashSet<>(Arrays.asList( + READ_GROUP_ID_TAG, + SEQUENCING_CENTER_TAG, + DESCRIPTION_TAG, + DATE_RUN_PRODUCED_TAG, + FLOW_ORDER_TAG, + KEY_SEQUENCE_TAG, + LIBRARY_TAG, + PROGRAM_GROUP_TAG, + PREDICTED_MEDIAN_INSERT_SIZE_TAG, + PLATFORM_TAG, + PLATFORM_MODEL_TAG, + PLATFORM_UNIT_TAG, + READ_GROUP_SAMPLE_TAG, + BARCODE_TAG)); + + public SAMReadGroupRecord(final String id) { + mReadGroupId = id; + } public SAMReadGroupRecord(final String id, final SAMReadGroupRecord 
srcProgramRecord) { mReadGroupId = id; @@ -118,20 +128,45 @@ public SAMReadGroupRecord(final String id, final SAMReadGroupRecord srcProgramRe } @Override - public String getId() { return getReadGroupId(); } - public String getReadGroupId() { return mReadGroupId; } + public String getId() { + return getReadGroupId(); + } + + public String getReadGroupId() { + return mReadGroupId; + } - public String getSample() { return getAttribute(READ_GROUP_SAMPLE_TAG); } - public void setSample(final String value) { setAttribute(READ_GROUP_SAMPLE_TAG, value); } + public String getSample() { + return getAttribute(READ_GROUP_SAMPLE_TAG); + } + + public void setSample(final String value) { + setAttribute(READ_GROUP_SAMPLE_TAG, value); + } - public String getLibrary() { return getAttribute(LIBRARY_TAG); } - public void setLibrary(final String value) { setAttribute(LIBRARY_TAG, value); } + public String getLibrary() { + return getAttribute(LIBRARY_TAG); + } + + public void setLibrary(final String value) { + setAttribute(LIBRARY_TAG, value); + } - public String getPlatformUnit() { return getAttribute(PLATFORM_UNIT_TAG); } - public void setPlatformUnit(final String pu) { setAttribute(PLATFORM_UNIT_TAG, pu); } + public String getPlatformUnit() { + return getAttribute(PLATFORM_UNIT_TAG); + } + + public void setPlatformUnit(final String pu) { + setAttribute(PLATFORM_UNIT_TAG, pu); + } + + public String getPlatform() { + return getAttribute(PLATFORM_TAG); + } - public String getPlatform() { return getAttribute(PLATFORM_TAG); } - public void setPlatform(final String platform) { setAttribute(PLATFORM_TAG, platform); } + public void setPlatform(final String platform) { + setAttribute(PLATFORM_TAG, platform); + } /** * @return the List of barcodes associated with this read group or null @@ -159,7 +194,7 @@ public void setBarcodes(final List barcodes) { if (barcodes.stream().anyMatch(String::isEmpty)) { throw new IllegalArgumentException("A barcode must not be an empty String"); } - 
setAttribute(BARCODE_TAG, String.join(SamConstants.BARCODE_SEQUENCE_DELIMITER, barcodes)); + setAttribute(BARCODE_TAG, String.join(SamConstants.BARCODE_SEQUENCE_DELIMITER, barcodes)); } } @@ -172,11 +207,21 @@ public Date getRunDate() { } } - public String getFlowOrder() { return getAttribute(FLOW_ORDER_TAG); } - public void setFlowOrder(final String flowOrder) { setAttribute(FLOW_ORDER_TAG, flowOrder); } + public String getFlowOrder() { + return getAttribute(FLOW_ORDER_TAG); + } + + public void setFlowOrder(final String flowOrder) { + setAttribute(FLOW_ORDER_TAG, flowOrder); + } - public String getKeySequence() { return getAttribute(KEY_SEQUENCE_TAG); } - public void setKeySequence(final String keySequence) { setAttribute(KEY_SEQUENCE_TAG, keySequence); } + public String getKeySequence() { + return getAttribute(KEY_SEQUENCE_TAG); + } + + public void setKeySequence(final String keySequence) { + setAttribute(KEY_SEQUENCE_TAG, keySequence); + } /** * Converts to Iso8601Date if not already in that form. @@ -188,30 +233,53 @@ public void setRunDate(Date runDate) { setAttribute(DATE_RUN_PRODUCED_TAG, runDate != null ? 
runDate.toString() : null); } - public String getSequencingCenter() { return getAttribute(SEQUENCING_CENTER_TAG); } - public void setSequencingCenter(final String center) { setAttribute(SEQUENCING_CENTER_TAG, center); } + public String getSequencingCenter() { + return getAttribute(SEQUENCING_CENTER_TAG); + } + + public void setSequencingCenter(final String center) { + setAttribute(SEQUENCING_CENTER_TAG, center); + } + + public String getDescription() { + return getAttribute(DESCRIPTION_TAG); + } - public String getDescription() { return getAttribute(DESCRIPTION_TAG); } - public void setDescription(final String description) { setAttribute(DESCRIPTION_TAG, description); } + public void setDescription(final String description) { + setAttribute(DESCRIPTION_TAG, description); + } public Integer getPredictedMedianInsertSize() { final String stringRep = getAttribute(PREDICTED_MEDIAN_INSERT_SIZE_TAG); if (stringRep == null) { return null; } - return Integer.parseInt(stringRep); + return Integer.parseInt(stringRep); } + public void setPredictedMedianInsertSize(final Integer predictedMedianInsertSize) { - setAttribute(PREDICTED_MEDIAN_INSERT_SIZE_TAG, (predictedMedianInsertSize == null? null: predictedMedianInsertSize.toString())); + setAttribute( + PREDICTED_MEDIAN_INSERT_SIZE_TAG, + (predictedMedianInsertSize == null ? 
null : predictedMedianInsertSize.toString())); } - public String getProgramGroup() { return getAttribute(PROGRAM_GROUP_TAG); } - public void setProgramGroup(final String programGroup) { setAttribute(PROGRAM_GROUP_TAG, programGroup); } + public String getProgramGroup() { + return getAttribute(PROGRAM_GROUP_TAG); + } + + public void setProgramGroup(final String programGroup) { + setAttribute(PROGRAM_GROUP_TAG, programGroup); + } + + public String getPlatformModel() { + return getAttribute(PLATFORM_MODEL_TAG); + } + + public void setPlatformModel(final String platformModel) { + setAttribute(PLATFORM_MODEL_TAG, platformModel); + } - public String getPlatformModel() { return getAttribute(PLATFORM_MODEL_TAG); } - public void setPlatformModel(final String platformModel) { setAttribute(PLATFORM_MODEL_TAG, platformModel); } - /** * @return true if this == that except for the read group ID, which is arbitrary */ @@ -250,7 +318,6 @@ Set getStandardTags() { @Override public String getSAMString() { - return new SAMTextHeaderCodec().getRGLine(this); + return new SAMTextHeaderCodec().getRGLine(this); } } - diff --git a/src/main/java/htsjdk/samtools/SAMRecord.java b/src/main/java/htsjdk/samtools/SAMRecord.java index dd15248fc7..41166af530 100644 --- a/src/main/java/htsjdk/samtools/SAMRecord.java +++ b/src/main/java/htsjdk/samtools/SAMRecord.java @@ -30,9 +30,7 @@ import htsjdk.samtools.util.Log; import htsjdk.samtools.util.SequenceUtil; import htsjdk.samtools.util.StringUtil; - import java.io.Serializable; -import java.lang.reflect.Array; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -42,7 +40,6 @@ import java.util.Map; import java.util.Set; - /** * Java binding for a SAM file record. c.f. http://samtools.sourceforge.net/SAM1.pdf *

    @@ -57,7 +54,7 @@ * in the sort order per the above paragraph. Only if the mateUnmappedFlag is false can the mate reference name/index * and mate alignment start be interpreted as indicating the actual alignment position of the mate. *

    - * Note also that there are a number of getters & setters that are linked, i.e. they present different representations + * Note also that there are a number of getters and setters that are linked, i.e. they present different representations * of the same underlying data. In these cases there is typically a representation that is preferred because it * ought to be faster than some other representation. The following are the preferred representations: *

    */ - public static CRAMReferenceSource getDefaultCRAMReferenceSource() { + public static CRAMReferenceSource getDefaultCRAMReferenceSource() { if (null != Defaults.REFERENCE_FASTA) { if (Defaults.REFERENCE_FASTA.exists()) { - log.info(String.format("Default reference file %s exists, so going to use that.", Defaults.REFERENCE_FASTA.getAbsolutePath())); + log.info(String.format( + "Default reference file %s exists, so going to use that.", + Defaults.REFERENCE_FASTA.getAbsolutePath())); return new ReferenceSource(Defaults.REFERENCE_FASTA); + } else { + throw new IllegalArgumentException("The file specified by the reference_fasta property does not exist: " + + Defaults.REFERENCE_FASTA.getName()); } - else { - throw new IllegalArgumentException( - "The file specified by the reference_fasta property does not exist: " + Defaults.REFERENCE_FASTA.getName()); - } - } - else if (Defaults.USE_CRAM_REF_DOWNLOAD) { + } else if (Defaults.USE_CRAM_REF_DOWNLOAD) { log.info("USE_CRAM_REF_DOWNLOAD=true, so attempting to download reference file as needed."); - return new ReferenceSource((ReferenceSequenceFile)null); - } - else { + return new ReferenceSource((ReferenceSequenceFile) null); + } else { return new CRAMLazyReferenceSource(); } } @@ -119,8 +117,7 @@ private byte[] findInCache(final String name) { final WeakReference weakReference = cacheW.get(name); if (weakReference != null) { final byte[] bytes = weakReference.get(); - if (bytes != null) - return bytes; + if (bytes != null) return bytes; } return null; } @@ -138,8 +135,7 @@ private byte[] addToCache(final String sequenceName, final byte[] bases) { } @Override - public synchronized byte[] getReferenceBases(final SAMSequenceRecord record, - final boolean tryNameVariants) { + public synchronized byte[] getReferenceBases(final SAMSequenceRecord record, final boolean tryNameVariants) { { // check cache by sequence name: final String name = record.getSequenceName(); final byte[] bases = findInCache(name); @@ -152,14 +148,11 
@@ public synchronized byte[] getReferenceBases(final SAMSequenceRecord record, { // check cache by md5: if (md5 != null) { byte[] bases = findInCache(md5); - if (bases != null) - return bases; + if (bases != null) return bases; bases = findInCache(md5.toLowerCase()); - if (bases != null) - return bases; + if (bases != null) return bases; bases = findInCache(md5.toUpperCase()); - if (bases != null) - return bases; + if (bases != null) return bases; } } @@ -189,9 +182,7 @@ public synchronized byte[] getReferenceBases(final SAMSequenceRecord record, @Override public byte[] getReferenceBasesByRegion( - final SAMSequenceRecord sequenceRecord, - final int zeroBasedStart, - final int requestedRegionLength) { + final SAMSequenceRecord sequenceRecord, final int zeroBasedStart, final int requestedRegionLength) { ValidationUtils.validateArg(zeroBasedStart >= 0, "start must be >= 0"); // this implementation maintains the entire reference sequence, and hands out whatever region @@ -204,11 +195,12 @@ public byte[] getReferenceBasesByRegion( backingContigIndex = sequenceRecord.getSequenceIndex(); if (zeroBasedStart >= bases.length) { - throw new IllegalArgumentException(String.format("Requested start %d is beyond the sequence length %s", - zeroBasedStart, - sequenceRecord.getSequenceName())); + throw new IllegalArgumentException(String.format( + "Requested start %d is beyond the sequence length %s", + zeroBasedStart, sequenceRecord.getSequenceName())); } - return Arrays.copyOfRange(bases, zeroBasedStart, Math.min(bases.length, zeroBasedStart + requestedRegionLength)); + return Arrays.copyOfRange( + bases, zeroBasedStart, Math.min(bases.length, zeroBasedStart + requestedRegionLength)); } return bases; } @@ -222,8 +214,7 @@ private byte[] getBackingBases(final SAMSequenceRecord sequenceRecord) { } private byte[] findBasesByName(final String name, final boolean tryVariants) { - if (rsFile == null || !rsFile.isIndexed()) - return null; + if (rsFile == null || !rsFile.isIndexed()) 
return null; ReferenceSequence sequence = null; try { @@ -231,8 +222,7 @@ private byte[] findBasesByName(final String name, final boolean tryVariants) { } catch (final SAMException e) { // the only way to test if rsFile contains the sequence is to try and catch exception. } - if (sequence != null) - return sequence.getBases(); + if (sequence != null) return sequence.getBases(); if (tryVariants) { for (final String variant : getVariants(name)) { @@ -241,8 +231,7 @@ private byte[] findBasesByName(final String name, final boolean tryVariants) { } catch (final SAMException e) { log.warn("Sequence not found: " + variant); } - if (sequence != null) - return sequence.getBases(); + if (sequence != null) return sequence.getBases(); } } return null; @@ -253,8 +242,7 @@ private byte[] findBasesByMD5(final String md5) { for (int i = 0; i < downloadTriesBeforeFailing; i++) { try (final InputStream is = new URL(url).openStream()) { - if (is == null) - return null; + if (is == null) return null; log.info("Downloading reference sequence: " + url); final byte[] data = InputStreamUtils.readFully(is); @@ -264,37 +252,29 @@ private byte[] findBasesByMD5(final String md5) { if (md5.equals(downloadedMD5)) { return data; } else { - final String message = String - .format("Downloaded sequence is corrupt: requested md5=%s, received md5=%s", - md5, downloadedMD5); + final String message = String.format( + "Downloaded sequence is corrupt: requested md5=%s, received md5=%s", md5, downloadedMD5); log.error(message); } - } - catch (final IOException e) { + } catch (final IOException e) { throw new RuntimeException(e); } } - throw new GaveUpException("Giving up on downloading sequence for md5 " - + md5); + throw new GaveUpException("Giving up on downloading sequence for md5 " + md5); } - private static final Pattern chrPattern = Pattern.compile("chr.*", - Pattern.CASE_INSENSITIVE); + private static final Pattern chrPattern = Pattern.compile("chr.*", Pattern.CASE_INSENSITIVE); private List 
getVariants(final String name) { final List variants = new ArrayList<>(); - if (name.equals("M")) - variants.add("MT"); + if (name.equals("M")) variants.add("MT"); - if (name.equals("MT")) - variants.add("M"); + if (name.equals("MT")) variants.add("M"); final boolean chrPatternMatch = chrPattern.matcher(name).matches(); - if (chrPatternMatch) - variants.add(name.substring(3)); - else - variants.add("chr" + name); + if (chrPatternMatch) variants.add(name.substring(3)); + else variants.add("chr" + name); if ("chrM".equals(name)) { // chrM case: diff --git a/src/main/java/htsjdk/samtools/cram/structure/AlignmentContext.java b/src/main/java/htsjdk/samtools/cram/structure/AlignmentContext.java index f0b42ef75a..da671b5b0c 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/AlignmentContext.java +++ b/src/main/java/htsjdk/samtools/cram/structure/AlignmentContext.java @@ -29,7 +29,6 @@ import htsjdk.samtools.cram.build.CramIO; import htsjdk.samtools.cram.ref.ReferenceContext; import htsjdk.samtools.util.Log; - import java.util.Objects; /** @@ -47,22 +46,15 @@ public class AlignmentContext { public static final int NO_ALIGNMENT_END = SAMRecord.NO_ALIGNMENT_START; // SAMRecord uses this for alignmentEnd... public static final AlignmentContext MULTIPLE_REFERENCE_CONTEXT = - new AlignmentContext( - ReferenceContext.MULTIPLE_REFERENCE_CONTEXT, - NO_ALIGNMENT_START, - NO_ALIGNMENT_SPAN); + new AlignmentContext(ReferenceContext.MULTIPLE_REFERENCE_CONTEXT, NO_ALIGNMENT_START, NO_ALIGNMENT_SPAN); public static final AlignmentContext UNMAPPED_UNPLACED_CONTEXT = - new AlignmentContext( - ReferenceContext.UNMAPPED_UNPLACED_CONTEXT, - NO_ALIGNMENT_START, - NO_ALIGNMENT_SPAN); + new AlignmentContext(ReferenceContext.UNMAPPED_UNPLACED_CONTEXT, NO_ALIGNMENT_START, NO_ALIGNMENT_SPAN); - public static final AlignmentContext EOF_CONTAINER_CONTEXT = - new AlignmentContext( - ReferenceContext.UNMAPPED_UNPLACED_CONTEXT, - CramIO.EOF_ALIGNMENT_START, // defined by the spec... 
- CramIO.EOF_ALIGNMENT_SPAN); // defined by the spec... + public static final AlignmentContext EOF_CONTAINER_CONTEXT = new AlignmentContext( + ReferenceContext.UNMAPPED_UNPLACED_CONTEXT, + CramIO.EOF_ALIGNMENT_START, // defined by the spec... + CramIO.EOF_ALIGNMENT_SPAN); // defined by the spec... private final ReferenceContext referenceContext; // minimum alignment start of the reads represented here, using a 1-based coordinate system @@ -83,9 +75,8 @@ public class AlignmentContext { * @param alignmentStart the 1-based alignment start * @param alignmentSpan the alignment span */ - public AlignmentContext(final ReferenceContext referenceContext, - final int alignmentStart, - final int alignmentSpan) { + public AlignmentContext( + final ReferenceContext referenceContext, final int alignmentStart, final int alignmentSpan) { this.referenceContext = referenceContext; this.alignmentStart = alignmentStart; this.alignmentSpan = alignmentSpan; @@ -129,9 +120,7 @@ public static void validateAlignmentContext( if (alignmentStart < 0) { final String errorString = String.format( "Single-reference alignment context with an invalid start detected (index %d/start %d/span %d)", - referenceContext.getReferenceSequenceID(), - alignmentStart, - alignmentSpan); + referenceContext.getReferenceSequenceID(), alignmentStart, alignmentSpan); if (isStrict) { throw new CRAMException(errorString); } else { @@ -143,12 +132,12 @@ public static void validateAlignmentContext( case UNMAPPED_UNPLACED_TYPE: // the spec requires start==0 and span==0 for unmapped, but also make a special exception // for EOF Containers - if (!(alignmentStart == NO_ALIGNMENT_START && alignmentSpan == NO_ALIGNMENT_SPAN) && - !(alignmentStart == CramIO.EOF_ALIGNMENT_START && alignmentSpan == CramIO.EOF_ALIGNMENT_SPAN)) { + if (!(alignmentStart == NO_ALIGNMENT_START && alignmentSpan == NO_ALIGNMENT_SPAN) + && !(alignmentStart == CramIO.EOF_ALIGNMENT_START + && alignmentSpan == CramIO.EOF_ALIGNMENT_SPAN)) { final String 
errorString = String.format( "Unmapped/unplaced alignment context with invalid start/span detected (%d/%d)", - alignmentStart, - alignmentSpan); + alignmentStart, alignmentSpan); if (isStrict) { throw new CRAMException(errorString); } else { @@ -161,8 +150,7 @@ public static void validateAlignmentContext( if (alignmentStart != NO_ALIGNMENT_START || alignmentSpan != NO_ALIGNMENT_SPAN) { final String errorString = String.format( "Multi-reference alignment context with invalid start/span detected (%d/%d)", - alignmentStart, - alignmentSpan); + alignmentStart, alignmentSpan); if (isStrict) { throw new CRAMException(errorString); } else { @@ -172,20 +160,14 @@ public static void validateAlignmentContext( break; default: - throw new IllegalArgumentException( - String.format( - "Alignment context with unknown reference context type: %s", - referenceContext.getType())); + throw new IllegalArgumentException(String.format( + "Alignment context with unknown reference context type: %s", referenceContext.getType())); } } @Override public String toString() { - return String.format( - "sequenceId=%s, start=%d, span=%d", - referenceContext, - alignmentStart, - alignmentSpan); + return String.format("sequenceId=%s, start=%d, span=%d", referenceContext, alignmentStart, alignmentSpan); } @Override @@ -193,9 +175,9 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; AlignmentContext that = (AlignmentContext) o; - return alignmentStart == that.alignmentStart && - alignmentSpan == that.alignmentSpan && - Objects.equals(referenceContext, that.referenceContext); + return alignmentStart == that.alignmentStart + && alignmentSpan == that.alignmentSpan + && Objects.equals(referenceContext, that.referenceContext); } @Override diff --git a/src/main/java/htsjdk/samtools/cram/structure/AlignmentSpan.java b/src/main/java/htsjdk/samtools/cram/structure/AlignmentSpan.java index d2b58c3e1f..7c8cb43934 100644 --- 
a/src/main/java/htsjdk/samtools/cram/structure/AlignmentSpan.java +++ b/src/main/java/htsjdk/samtools/cram/structure/AlignmentSpan.java @@ -74,12 +74,9 @@ public static AlignmentSpan combine(final AlignmentSpan a, final AlignmentSpan b int span; if (a.getAlignmentStart() == b.getAlignmentStart()) { span = Math.max(a.getAlignmentSpan(), b.getAlignmentSpan()); - } - else { - span = Math.max( - a.getAlignmentStart() + a.getAlignmentSpan(), - b.getAlignmentStart() + b.getAlignmentSpan() - ) - start; + } else { + span = Math.max(a.getAlignmentStart() + a.getAlignmentSpan(), b.getAlignmentStart() + b.getAlignmentSpan()) + - start; } final int mappedCount = a.mappedCount + b.mappedCount; @@ -108,7 +105,9 @@ public int getUnmappedCount() { } // unmapped unplaced only, overlaps with getUnmappedCount (which includes unmapped placed and unplaced) - public int getUnmappedUnplacedCount() { return unmappedUnplacedCount; } + public int getUnmappedUnplacedCount() { + return unmappedUnplacedCount; + } @Override public boolean equals(Object o) { diff --git a/src/main/java/htsjdk/samtools/cram/structure/CRAMCodecModelContext.java b/src/main/java/htsjdk/samtools/cram/structure/CRAMCodecModelContext.java index 9e2ac870ba..a9e74f78a7 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/CRAMCodecModelContext.java +++ b/src/main/java/htsjdk/samtools/cram/structure/CRAMCodecModelContext.java @@ -1,7 +1,50 @@ package htsjdk.samtools.cram.structure; +import java.util.List; + /** - * Context model data/accumulators for use by CRAM 3.1 codec write implementations. + * Context model data for use by CRAM 3.1 codec write implementations that need per-record metadata + * beyond the raw byte stream. Populated during slice construction from the list of CRAM records, + * then passed through to {@link htsjdk.samtools.cram.compression.ExternalCompressor#compress} calls. 
+ * + * Currently used by FQZComp, which needs per-record quality score lengths and BAM flags to + * properly compress quality scores with context modeling. */ public class CRAMCodecModelContext { + + private int[] qualityScoreLengths; + private int[] bamFlags; + + /** + * Populate this context from the records in a slice. Should be called during slice construction + * before records are written to blocks. + * + * @param records the CRAM records for this slice + */ + public void populateFromRecords(final List records) { + qualityScoreLengths = new int[records.size()]; + bamFlags = new int[records.size()]; + for (int i = 0; i < records.size(); i++) { + final CRAMCompressionRecord record = records.get(i); + qualityScoreLengths[i] = CRAMCompressionRecord.isForcePreserveQualityScores(record.getCRAMFlags()) + ? record.getReadLength() + : 0; + bamFlags[i] = record.getBAMFlags(); + } + } + + /** @return per-record quality score lengths (one per record in the slice), or null if not populated */ + public int[] getQualityScoreLengths() { + return qualityScoreLengths; + } + + /** @return per-record BAM flags (one per record in the slice), or null if not populated */ + public int[] getBamFlags() { + return bamFlags; + } + + /** @return number of records, or 0 if not populated */ + public int getNumRecords() { + return qualityScoreLengths != null ? 
qualityScoreLengths.length : 0; + } } diff --git a/src/main/java/htsjdk/samtools/cram/structure/CRAMCompressionProfile.java b/src/main/java/htsjdk/samtools/cram/structure/CRAMCompressionProfile.java new file mode 100644 index 0000000000..f3e1bbf5a0 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/structure/CRAMCompressionProfile.java @@ -0,0 +1,342 @@ +package htsjdk.samtools.cram.structure; + +import htsjdk.samtools.cram.common.CRAMVersion; +import htsjdk.samtools.cram.common.CramVersions; +import htsjdk.samtools.cram.compression.range.RangeParams; +import htsjdk.samtools.cram.compression.rans.RANSNx16Params; +import htsjdk.samtools.cram.structure.block.BlockCompressionMethod; +import java.util.EnumMap; + +/** + * Predefined CRAM compression profiles matching those in htslib/samtools. Each profile defines + * the CRAM version, compression level, reads-per-slice, and a per-{@link DataSeries} compressor + * assignment via {@link CompressorDescriptor}. + * + *

    <p>Usage: + * <pre>

    + *   // Get a strategy for a specific profile:
    + *   CRAMEncodingStrategy strategy = CRAMCompressionProfile.ARCHIVE.toStrategy();
    + *
    + *   // Or apply a profile to an existing strategy:
    + *   CRAMCompressionProfile.FAST.applyTo(existingStrategy);
    + * </pre>
    + * + * @see CRAMEncodingStrategy + * @see CompressorDescriptor + */ +public enum CRAMCompressionProfile { + + /** + * Speed-optimized profile. Uses only GZIP at level 1. Writes CRAM 3.0 since no 3.1-specific + * codecs are used, avoiding the need for a 3.1-capable reader. + */ + FAST(CramVersions.CRAM_v3, 1, 10_000), + + /** + * Balanced profile (default). Uses rANS Nx16 for entropy-rich data series, FQZComp for quality + * scores, and Name Tokeniser for read names. Writes CRAM 3.1. + */ + NORMAL(CramVersions.CRAM_v3_1, 5, 10_000), + + /** + * Size-optimized profile. Uses the same primary codecs as NORMAL (rANS Nx16, FQZComp for quality + * scores, Name Tokeniser for read names) with GZIP level 6 and larger slices, and additionally + * trials BZIP2, GZIP and rANS candidates per data series. Writes CRAM 3.1. + */ + SMALL(CramVersions.CRAM_v3_1, 6, 25_000), + + /** + * Maximum compression profile. Uses rANS Nx16 for entropy-rich data, FQZComp for quality scores, + * and Name Tokeniser for read names at higher compression settings. Writes CRAM 3.1. + * + * <p>

    This profile uses trial compression: multiple codecs are tried per block and the smallest + * result wins. Additional candidates include BZIP2, the Range (arithmetic) coder, and GZIP. + */ + ARCHIVE(CramVersions.CRAM_v3_1, 7, 100_000); + + private final CRAMVersion cramVersion; + private final int gzipLevel; + private final int readsPerSlice; + + /** + * Look up a profile by name, ignoring case. For example, {@code "archive"}, {@code "ARCHIVE"}, + * and {@code "Archive"} all return {@link #ARCHIVE}. + * + * @param name the profile name (case-insensitive) + * @return the matching profile + * @throws IllegalArgumentException if no profile matches + */ + public static CRAMCompressionProfile valueOfCaseInsensitive(final String name) { + for (final CRAMCompressionProfile profile : values()) { + if (profile.name().equalsIgnoreCase(name)) { + return profile; + } + } + throw new IllegalArgumentException( + "Unknown CRAM compression profile: " + name + ". Must be one of: fast, normal, small, archive"); + } + + CRAMCompressionProfile(final CRAMVersion cramVersion, final int gzipLevel, final int readsPerSlice) { + this.cramVersion = cramVersion; + this.gzipLevel = gzipLevel; + this.readsPerSlice = readsPerSlice; + } + + /** + * Create a new {@link CRAMEncodingStrategy} configured with this profile's settings. + * + * @return a new strategy with this profile applied + */ + public CRAMEncodingStrategy toStrategy() { + // Use the no-profile constructor to avoid infinite recursion (default constructor calls NORMAL.applyTo) + final CRAMEncodingStrategy strategy = new CRAMEncodingStrategy(false); + applyTo(strategy); + return strategy; + } + + /** + * Apply this profile's settings to an existing strategy, overwriting the CRAM version, + * GZIP compression level, reads-per-slice, and compressor map. 
+ * + * @param strategy the strategy to modify + */ + public void applyTo(final CRAMEncodingStrategy strategy) { + strategy.setCramVersion(cramVersion); + strategy.setGZIPCompressionLevel(gzipLevel); + strategy.setReadsPerSlice(readsPerSlice); + strategy.setCompressorMap(buildCompressorMap()); + strategy.setTrialCandidatesMap(buildTrialCandidatesMap()); + } + + /** + * Build the per-DataSeries compressor map for this profile. Only includes data series + * that are actually written by the htsjdk CRAM implementation (excludes obsolete TC, TN + * and unused BB, QQ series). + */ + private EnumMap buildCompressorMap() { + final EnumMap map = new EnumMap<>(DataSeries.class); + + switch (this) { + case FAST: + buildFastMap(map); + break; + case NORMAL: + buildNormalMap(map); + break; + case SMALL: + buildSmallMap(map); + break; + case ARCHIVE: + buildArchiveMap(map); + break; + } + + return map; + } + + /** FAST: all GZIP at level 1, no 3.1 codecs. */ + private void buildFastMap(final EnumMap map) { + final CompressorDescriptor gzip = new CompressorDescriptor(BlockCompressionMethod.GZIP, gzipLevel); + for (final DataSeries ds : getWrittenDataSeries()) { + map.put(ds, gzip); + } + } + + /** NORMAL: rANS Nx16 for low-entropy data, GZIP for positional/byte-array data, FQZComp for QS, NameTok for RN. 
*/ + private void buildNormalMap(final EnumMap map) { + final CompressorDescriptor gzip = new CompressorDescriptor(BlockCompressionMethod.GZIP, gzipLevel); + final CompressorDescriptor ransOrder0 = + new CompressorDescriptor(BlockCompressionMethod.RANSNx16, RANSNx16Params.ORDER.ZERO.ordinal()); + final CompressorDescriptor ransOrder1 = + new CompressorDescriptor(BlockCompressionMethod.RANSNx16, RANSNx16Params.ORDER.ONE.ordinal()); + + // Default everything to GZIP — then override specific series with better codecs + for (final DataSeries ds : getWrittenDataSeries()) { + map.put(ds, gzip); + } + + // rANS Nx16 Order 0 for position-like integer data with low entropy + map.put(DataSeries.AP_AlignmentPositionOffset, ransOrder0); + map.put(DataSeries.RI_RefId, ransOrder0); + + // rANS Nx16 Order 1 for low-entropy integer data series where rANS outperforms GZIP + map.put(DataSeries.BA_Base, ransOrder1); + map.put(DataSeries.BF_BitFlags, ransOrder1); + map.put(DataSeries.BS_BaseSubstitutionCode, ransOrder1); + map.put(DataSeries.CF_CompressionBitFlags, ransOrder1); + map.put(DataSeries.FC_FeatureCode, ransOrder1); + map.put(DataSeries.FN_NumberOfReadFeatures, ransOrder1); + map.put(DataSeries.MF_MateBitFlags, ransOrder1); + map.put(DataSeries.MQ_MappingQualityScore, ransOrder1); + map.put(DataSeries.NS_NextFragmentReferenceSequenceID, ransOrder1); + map.put(DataSeries.RG_ReadGroup, ransOrder1); + map.put(DataSeries.RL_ReadLength, ransOrder1); + map.put(DataSeries.TL_TagIdList, ransOrder1); + map.put(DataSeries.TS_InsertSize, ransOrder1); + + // Keep GZIP for high-entropy positional data where LZ77 helps + // NP (mate position), FP (feature position) — these have high variance + // IN (insertions), SC (soft clips) — byte arrays benefit from LZ77 + + // Specialized codecs + map.put(DataSeries.QS_QualityScore, new CompressorDescriptor(BlockCompressionMethod.FQZCOMP)); + map.put(DataSeries.RN_ReadName, new CompressorDescriptor(BlockCompressionMethod.NAME_TOKENISER)); + } + + 
/** SMALL: Same codec assignments as NORMAL but at higher compression level. Trial compression + * adds BZIP2 alongside rANS/GZIP to let the trial pick the best per data series. */ + private void buildSmallMap(final EnumMap map) { + buildNormalMap(map); + } + + /** ARCHIVE: Same primary codecs as NORMAL but at higher compression, plus larger slices. + * Trial compression candidates (BZIP2, Range coder) are provided via buildTrialCandidatesMap. */ + private void buildArchiveMap(final EnumMap map) { + buildNormalMap(map); + } + + /** + * Build the trial compression candidates map for this profile. Only ARCHIVE and SMALL profiles + * currently use trial compression. For data series with trial candidates, the primary compressor + * (from buildCompressorMap) plus these additional candidates are all tried, and the smallest wins. + * + * @return the trial candidates map, or null if this profile doesn't use trial compression + */ + private EnumMap> buildTrialCandidatesMap() { + if (this != ARCHIVE && this != SMALL) { + return null; + } + + final EnumMap> trialMap = new EnumMap<>(DataSeries.class); + + // BZIP2 as an alternative for general data series + final CompressorDescriptor bzip2 = new CompressorDescriptor(BlockCompressionMethod.BZIP2); + + // Range (ARITH) coder variants as alternatives to rANS Nx16 + final CompressorDescriptor arithOrder0 = + new CompressorDescriptor(BlockCompressionMethod.ADAPTIVE_ARITHMETIC, 0); + final CompressorDescriptor arithOrder1 = + new CompressorDescriptor(BlockCompressionMethod.ADAPTIVE_ARITHMETIC, RangeParams.ORDER_FLAG_MASK); + + // GZIP as a fallback candidate (may win for small blocks) + final CompressorDescriptor gzip = new CompressorDescriptor(BlockCompressionMethod.GZIP, gzipLevel); + + if (this == ARCHIVE) { + // For entropy-rich data series that use rANS Nx16: also try Range coder and BZIP2 + for (final DataSeries ds : new DataSeries[] { + DataSeries.BA_Base, + DataSeries.BF_BitFlags, + DataSeries.CF_CompressionBitFlags, + 
DataSeries.NS_NextFragmentReferenceSequenceID, + DataSeries.RG_ReadGroup, + DataSeries.RL_ReadLength, + DataSeries.TS_InsertSize + }) { + trialMap.put(ds, java.util.List.of(arithOrder1, bzip2, gzip)); + } + // Position-like data: also try Range order 0 + for (final DataSeries ds : new DataSeries[] {DataSeries.AP_AlignmentPositionOffset, DataSeries.RI_RefId}) { + trialMap.put(ds, java.util.List.of(arithOrder0, bzip2, gzip)); + } + // GZIP-compressed data series: also try BZIP2 and rANS + final CompressorDescriptor ransOrder1 = + new CompressorDescriptor(BlockCompressionMethod.RANSNx16, RANSNx16Params.ORDER.ONE.ordinal()); + for (final DataSeries ds : new DataSeries[] { + DataSeries.BS_BaseSubstitutionCode, + DataSeries.DL_DeletionLength, + DataSeries.FC_FeatureCode, + DataSeries.FN_NumberOfReadFeatures, + DataSeries.FP_FeaturePosition, + DataSeries.HC_HardClip, + DataSeries.MF_MateBitFlags, + DataSeries.MQ_MappingQualityScore, + DataSeries.NF_RecordsToNextFragment, + DataSeries.NP_NextFragmentAlignmentStart, + DataSeries.PD_padding, + DataSeries.RS_RefSkip, + DataSeries.TL_TagIdList + }) { + trialMap.put(ds, java.util.List.of(bzip2, ransOrder1)); + } + } else if (this == SMALL) { + // SMALL: same as NORMAL primary codecs but with BZIP2 added to trial candidates. + // htslib SMALL (level 6, use_rans=1, use_bz2=1) trials GZIP + BZIP2 + all rANS variants. 
+ // For rANS-primary series: also try BZIP2 and GZIP + for (final DataSeries ds : new DataSeries[] { + DataSeries.BA_Base, + DataSeries.BF_BitFlags, + DataSeries.CF_CompressionBitFlags, + DataSeries.NS_NextFragmentReferenceSequenceID, + DataSeries.RG_ReadGroup, + DataSeries.RL_ReadLength, + DataSeries.TS_InsertSize + }) { + trialMap.put(ds, java.util.List.of(bzip2, gzip)); + } + // For rANS Order 0 series: also try BZIP2 and GZIP + for (final DataSeries ds : new DataSeries[] {DataSeries.AP_AlignmentPositionOffset, DataSeries.RI_RefId}) { + trialMap.put(ds, java.util.List.of(bzip2, gzip)); + } + // For GZIP-primary series: also try BZIP2 and rANS + final CompressorDescriptor ransOrder1 = + new CompressorDescriptor(BlockCompressionMethod.RANSNx16, RANSNx16Params.ORDER.ONE.ordinal()); + for (final DataSeries ds : new DataSeries[] { + DataSeries.BS_BaseSubstitutionCode, + DataSeries.DL_DeletionLength, + DataSeries.FC_FeatureCode, + DataSeries.FN_NumberOfReadFeatures, + DataSeries.FP_FeaturePosition, + DataSeries.HC_HardClip, + DataSeries.MF_MateBitFlags, + DataSeries.MQ_MappingQualityScore, + DataSeries.NF_RecordsToNextFragment, + DataSeries.NP_NextFragmentAlignmentStart, + DataSeries.PD_padding, + DataSeries.RS_RefSkip, + DataSeries.TL_TagIdList, + DataSeries.IN_Insertion, + DataSeries.SC_SoftClip + }) { + trialMap.put(ds, java.util.List.of(bzip2, ransOrder1)); + } + } + + return trialMap; + } + + /** + * Returns the set of DataSeries values that are actually written by the htsjdk CRAM implementation. + * Excludes obsolete (TC, TN) and unused (QQ) series. 
+ */ + private static final DataSeries[] WRITTEN_DATA_SERIES = { + DataSeries.AP_AlignmentPositionOffset, + DataSeries.BA_Base, + DataSeries.BF_BitFlags, + DataSeries.BS_BaseSubstitutionCode, + DataSeries.CF_CompressionBitFlags, + DataSeries.DL_DeletionLength, + DataSeries.FC_FeatureCode, + DataSeries.FN_NumberOfReadFeatures, + DataSeries.FP_FeaturePosition, + DataSeries.HC_HardClip, + DataSeries.IN_Insertion, + DataSeries.MF_MateBitFlags, + DataSeries.MQ_MappingQualityScore, + DataSeries.NF_RecordsToNextFragment, + DataSeries.NP_NextFragmentAlignmentStart, + DataSeries.NS_NextFragmentReferenceSequenceID, + DataSeries.PD_padding, + DataSeries.QS_QualityScore, + DataSeries.RG_ReadGroup, + DataSeries.RI_RefId, + DataSeries.RL_ReadLength, + DataSeries.RN_ReadName, + DataSeries.RS_RefSkip, + DataSeries.SC_SoftClip, + DataSeries.TL_TagIdList, + DataSeries.TS_InsertSize, + }; + + private static DataSeries[] getWrittenDataSeries() { + return WRITTEN_DATA_SERIES; + } +} diff --git a/src/main/java/htsjdk/samtools/cram/structure/CRAMCompressionRecord.java b/src/main/java/htsjdk/samtools/cram/structure/CRAMCompressionRecord.java index 95c32aadc5..ee610b8cf1 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/CRAMCompressionRecord.java +++ b/src/main/java/htsjdk/samtools/cram/structure/CRAMCompressionRecord.java @@ -25,6 +25,7 @@ package htsjdk.samtools.cram.structure; import htsjdk.samtools.*; +import htsjdk.samtools.SAMTag; import htsjdk.samtools.cram.build.CRAMReferenceRegion; import htsjdk.samtools.cram.common.CRAMVersion; import htsjdk.samtools.cram.common.CramVersions; @@ -36,7 +37,6 @@ import htsjdk.samtools.util.Log; import htsjdk.samtools.util.SequenceUtil; import htsjdk.utils.ValidationUtils; - import java.util.*; /** @@ -48,31 +48,32 @@ public class CRAMCompressionRecord { private static final Log log = Log.getInstance(CRAMCompressionRecord.class); // CF data series flags (defined by the CRAM spec) - public static final int CF_QS_PRESERVED_AS_ARRAY = 0x1; // 
preserve quality scores as array - public static final int CF_DETACHED = 0x2; // mate is stored literally vs as record offset + public static final int CF_QS_PRESERVED_AS_ARRAY = 0x1; // preserve quality scores as array + public static final int CF_DETACHED = 0x2; // mate is stored literally vs as record offset public static final int CF_HAS_MATE_DOWNSTREAM = 0x4; // sequence is unknown; encoded reference differences are present only to recreate the CIGAR string - public static final int CF_UNKNOWN_BASES = 0x8; + public static final int CF_UNKNOWN_BASES = 0x8; - public final static int NO_READGROUP_ID = -1; - public final static byte MISSING_QUALITY_SCORE = -1; // SAMRecord.UNKNOWN_MAPPING_QUALITY: 255 - private final static byte DEFAULT_QUALITY_SCORE = '?' - '!'; + public static final int NO_READGROUP_ID = -1; + public static final byte MISSING_QUALITY_SCORE = -1; // SAMRecord.UNKNOWN_MAPPING_QUALITY: 255 + private static final byte DEFAULT_QUALITY_SCORE = '?' - '!'; // NOTE: "mate unmapped" and "mate negative strand" (MF_MATE_UNMAPPED and MF_MATE_NEG_STRAND, aka // MATE_REVERSE_STRAND and MATE_UNMAPPED in SAMRecord flag space) have different values in CRAM // mateFlags space and SAMFlags space) are modeled redundantly in the bamFlags and mateFlags fields. 
// MF data series flags (defined by the CRAM spec) - public static final int MF_MATE_NEG_STRAND = 0x1; // same meaning as SAMFlag.MATE_REVERSE_STRAND, but different value - public static final int MF_MATE_UNMAPPED = 0x2; // same meaning as SAMFlag.MATE_UNMAPPED, but different value + public static final int MF_MATE_NEG_STRAND = + 0x1; // same meaning as SAMFlag.MATE_REVERSE_STRAND, but different value + public static final int MF_MATE_UNMAPPED = 0x2; // same meaning as SAMFlag.MATE_UNMAPPED, but different value private final int referenceIndex; private final int alignmentStart; // position on the reference where this read starts (1-based (SAM) coordinates) - private final int alignmentEnd; // position on the reference where this alignment ends + private final int alignmentEnd; // position on the reference where this alignment ends private final int readLength; private final CRAMRecordReadFeatures readFeatures; private final int mappingQuality; private final int readGroupID; - private final List tags; + private List tags; private final long sequentialIndex; // 1 based sequential index of this record in the cram stream private int bamFlags; @@ -83,9 +84,10 @@ public class CRAMCompressionRecord { // the contents hasher doesn't handle nulls private byte[] readBases; private byte[] qualityScores; + private Cigar cachedCigar; // populated by restoreBasesAndTags, used by toSAMRecord private MutableInt tagIdsIndex = new MutableInt(0); - //mate info + // mate info private int mateFlags; private int mateAlignmentStart; private int mateReferenceIndex; @@ -153,9 +155,9 @@ public CRAMCompressionRecord( // IUPAC codes without a dot, so we follow the same approach to reproduce the behaviour of samtools. // copy read bases before we modify the bases to BAM bases to avoid changing the original record: final byte[] originalBases = samRecord.getReadBases(); - readBases = originalBases == null || originalBases.equals(SAMRecord.NULL_SEQUENCE) ? 
- SAMRecord.NULL_SEQUENCE : - SequenceUtil.toBamReadBasesInPlace(Arrays.copyOf(originalBases, samRecord.getReadLength())); + readBases = originalBases == null || originalBases.equals(SAMRecord.NULL_SEQUENCE) + ? SAMRecord.NULL_SEQUENCE + : SequenceUtil.toBamReadBasesInPlace(Arrays.copyOf(originalBases, samRecord.getReadLength())); if (samRecord.getReadUnmappedFlag()) { readFeatures = new CRAMRecordReadFeatures(); alignmentEnd = AlignmentContext.NO_ALIGNMENT_END; @@ -196,17 +198,50 @@ public CRAMCompressionRecord( } final SAMReadGroupRecord readGroup = samRecord.getReadGroup(); - readGroupID = readGroup == null ? - NO_READGROUP_ID : - readGroupMap.get(readGroup.getId()); + readGroupID = readGroup == null ? NO_READGROUP_ID : readGroupMap.get(readGroup.getId()); + + // Tag handling: NM:i and MD:Z are stripped for mapped reads (matching htslib default) + // and regenerated from read features + reference during decode. If the stored NM/MD + // values don't match what would be recomputed (non-standard values), they are kept verbatim. + // RG is also skipped since read groups have a dedicated data series. 
+ boolean stripNM = !samRecord.getReadUnmappedFlag() && !encodingStrategy.getStoreNM(); + boolean stripMD = !samRecord.getReadUnmappedFlag() && !encodingStrategy.getStoreMD(); + + // Validate that stored NM/MD match recomputed values; keep non-standard values verbatim + if ((stripNM || stripMD) + && referenceBases != null + && samRecord.getCigar() != null + && !samRecord.getCigar().isEmpty() + && samRecord.getReadBases() != null + && samRecord.getReadBases().length > 0) { + final htsjdk.samtools.util.Tuple computed = + htsjdk.samtools.util.SequenceUtil.calculateMdAndNm( + samRecord.getCigar().getCigarElements(), + samRecord.getReadBases(), + referenceBases, + 0, + samRecord.getAlignmentStart()); + if (stripNM && samRecord.getAttribute(SAMTag.NM) != null) { + final int storedNM = ((Number) samRecord.getAttribute(SAMTag.NM)).intValue(); + if (storedNM != computed.b) { + stripNM = false; + } + } + if (stripMD && samRecord.getAttribute(SAMTag.MD) != null) { + final Object mdValue = samRecord.getAttribute(SAMTag.MD); + if (!(mdValue instanceof String) || !computed.a.equals(mdValue)) { + stripMD = false; + } + } + } - if (samRecord.getAttributes().size() > 0) { - tags = new ArrayList(); + if (!samRecord.getAttributes().isEmpty()) { + tags = new ArrayList<>(samRecord.getAttributes().size()); for (final SAMRecord.SAMTagAndValue tagAndValue : samRecord.getAttributes()) { - // Skip read group, since read group have a dedicated data series - if (!SAMTag.RG.name().equals(tagAndValue.tag)) { - tags.add(ReadTag.deriveTypeFromValue(tagAndValue.tag, tagAndValue.value)); - } + if (SAMTag.RG.name().equals(tagAndValue.tag)) continue; + if (stripNM && SAMTag.NM.name().equals(tagAndValue.tag)) continue; + if (stripMD && SAMTag.MD.name().equals(tagAndValue.tag)) continue; + tags.add(ReadTag.deriveTypeFromValue(tagAndValue.tag, tagAndValue.value)); } } else { tags = null; @@ -248,16 +283,17 @@ public CRAMCompressionRecord( final byte[] qualityScores, final byte[] readBases, final List 
readTags, - final ListreadFeaturesList, + final List readFeaturesList, final int readGroupID, final int mateFlags, final int mateReferenceIndex, final int mateAlignmentStart, final int recordsToNextFragment) { - ValidationUtils.nonNull( qualityScores,"quality scores argument must be null or nonzero length"); - ValidationUtils.nonNull(readBases,"read bases argument cannot be null"); + ValidationUtils.nonNull(qualityScores, "quality scores argument must be null or nonzero length"); + ValidationUtils.nonNull(readBases, "read bases argument cannot be null"); ValidationUtils.validateArg(readTags == null || readTags.size() > 0, "invalid read tag argument"); - ValidationUtils.validateArg(readFeaturesList == null || readFeaturesList.size() > 0, "invalid read features argument"); + ValidationUtils.validateArg( + readFeaturesList == null || readFeaturesList.size() > 0, "invalid read features argument"); ValidationUtils.validateArg(sequentialIndex >= 0, "index must be >= 0"); this.sequentialIndex = sequentialIndex; @@ -279,12 +315,11 @@ public CRAMCompressionRecord( this.recordsToNextFragment = recordsToNextFragment; // its acceptable to have a mapped, placed read, but no read features, if the read matches the // reference exactly - readFeatures = readFeaturesList == null ? - new CRAMRecordReadFeatures() : - new CRAMRecordReadFeatures(readFeaturesList); - alignmentEnd = isPlaced() ? - this.readFeatures.getAlignmentEnd(alignmentStart, readLength) : - AlignmentContext.NO_ALIGNMENT_END; + readFeatures = + readFeaturesList == null ? new CRAMRecordReadFeatures() : new CRAMRecordReadFeatures(readFeaturesList); + alignmentEnd = isPlaced() + ? 
this.readFeatures.getAlignmentEnd(alignmentStart, readLength) + : AlignmentContext.NO_ALIGNMENT_END; } /** @@ -293,8 +328,8 @@ public CRAMCompressionRecord( * @return a SAMRecord */ public SAMRecord toSAMRecord(final SAMFileHeader samFileHeader) { - ValidationUtils.nonNull( samFileHeader,"a valid sam header is required"); - ValidationUtils.validateArg( isNormalized,"record must be normalized to convert to SAMRecord"); + ValidationUtils.nonNull(samFileHeader, "a valid sam header is required"); + ValidationUtils.validateArg(isNormalized, "record must be normalized to convert to SAMRecord"); final SAMRecord samRecord = new SAMRecord(samFileHeader); samRecord.setReadName(readName); @@ -313,14 +348,12 @@ public SAMRecord toSAMRecord(final SAMFileHeader samFileHeader) { if (isSegmentUnmapped()) { samRecord.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR); } else { - samRecord.setCigar(readFeatures.getCigarForReadFeatures(readLength)); + samRecord.setCigar(cachedCigar != null ? cachedCigar : readFeatures.getCigarForReadFeatures(readLength)); } if (samRecord.getReadPairedFlag()) { samRecord.setMateReferenceIndex(mateReferenceIndex); - samRecord.setMateAlignmentStart(mateAlignmentStart > 0 ? - mateAlignmentStart : - SAMRecord.NO_ALIGNMENT_START); + samRecord.setMateAlignmentStart(mateAlignmentStart > 0 ? 
mateAlignmentStart : SAMRecord.NO_ALIGNMENT_START); samRecord.setMateNegativeStrandFlag(isMateNegativeStrand()); samRecord.setMateUnmappedFlag(isMateUnmapped()); } else { @@ -339,15 +372,27 @@ public SAMRecord toSAMRecord(final SAMFileHeader samFileHeader) { } if (readGroupID != NO_READGROUP_ID) { - final SAMReadGroupRecord readGroupRecord = samFileHeader.getReadGroups().get(readGroupID); + final SAMReadGroupRecord readGroupRecord = + samFileHeader.getReadGroups().get(readGroupID); samRecord.setAttribute("RG", readGroupRecord.getId()); } return samRecord; } - //TODO: how to resolve readnames when we don’t save them for supplementary / secondary reads that don’t - //appear near their primaries and don’t have a primary linking to them? + /** + * Assign a synthetic read name based on the sequential index if no name was decoded. + * Propagates the name to linked next/previous segments. + * + *

    Note: supplementary and secondary reads are always DETACHED (never mate-linked), so they + * have no nextSegment/previousSegment and name propagation won’t help them. This is only safe + * because htsjdk always sets preserveReadNames=true when encoding (see + * {@link htsjdk.samtools.cram.build.CompressionHeaderFactory#createCompressionHeader}). If lossy + * read name mode were ever implemented, supplementary/secondary reads in different slices from + * their primaries would receive synthetic names that don’t match. htslib avoids this by forcing + * name preservation whenever SA tags are present and when not all template reads are in the + * same slice.

    + */ public void assignReadName() { if (readName == null) { readName = Long.toString(getSequentialIndex()); @@ -367,7 +412,9 @@ public void assignReadName() { * across all records in a Slice. * (see {@link Slice#normalizeCRAMRecords(List, CRAMReferenceRegion)}). */ - void setIsNormalized() { isNormalized = true; } + void setIsNormalized() { + isNormalized = true; + } /** * When a CRAM record is read from a CRAM stream, it is "raw" in that the record's read bases, quality @@ -377,7 +424,9 @@ public void assignReadName() { * (see {@link Slice#normalizeCRAMRecords(List, CRAMReferenceRegion)}). * @return true if this record is normalized */ - public boolean isNormalized() { return isNormalized; } + public boolean isNormalized() { + return isNormalized; + } /** * Resolve the quality scores for this CRAM record based on preserved scores, read features and flags. @@ -398,11 +447,7 @@ public void resolveQualityScores() { final Scores scoresFeature = (Scores) feature; pos = scoresFeature.getPosition(); System.arraycopy( - scoresFeature.getScores(), - 0, - scores, - pos - 1, - scoresFeature.getScores().length); + scoresFeature.getScores(), 0, scores, pos - 1, scoresFeature.getScores().length); hasMissingScores = false; break; case ReadBase.operator: @@ -431,27 +476,38 @@ public void resolveQualityScores() { } /** - * The method is similar in semantics to - * {@link htsjdk.samtools.SamPairUtil#computeInsertSize(SAMRecord, SAMRecord) - * computeInsertSize} but operates on CRAM native records instead of - * SAMRecord objects. + * Compute the insert size (TLEN) for a mate pair using the htslib/samtools convention: + * the absolute value is the number of bases from the leftmost mapped base to the rightmost + * mapped base across both mates. The sign is positive for the leftmost read (by alignment + * start), negative for the rightmost, with ties broken by the first-of-pair flag. + * + *

    This matches htslib's TLEN computation in cram_encode.c / cram_decode.c, ensuring + * that linked mate pairs produce identical TLEN values regardless of which CRAM implementation + * performs the decoding. + * + *

    Note: this differs from {@link htsjdk.samtools.SamPairUtil#computeInsertSize(SAMRecord, SAMRecord)} + * which uses 5'-to-5' distance. This method is used only for CRAM mate linking and restoration. * * @param firstEnd first mate of the pair * @param secondEnd second mate of the pair - * @return template length + * @return template length for firstEnd (negate for secondEnd) */ - private static int computeInsertSize(final CRAMCompressionRecord firstEnd, final CRAMCompressionRecord secondEnd) { - if (firstEnd.isSegmentUnmapped() || - secondEnd.isSegmentUnmapped()|| - firstEnd.referenceIndex != secondEnd.referenceIndex) { + static int computeInsertSize(final CRAMCompressionRecord firstEnd, final CRAMCompressionRecord secondEnd) { + if (firstEnd.isSegmentUnmapped() + || secondEnd.isSegmentUnmapped() + || firstEnd.referenceIndex != secondEnd.referenceIndex) { return 0; } - final int firstEnd5PrimePosition = firstEnd.isNegativeStrand() ? firstEnd.getAlignmentEnd() : firstEnd.alignmentStart; - final int secondEnd5PrimePosition = secondEnd.isNegativeStrand() ? secondEnd.getAlignmentEnd() : secondEnd.alignmentStart; + final int aleft = Math.min(firstEnd.alignmentStart, secondEnd.alignmentStart); + final int aright = Math.max(firstEnd.getAlignmentEnd(), secondEnd.getAlignmentEnd()); + final int magnitude = aright - aleft + 1; - final int adjustment = (secondEnd5PrimePosition >= firstEnd5PrimePosition) ? +1 : -1; - return secondEnd5PrimePosition - firstEnd5PrimePosition + adjustment; + // Positive for leftmost read, negative for rightmost; tie-break by first-of-pair flag. 
+ if (firstEnd.alignmentStart < secondEnd.alignmentStart) return magnitude; + if (firstEnd.alignmentStart > secondEnd.alignmentStart) return -magnitude; + if (firstEnd.isFirstSegment()) return magnitude; + return -magnitude; } /** @@ -466,9 +522,7 @@ public void restoreReadBases(CRAMReferenceRegion cramReferenceRegion, final Subs readBases = SAMRecord.NULL_SEQUENCE; } else { readBases = CRAMRecordReadFeatures.restoreReadBases( - readFeatures == null ? - Collections.EMPTY_LIST : - readFeatures.getReadFeaturesList(), + readFeatures == null ? Collections.EMPTY_LIST : readFeatures.getReadFeaturesList(), isUnknownBases(), alignmentStart, readLength, @@ -477,9 +531,79 @@ public void restoreReadBases(CRAMReferenceRegion cramReferenceRegion, final Subs } } + /** + * Fused single-pass method: restore read bases from the reference + read features, build the + * CIGAR, and compute NM/MD tags, all in one iteration through the features. Replaces the + * previous separate calls to {@code restoreReadBases} + {@code restoreNmAndMd}. + * + *

    The CIGAR is cached on this record for use by {@link #toSAMRecord(SAMFileHeader)}. + */ + void restoreBasesAndTags( + final CRAMReferenceRegion cramReferenceRegion, final SubstitutionMatrix substitutionMatrix) { + // Handle the cF internal tag from htslib's embed_ref=2 mode + boolean suppressMD = false; + boolean suppressNM = false; + if (tags != null) { + for (int i = tags.size() - 1; i >= 0; i--) { + if ("cF".equals(tags.get(i).getKey())) { + final int cf = ((Number) tags.get(i).getValue()).intValue(); + suppressMD = (cf & 1) != 0; + suppressNM = (cf & 2) != 0; + tags.remove(i); + break; + } + } + } + + // Determine if MD/NM computation is needed + boolean hasNM = false; + boolean hasMD = false; + if (tags != null) { + for (final ReadTag tag : tags) { + if ("NM".equals(tag.getKey())) hasNM = true; + if ("MD".equals(tag.getKey())) hasMD = true; + } + } + final boolean needMD = !hasMD && !suppressMD; + final boolean needNM = !hasNM && !suppressNM; + + final boolean computeMdNm = (needMD || needNM) + && readLength > 0 + && readFeatures != null + && cramReferenceRegion.getCurrentReferenceBases() != null; + + final CRAMRecordReadFeatures.DecodeResult result = CRAMRecordReadFeatures.restoreBasesAndTags( + readFeatures == null ? 
Collections.emptyList() : readFeatures.getReadFeaturesList(), + isUnknownBases(), + alignmentStart, + readLength, + cramReferenceRegion, + substitutionMatrix, + computeMdNm); + + this.readBases = result.readBases; + this.cachedCigar = result.cigar; + + if (computeMdNm) { + if (tags == null) { + tags = new ArrayList<>(2); + } + if (needMD && result.mdString != null) { + tags.add(ReadTag.deriveTypeFromValue("MD", result.mdString)); + } + if (needNM && result.nmCount >= 0) { + tags.add(ReadTag.deriveTypeFromValue("NM", result.nmCount)); + } + } + } + ////////////////////////////////////// // Start Mate code ////////////////////////////////////// + /** + * Restore mate information by walking the linked list of segments starting from this record, + * setting mate fields on each segment pair, and computing template length. + */ public void restoreMateInfo() { if (getNextSegment() == null) { return; @@ -499,12 +623,18 @@ public void restoreMateInfo() { last.templateSize = -templateLength; } + /** Mark this record as detached — mate info stored explicitly via MF, NS, NP, TS data series. */ public void setToDetachedState() { setDetached(true); setHasMateDownStream(false); recordsToNextFragment = -1; } + /** Set the NF (records-to-next-fragment) offset for attached mate pairs. 
*/ + void setRecordsToNextFragment(final int recordsToNextFragment) { + this.recordsToNextFragment = recordsToNextFragment; + } + private void setNextMate(final CRAMCompressionRecord next) { mateAlignmentStart = next.alignmentStart; setMateUnmapped(next.isSegmentUnmapped()); @@ -542,66 +672,101 @@ public boolean isPlaced() { } } else if (!isSegmentUnmapped()) { final String warning = String.format( - "CRAMRecord [%s] appears to be mapped but does not have a valid alignment start.", - this.toString()); + "CRAMRecord [%s] appears to be mapped but does not have a valid alignment start.", this.toString()); log.warn(warning); } return placed; } - public String getReadName() { return readName; } + public String getReadName() { + return readName; + } - public int getAlignmentStart() { return alignmentStart; } + public int getAlignmentStart() { + return alignmentStart; + } - public int getReadLength() { return readLength; } + public int getReadLength() { + return readLength; + } - public byte[] getReadBases() { return readBases; } + public byte[] getReadBases() { + return readBases; + } - public byte[] getQualityScores() { return qualityScores; } + public byte[] getQualityScores() { + return qualityScores; + } - public int getMappingQuality() { return mappingQuality; } + public int getMappingQuality() { + return mappingQuality; + } - public int getReferenceIndex() { return referenceIndex; } + public int getReferenceIndex() { + return referenceIndex; + } - public int getTemplateSize() { return templateSize; } + public int getTemplateSize() { + return templateSize; + } - public List getTags() { return tags; } + public List getTags() { + return tags; + } - public int getRecordsToNextFragment() { return recordsToNextFragment; } + public int getRecordsToNextFragment() { + return recordsToNextFragment; + } public List getReadFeatures() { - return readFeatures == null ? - null : - readFeatures.getReadFeaturesList(); + return readFeatures == null ? 
null : readFeatures.getReadFeaturesList(); } /** * @return read group id, or {@link #NO_READGROUP_ID} if no read group assigned */ - public int getReadGroupID() { return readGroupID; } + public int getReadGroupID() { + return readGroupID; + } - public int getBAMFlags() { return bamFlags; } + public int getBAMFlags() { + return bamFlags; + } - public int getMateReferenceIndex() { return mateReferenceIndex; } + public int getMateReferenceIndex() { + return mateReferenceIndex; + } - public int getMateAlignmentStart() { return mateAlignmentStart; } + public int getMateAlignmentStart() { + return mateAlignmentStart; + } public void setTagIdsIndex(MutableInt tagIdsIndex) { - //TODO: why is this value deliberately shared across records + // The MutableInt is intentionally shared by reference across all records with the same tag + // combination. CompressionHeaderFactory.buildTagIdsFromCRAMRecords() groups records by their + // tag set and assigns a shared MutableInt to each group for counting and dictionary indexing. this.tagIdsIndex = tagIdsIndex; } - public MutableInt getTagIdsIndex() { return tagIdsIndex; } + public MutableInt getTagIdsIndex() { + return tagIdsIndex; + } - public int getMateFlags() { return (0xFF & mateFlags); } + public int getMateFlags() { + return (0xFF & mateFlags); + } - public int getCRAMFlags() { return (0xFF & cramFlags); } + public int getCRAMFlags() { + return (0xFF & cramFlags); + } /** * @return the initialized alignmentEnd */ - public int getAlignmentEnd() { return alignmentEnd; } + public int getAlignmentEnd() { + return alignmentEnd; + } // used in read name generation and mate restoration public long getSequentialIndex() { @@ -624,50 +789,65 @@ public void setPreviousSegment(CRAMCompressionRecord previousSegment) { this.previousSegment = previousSegment; } + /** Return true if this record has the secondary alignment BAM flag set. 
*/ public boolean isSecondaryAlignment() { return (bamFlags & SAMFlag.SECONDARY_ALIGNMENT.intValue()) != 0; } private void setSecondaryAlignment(final boolean secondaryAlignment) { - bamFlags = secondaryAlignment ? - bamFlags | SAMFlag.SECONDARY_ALIGNMENT.intValue() : - bamFlags & ~SAMFlag.SECONDARY_ALIGNMENT.intValue(); + bamFlags = secondaryAlignment + ? bamFlags | SAMFlag.SECONDARY_ALIGNMENT.intValue() + : bamFlags & ~SAMFlag.SECONDARY_ALIGNMENT.intValue(); } + /** Return true if this record's CRAM flags indicate a downstream mate in the same slice. */ public boolean isHasMateDownStream() { return isHasMateDownStream(cramFlags); } + /** Test the has-mate-downstream bit in the given CRAM flags value. */ public static boolean isHasMateDownStream(final int cramFlags) { return (cramFlags & CF_HAS_MATE_DOWNSTREAM) != 0; } + /** Return true if this record stores mate information explicitly (detached state). */ public boolean isDetached() { return isDetached(cramFlags); } - public static boolean isDetached(final int cramFlags) { return (cramFlags & CF_DETACHED) != 0; } + /** Test the detached bit in the given CRAM flags value. */ + public static boolean isDetached(final int cramFlags) { + return (cramFlags & CF_DETACHED) != 0; + } + /** Return true if quality scores are preserved as a full array for this record. */ public boolean isForcePreserveQualityScores() { return isForcePreserveQualityScores(cramFlags); } - public static boolean isForcePreserveQualityScores(final int cramFlags) {return (cramFlags & CF_QS_PRESERVED_AS_ARRAY) != 0; } + /** Test the quality-scores-preserved-as-array bit in the given CRAM flags value. */ + public static boolean isForcePreserveQualityScores(final int cramFlags) { + return (cramFlags & CF_QS_PRESERVED_AS_ARRAY) != 0; + } + /** Return true if the original sequence was unknown (SEQ="*"). */ public boolean isUnknownBases() { return isUnknownBases(cramFlags); } + /** Test the unknown-bases bit in the given CRAM flags value. 
*/ public static boolean isUnknownBases(final int cramFlags) { return (cramFlags & CF_UNKNOWN_BASES) != 0; } + /** Return true if this record has the read-paired BAM flag set. */ public boolean isReadPaired() { return (bamFlags & SAMFlag.READ_PAIRED.intValue()) != 0; } private void setMultiFragment(final boolean multiFragment) { - bamFlags = multiFragment ? bamFlags | SAMFlag.READ_PAIRED.intValue() : bamFlags & ~SAMFlag.READ_PAIRED.intValue(); + bamFlags = + multiFragment ? bamFlags | SAMFlag.READ_PAIRED.intValue() : bamFlags & ~SAMFlag.READ_PAIRED.intValue(); } /** @@ -682,26 +862,37 @@ public boolean isSegmentUnmapped() { return isSegmentUnmapped(bamFlags); } - public static boolean isSegmentUnmapped(final int bamFlags) { return (bamFlags & SAMFlag.READ_UNMAPPED.intValue()) != 0; } + /** Test the segment-unmapped bit in the given BAM flags value. */ + public static boolean isSegmentUnmapped(final int bamFlags) { + return (bamFlags & SAMFlag.READ_UNMAPPED.intValue()) != 0; + } private void setSegmentUnmapped(final boolean segmentUnmapped) { - bamFlags = segmentUnmapped ? bamFlags | SAMFlag.READ_UNMAPPED.intValue() : bamFlags & ~SAMFlag.READ_UNMAPPED.intValue(); + bamFlags = segmentUnmapped + ? bamFlags | SAMFlag.READ_UNMAPPED.intValue() + : bamFlags & ~SAMFlag.READ_UNMAPPED.intValue(); } + /** Return true if this record is the first segment in the template. */ public boolean isFirstSegment() { return (bamFlags & SAMFlag.FIRST_OF_PAIR.intValue()) != 0; } private void setFirstSegment(final boolean firstSegment) { - bamFlags = firstSegment ? bamFlags | SAMFlag.FIRST_OF_PAIR.intValue() : bamFlags & ~SAMFlag.FIRST_OF_PAIR.intValue(); + bamFlags = firstSegment + ? bamFlags | SAMFlag.FIRST_OF_PAIR.intValue() + : bamFlags & ~SAMFlag.FIRST_OF_PAIR.intValue(); } + /** Return true if this record is the last segment in the template. 
*/ public boolean isLastSegment() { return (bamFlags & SAMFlag.SECOND_OF_PAIR.intValue()) != 0; } private void setLastSegment(final boolean lastSegment) { - bamFlags = lastSegment ? bamFlags | SAMFlag.SECOND_OF_PAIR.intValue() : bamFlags & ~SAMFlag.SECOND_OF_PAIR.intValue(); + bamFlags = lastSegment + ? bamFlags | SAMFlag.SECOND_OF_PAIR.intValue() + : bamFlags & ~SAMFlag.SECOND_OF_PAIR.intValue(); } private boolean isVendorFiltered() { @@ -709,7 +900,9 @@ private boolean isVendorFiltered() { } private void setVendorFiltered(final boolean vendorFiltered) { - bamFlags = vendorFiltered ? bamFlags | SAMFlag.READ_FAILS_VENDOR_QUALITY_CHECK.intValue() : bamFlags & ~SAMFlag.READ_FAILS_VENDOR_QUALITY_CHECK.intValue(); + bamFlags = vendorFiltered + ? bamFlags | SAMFlag.READ_FAILS_VENDOR_QUALITY_CHECK.intValue() + : bamFlags & ~SAMFlag.READ_FAILS_VENDOR_QUALITY_CHECK.intValue(); } private boolean isProperPair() { @@ -725,7 +918,9 @@ private boolean isDuplicate() { } private void setDuplicate(final boolean duplicate) { - bamFlags = duplicate ? bamFlags | SAMFlag.DUPLICATE_READ.intValue() : bamFlags & ~SAMFlag.DUPLICATE_READ.intValue(); + bamFlags = duplicate + ? bamFlags | SAMFlag.DUPLICATE_READ.intValue() + : bamFlags & ~SAMFlag.DUPLICATE_READ.intValue(); } private boolean isNegativeStrand() { @@ -733,7 +928,9 @@ private boolean isNegativeStrand() { } private void setNegativeStrand(final boolean negativeStrand) { - bamFlags = negativeStrand ? bamFlags | SAMFlag.READ_REVERSE_STRAND.intValue() : bamFlags & ~SAMFlag.READ_REVERSE_STRAND.intValue(); + bamFlags = negativeStrand + ? 
bamFlags | SAMFlag.READ_REVERSE_STRAND.intValue() + : bamFlags & ~SAMFlag.READ_REVERSE_STRAND.intValue(); } // NOTE: "mate unmapped" and "mate negative strand" (MF_MATE_UNMAPPED and MF_MATE_NEG_STRAND, aka @@ -747,13 +944,12 @@ private void setNegativeStrand(final boolean negativeStrand) { private boolean isMateUnmapped() { return (mateFlags & MF_MATE_UNMAPPED) != 0; } + private void setMateUnmapped(final boolean mateUnmapped) { - mateFlags = mateUnmapped ? - mateFlags | MF_MATE_UNMAPPED : - mateFlags & ~MF_MATE_UNMAPPED; - bamFlags = mateUnmapped ? - bamFlags | SAMFlag.MATE_UNMAPPED.intValue() : - bamFlags & ~SAMFlag.MATE_UNMAPPED.intValue(); + mateFlags = mateUnmapped ? mateFlags | MF_MATE_UNMAPPED : mateFlags & ~MF_MATE_UNMAPPED; + bamFlags = mateUnmapped + ? bamFlags | SAMFlag.MATE_UNMAPPED.intValue() + : bamFlags & ~SAMFlag.MATE_UNMAPPED.intValue(); } // Note that this flag is maintained in both the bamFlags and mateFlags; we only test the mate flags here private boolean isMateNegativeStrand() { @@ -762,38 +958,41 @@ private boolean isMateNegativeStrand() { // Note that this flag is maintained in both the bamFlags and mateFlags, so we set both here private void setMateNegativeStrand(final boolean mateNegativeStrand) { - mateFlags = mateNegativeStrand ? - mateFlags | MF_MATE_NEG_STRAND : - mateFlags & ~MF_MATE_NEG_STRAND; - bamFlags = mateNegativeStrand ? - bamFlags | SAMFlag.MATE_REVERSE_STRAND.intValue() : - bamFlags & ~SAMFlag.MATE_REVERSE_STRAND.intValue(); + mateFlags = mateNegativeStrand ? mateFlags | MF_MATE_NEG_STRAND : mateFlags & ~MF_MATE_NEG_STRAND; + bamFlags = mateNegativeStrand + ? bamFlags | SAMFlag.MATE_REVERSE_STRAND.intValue() + : bamFlags & ~SAMFlag.MATE_REVERSE_STRAND.intValue(); } - private void setHasMateDownStream(final boolean hasMateDownStream) { + /** Set or clear the has-mate-downstream CRAM flag. */ + void setHasMateDownStream(final boolean hasMateDownStream) { cramFlags = hasMateDownStream ? 
cramFlags | CF_HAS_MATE_DOWNSTREAM : cramFlags & ~CF_HAS_MATE_DOWNSTREAM; } + /** Set or clear the detached CRAM flag (mate info stored explicitly). */ public void setDetached(final boolean detached) { cramFlags = detached ? cramFlags | CF_DETACHED : cramFlags & ~CF_DETACHED; } - private void setUnknownBases(final boolean unknownBases) { + private void setUnknownBases(final boolean unknownBases) { cramFlags = unknownBases ? cramFlags | CF_UNKNOWN_BASES : cramFlags & ~CF_UNKNOWN_BASES; } - private boolean isSupplementary() { + /** Return true if this record has the supplementary alignment BAM flag set. */ + boolean isSupplementary() { return (bamFlags & SAMFlag.SUPPLEMENTARY_ALIGNMENT.intValue()) != 0; } private void setSupplementary(final boolean supplementary) { - bamFlags = supplementary ? bamFlags | SAMFlag.SUPPLEMENTARY_ALIGNMENT.intValue() : bamFlags & ~SAMFlag.SUPPLEMENTARY_ALIGNMENT.intValue(); + bamFlags = supplementary + ? bamFlags | SAMFlag.SUPPLEMENTARY_ALIGNMENT.intValue() + : bamFlags & ~SAMFlag.SUPPLEMENTARY_ALIGNMENT.intValue(); } private void setForcePreserveQualityScores(final boolean forcePreserveQualityScores) { - cramFlags = forcePreserveQualityScores ? - cramFlags | CF_QS_PRESERVED_AS_ARRAY : - cramFlags & ~CF_QS_PRESERVED_AS_ARRAY; + cramFlags = forcePreserveQualityScores + ? cramFlags | CF_QS_PRESERVED_AS_ARRAY + : cramFlags & ~CF_QS_PRESERVED_AS_ARRAY; } private static void copyFlags(final CRAMCompressionRecord cramCompressionRecord, final SAMRecord samRecord) { @@ -829,10 +1028,9 @@ public boolean equals(Object o) { if (getMateAlignmentStart() != that.getMateAlignmentStart()) return false; if (getMateReferenceIndex() != that.getMateReferenceIndex()) return false; if (getRecordsToNextFragment() != that.getRecordsToNextFragment()) return false; - if (getReadFeatures() != null ? - !getReadFeatures().equals(that.getReadFeatures()) : - that.getReadFeatures() != null) - return false; + if (getReadFeatures() != null + ? 
!getReadFeatures().equals(that.getReadFeatures()) + : that.getReadFeatures() != null) return false; if (getTags() != null ? !getTags().equals(that.getTags()) : that.getTags() != null) return false; if (getReadName() != null ? !getReadName().equals(that.getReadName()) : that.getReadName() != null) return false; @@ -840,12 +1038,12 @@ public boolean equals(Object o) { return false; } if (!Arrays.equals(getQualityScores(), that.getQualityScores())) return false; - if (!getTagIdsIndex().equals(that.getTagIdsIndex())) - return false; + if (!getTagIdsIndex().equals(that.getTagIdsIndex())) return false; if (getNextSegment() != null ? !getNextSegment().equals(that.getNextSegment()) : that.getNextSegment() != null) return false; - return getPreviousSegment() != null ? getPreviousSegment().equals(that.getPreviousSegment()) : - that.getPreviousSegment() == null; + return getPreviousSegment() != null + ? getPreviousSegment().equals(that.getPreviousSegment()) + : that.getPreviousSegment() == null; } @Override @@ -871,8 +1069,8 @@ public int hashCode() { result = 31 * result + getMateReferenceIndex(); result = 31 * result + getRecordsToNextFragment(); result = 31 * result + (getNextSegment() != null ? getNextSegment().hashCode() : 0); - result = 31 * result + (getPreviousSegment() != null ? getPreviousSegment().hashCode() : 0); + result = 31 * result + + (getPreviousSegment() != null ? 
getPreviousSegment().hashCode() : 0); return result; } - } diff --git a/src/main/java/htsjdk/samtools/cram/structure/CRAMEncodingStrategy.java b/src/main/java/htsjdk/samtools/cram/structure/CRAMEncodingStrategy.java index 4a2762187c..b107ae89b2 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/CRAMEncodingStrategy.java +++ b/src/main/java/htsjdk/samtools/cram/structure/CRAMEncodingStrategy.java @@ -25,76 +25,129 @@ package htsjdk.samtools.cram.structure; import htsjdk.samtools.Defaults; -import htsjdk.utils.ValidationUtils; +import htsjdk.samtools.cram.common.CRAMVersion; +import htsjdk.samtools.cram.common.CramVersions; import htsjdk.samtools.cram.ref.ReferenceContextType; +import htsjdk.utils.ValidationUtils; +import java.util.EnumMap; /** - * Parameters that can be set to control the encoding strategy used when writing CRAM. + * Parameters that control the encoding strategy used when writing CRAM. Includes the CRAM version, + * compression level, container/slice sizing, and per-{@link DataSeries} compressor assignments. + * + *

    The default constructor applies the {@link CRAMCompressionProfile#NORMAL} profile. Use + * {@link CRAMCompressionProfile#toStrategy()} or {@link CRAMCompressionProfile#applyTo(CRAMEncodingStrategy)} + * to configure a specific profile. + * + * @see CRAMCompressionProfile + * @see CompressorDescriptor */ public class CRAMEncodingStrategy { - // Default value for the minimum number of reads we need to have seen to emit a single-reference slice. - // If we've see fewer than this number, and we have more reads from a different reference context, we prefer to - // switch to, and subsequently emit, a multiple reference slice, rather than a small single-reference - // that contains fewer than this number of records. public static final int DEFAULT_MINIMUM_SINGLE_REFERENCE_SLICE_THRESHOLD = 1000; - - // This number must be >= DEFAULT_MINIMUM_SINGLE_REFERENCE_SLICE_THRESHOLD (required by ContainerFactory). public static final int DEFAULT_READS_PER_SLICE = 10000; + /** + * Default ratio of bases-per-slice to reads-per-slice. Matches htslib's default rule + * {@code bases_per_slice = seqs_per_slice * 500}. A slice is flushed when either the + * record count or the accumulated base count is reached, preventing individual slices + * from growing pathologically large when input reads are long (PacBio HiFi, ONT). + */ + public static final int DEFAULT_BASES_PER_READ = 500; - // encoding strategies - private CompressionHeaderEncodingMap customCompressionHeaderEncodingMap; - - //Note: should this have separate values for tags (separate from CRAMRecord data) ? + private CRAMVersion cramVersion = CramVersions.CRAM_v3_1; private int gzipCompressionLevel = Defaults.COMPRESSION_LEVEL; - - // The minimum number of reads we need to have seen to emit a single-reference slice. 
If we've seen - // fewer than this number, and we have more reads from a different reference context, we prefer to - // switch to, and subsequently emit, a multiple reference slice, rather than a small single-reference - // that contains fewer than this number of records. This number must be < readsPerSlice. private int minimumSingleReferenceSliceSize = DEFAULT_MINIMUM_SINGLE_REFERENCE_SLICE_THRESHOLD; private int readsPerSlice = DEFAULT_READS_PER_SLICE; + // 0 means "derive from readsPerSlice at query time". Explicit override via setBasesPerSlice. + private long basesPerSlice = 0; private int slicesPerContainer = 1; + private EnumMap compressorMap; + + // Optional: additional trial compression candidates per data series. When present for a data series, + // the primary compressor from compressorMap plus these additional candidates are wrapped in a + // TrialCompressor that tries all and picks the smallest output. + private EnumMap> trialCandidatesMap; + + // Whether to store NM:i and MD:Z tags verbatim. When false (default), these tags are stripped + // during encoding for mapped reads and regenerated from the reference during decoding. Matches + // htslib's store_nm/store_md options (both default to 0/false). + private boolean storeNM = false; + private boolean storeMD = false; + + // Advanced override: a pre-built encoding map that bypasses the compressor map entirely. + // Used by tests that need low-level control over encoding descriptors. + private CompressionHeaderEncodingMap customCompressionHeaderEncodingMap; + /** - * Create an encoding strategy that uses all default values. + * Create an encoding strategy with the {@link CRAMCompressionProfile#NORMAL} profile applied. */ public CRAMEncodingStrategy() { - // use defaults; + CRAMCompressionProfile.NORMAL.applyTo(this); } /** - * Set number of slices per container. In some cases, a container containing fewer slices than the + * Package-private constructor that skips profile application. 
Used by + * {@link CRAMCompressionProfile#toStrategy()} to avoid infinite recursion. + * + * @param applyDefaultProfile ignored — exists only to differentiate from the default constructor + */ + CRAMEncodingStrategy(final boolean applyDefaultProfile) { + // no profile applied; caller is responsible for calling applyTo() + } + + /** @return the CRAM version to write */ + public CRAMVersion getCramVersion() { + return cramVersion; + } + + /** + * Set the CRAM version to write. + * + * @param cramVersion the CRAM version (e.g., {@link CramVersions#CRAM_v3} or {@link CramVersions#CRAM_v3_1}) + * @return this strategy for chaining + */ + public CRAMEncodingStrategy setCramVersion(final CRAMVersion cramVersion) { + ValidationUtils.nonNull(cramVersion, "CRAM version must not be null"); + this.cramVersion = cramVersion; + return this; + } + + /** + * Set number of reads per slice. In some cases, a container containing fewer slices than the * requested value will be produced in order to honor the specification rule that all slices in a * container must have the same {@link ReferenceContextType}. * * Note: this value must be >= {@link #getMinimumSingleReferenceSliceSize}. * - * @param readsPerSlice number of slices written per container + * @param readsPerSlice number of reads written per slice * @return updated CRAMEncodingStrategy */ public CRAMEncodingStrategy setReadsPerSlice(final int readsPerSlice) { ValidationUtils.validateArg( readsPerSlice > 0 && readsPerSlice >= minimumSingleReferenceSliceSize, - String.format("Reads per slice must be > 0 and < minimum single reference slice size (%d)", + String.format( + "Reads per slice must be > 0 and >= minimum single reference slice size (%d)", minimumSingleReferenceSliceSize)); this.readsPerSlice = readsPerSlice; return this; } - /** - * The minimum number of reads we need to have seen to emit a single-reference slice. 
If we've seen - * fewer than this number, and we have more reads from a different reference context, we prefer to - * switch to, and subsequently emit, a multiple reference slice, rather than a small single-reference - * that contains fewer than this number of records. - * - * This number must be < the value for {@link #getReadsPerSlice} - * - * @param minimumSingleReferenceSliceSize - */ + /** + * The minimum number of reads we need to have seen to emit a single-reference slice. If we've seen + * fewer than this number, and we have more reads from a different reference context, we prefer to + * switch to, and subsequently emit, a multiple reference slice, rather than a small single-reference + * that contains fewer than this number of records. + * + * This number must be {@code <=} the value for {@link #getReadsPerSlice} + * + * @param minimumSingleReferenceSliceSize the minimum slice size + * @return this strategy for chaining + */ public CRAMEncodingStrategy setMinimumSingleReferenceSliceSize(int minimumSingleReferenceSliceSize) { ValidationUtils.validateArg( minimumSingleReferenceSliceSize <= readsPerSlice, - String.format("Minimm single reference slice size must be < the reads per slice size (%d)", readsPerSlice)); + String.format( + "Minimum single reference slice size must be <= the reads per slice size (%d)", readsPerSlice)); this.minimumSingleReferenceSliceSize = minimumSingleReferenceSliceSize; return this; } @@ -103,9 +156,41 @@ public int getMinimumSingleReferenceSliceSize() { return minimumSingleReferenceSliceSize; } + /** + * Set the maximum accumulated bases per slice. When the accumulated bases in a slice + * reaches this threshold, the slice is flushed even if {@link #getReadsPerSlice} has + * not been reached. This prevents individual slices from growing pathologically large + * for long-read data (PacBio HiFi, ONT). + * + *

    Setting a value of 0 reverts to the default ({@link #getReadsPerSlice} {@code *} + * {@link #DEFAULT_BASES_PER_READ}), matching htslib's rule. + * + * @param basesPerSlice maximum bases per slice, or 0 to use the default + * @return this strategy for chaining + */ + public CRAMEncodingStrategy setBasesPerSlice(final long basesPerSlice) { + ValidationUtils.validateArg(basesPerSlice >= 0, "basesPerSlice must be >= 0"); + this.basesPerSlice = basesPerSlice; + return this; + } + + /** + * @return the bases-per-slice threshold. If no explicit value was set, returns + * {@link #getReadsPerSlice} {@code *} {@link #DEFAULT_BASES_PER_READ} (matching htslib). + */ + public long getBasesPerSlice() { + return basesPerSlice > 0 ? basesPerSlice : (long) readsPerSlice * DEFAULT_BASES_PER_READ; + } + + /** + * Set the GZIP compression level used for data series compressed with GZIP. + * + * @param compressionLevel GZIP compression level (0-10) + * @return this strategy for chaining + */ public CRAMEncodingStrategy setGZIPCompressionLevel(final int compressionLevel) { - ValidationUtils.validateArg(compressionLevel >=0 && compressionLevel <= 10, - "cram gzip compression level must be > 0 and <= 10"); + ValidationUtils.validateArg( + compressionLevel >= 0 && compressionLevel <= 10, "cram gzip compression level must be >= 0 and <= 10"); this.gzipCompressionLevel = compressionLevel; return this; } @@ -113,40 +198,129 @@ public CRAMEncodingStrategy setGZIPCompressionLevel(final int compressionLevel) /** * Set the number of slices per container. If > 1, multiple slices will be placed in the same container * if the slices share the same reference context (container records mapped to the same contig). 
MULTI-REF - * slices are always emitted as a single contain to avoid conferring MULTI-REF on the next slice, which + * slices are always emitted as a single container to avoid conferring MULTI-REF on the next slice, which * might otherwise be single-ref; the spec requires a MULTI_REF container to only contain multi-ref slices). - * @param slicesPerContainer - requested number of slices per container - * @return CRAMEncodingStrategy + * + * @param slicesPerContainer requested number of slices per container + * @return this strategy for chaining */ public CRAMEncodingStrategy setSlicesPerContainer(final int slicesPerContainer) { - ValidationUtils.validateArg(slicesPerContainer >=0, "slicesPerContainer must be > 0"); + ValidationUtils.validateArg(slicesPerContainer > 0, "slicesPerContainer must be > 0"); this.slicesPerContainer = slicesPerContainer; return this; } /** - * Set the {@link CompressionHeaderEncodingMap} to use. + * Set the per-DataSeries compressor map. Each entry maps a {@link DataSeries} to the + * {@link CompressorDescriptor} that should be used to compress its block. + * + * @param compressorMap the compressor map (defensively copied) + * @return this strategy for chaining + */ + public CRAMEncodingStrategy setCompressorMap(final EnumMap compressorMap) { + ValidationUtils.nonNull(compressorMap, "compressor map must not be null"); + this.compressorMap = new EnumMap<>(compressorMap); + return this; + } + + /** @return the per-DataSeries compressor map, or null if not set */ + public EnumMap getCompressorMap() { + return compressorMap; + } + + /** + * Set additional trial compression candidates per DataSeries. For data series with entries in + * this map, a {@link htsjdk.samtools.cram.compression.TrialCompressor} will be created that + * tries the primary compressor plus all listed candidates, selecting the smallest output. 
+ * + * @param trialCandidatesMap map of data series to additional candidate descriptors + * @return this strategy for chaining + */ + public CRAMEncodingStrategy setTrialCandidatesMap( + final EnumMap> trialCandidatesMap) { + this.trialCandidatesMap = trialCandidatesMap != null ? new EnumMap<>(trialCandidatesMap) : null; + return this; + } + + /** @return the trial candidates map, or null if trial compression is not configured */ + public EnumMap> getTrialCandidatesMap() { + return trialCandidatesMap; + } + + /** + * Set a pre-built {@link CompressionHeaderEncodingMap} that bypasses the compressor map. + * This is an advanced override intended for tests that need low-level control over encoding + * descriptors. When set, {@link htsjdk.samtools.cram.build.CompressionHeaderFactory} will use + * this map directly instead of building one from the compressor map. * - * @param encodingMap the encoding map to use + * @param encodingMap the encoding map to use, or null to use the compressor map */ public void setCustomCompressionHeaderEncodingMap(final CompressionHeaderEncodingMap encodingMap) { this.customCompressionHeaderEncodingMap = encodingMap; } - public CompressionHeaderEncodingMap getCustomCompressionHeaderEncodingMap() { return customCompressionHeaderEncodingMap; } - public int getGZIPCompressionLevel() { return gzipCompressionLevel; } - public int getReadsPerSlice() { return readsPerSlice; } - public int getSlicesPerContainer() { return slicesPerContainer; } + /** @return the custom encoding map, or null if the compressor map should be used */ + public CompressionHeaderEncodingMap getCustomCompressionHeaderEncodingMap() { + return customCompressionHeaderEncodingMap; + } + + /** + * Set whether to store the NM:i tag verbatim. When false (default), NM is stripped during + * encoding for mapped reads and regenerated from features + reference during decoding. + * Matches htslib's {@code CRAM_OPT_STORE_NM} option. 
+ * + * @param storeNM true to store NM verbatim, false to strip and regenerate + * @return this strategy for chaining + */ + public CRAMEncodingStrategy setStoreNM(final boolean storeNM) { + this.storeNM = storeNM; + return this; + } + + /** @return whether NM:i tags are stored verbatim (false = stripped and regenerated) */ + public boolean getStoreNM() { + return storeNM; + } + + /** + * Set whether to store the MD:Z tag verbatim. When false (default), MD is stripped during + * encoding for mapped reads and regenerated from features + reference during decoding. + * Matches htslib's {@code CRAM_OPT_STORE_MD} option. + * + * @param storeMD true to store MD verbatim, false to strip and regenerate + * @return this strategy for chaining + */ + public CRAMEncodingStrategy setStoreMD(final boolean storeMD) { + this.storeMD = storeMD; + return this; + } + + /** @return whether MD:Z tags are stored verbatim (false = stripped and regenerated) */ + public boolean getStoreMD() { + return storeMD; + } + + public int getGZIPCompressionLevel() { + return gzipCompressionLevel; + } + + public int getReadsPerSlice() { + return readsPerSlice; + } + + public int getSlicesPerContainer() { + return slicesPerContainer; + } @Override public String toString() { - return "CRAMEncodingStrategy{" + - ", customCompressionMap='" + customCompressionHeaderEncodingMap + '\'' + - ", gzipCompressionLevel=" + gzipCompressionLevel + - ", readsPerSlice=" + readsPerSlice + - ", slicesPerContainer=" + slicesPerContainer + - '}'; + return "CRAMEncodingStrategy{" + "cramVersion=" + + cramVersion + ", gzipCompressionLevel=" + + gzipCompressionLevel + ", readsPerSlice=" + + readsPerSlice + ", slicesPerContainer=" + + slicesPerContainer + '}'; } + @Override public boolean equals(Object o) { if (this == o) return true; @@ -158,20 +332,18 @@ public boolean equals(Object o) { if (getMinimumSingleReferenceSliceSize() != that.getMinimumSingleReferenceSliceSize()) return false; if (getReadsPerSlice() != 
that.getReadsPerSlice()) return false; if (getSlicesPerContainer() != that.getSlicesPerContainer()) return false; - return getCustomCompressionHeaderEncodingMap() != null ? - getCustomCompressionHeaderEncodingMap().equals(that.getCustomCompressionHeaderEncodingMap()) : - that.getCustomCompressionHeaderEncodingMap() == null; + if (!cramVersion.equals(that.cramVersion)) return false; + return compressorMap != null ? compressorMap.equals(that.compressorMap) : that.compressorMap == null; } @Override public int hashCode() { - int result = getCustomCompressionHeaderEncodingMap() != null ? - getCustomCompressionHeaderEncodingMap().hashCode() : 0; + int result = cramVersion.hashCode(); result = 31 * result + gzipCompressionLevel; result = 31 * result + getMinimumSingleReferenceSliceSize(); result = 31 * result + getReadsPerSlice(); result = 31 * result + getSlicesPerContainer(); + result = 31 * result + (compressorMap != null ? compressorMap.hashCode() : 0); return result; } - } diff --git a/src/main/java/htsjdk/samtools/cram/structure/CRAMRecordReadFeatures.java b/src/main/java/htsjdk/samtools/cram/structure/CRAMRecordReadFeatures.java index 1c081e6dc2..68b58270e0 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/CRAMRecordReadFeatures.java +++ b/src/main/java/htsjdk/samtools/cram/structure/CRAMRecordReadFeatures.java @@ -34,13 +34,14 @@ import htsjdk.samtools.cram.encoding.readfeatures.*; import htsjdk.samtools.util.SequenceUtil; import htsjdk.utils.ValidationUtils; - import java.util.*; /** * Class for handling the read features for a {@link CRAMCompressionRecord}. 
*/ public class CRAMRecordReadFeatures { + private static final byte[] BAM_READ_BASE_LOOKUP = SequenceUtil.getBamReadBaseLookup(); + final List readFeatures; /** @@ -73,7 +74,7 @@ public CRAMRecordReadFeatures(final SAMRecord samRecord, final byte[] bamReadBas byte[] readBases = bamReadBases; if (readBases.length == 0) { - //for SAMRecords with SEQ="*", manufacture 'N's + // for SAMRecords with SEQ="*", manufacture 'N's readBases = new byte[cigarLen]; Arrays.fill(readBases, (byte) 'N'); } @@ -129,27 +130,20 @@ public CRAMRecordReadFeatures(final SAMRecord samRecord, final byte[] bamReadBas } } - public final List getReadFeaturesList() { return readFeatures; } + /** Return the list of read features for this record. */ + public final List getReadFeaturesList() { + return readFeatures; + } - private void addSoftClip( - final int zeroBasedPositionInRead, - final int cigarElementLength, - final byte[] readBases) { - final byte[] insertedBases = Arrays.copyOfRange( - readBases, - zeroBasedPositionInRead, - zeroBasedPositionInRead + cigarElementLength); + private void addSoftClip(final int zeroBasedPositionInRead, final int cigarElementLength, final byte[] readBases) { + final byte[] insertedBases = + Arrays.copyOfRange(readBases, zeroBasedPositionInRead, zeroBasedPositionInRead + cigarElementLength); readFeatures.add(new SoftClip(zeroBasedPositionInRead + 1, insertedBases)); } - private void addInsertion( - final int zeroBasedPositionInRead, - final int cigarElementLength, - final byte[] readBases) { - final byte[] insertedBases = Arrays.copyOfRange( - readBases, - zeroBasedPositionInRead, - zeroBasedPositionInRead + cigarElementLength); + private void addInsertion(final int zeroBasedPositionInRead, final int cigarElementLength, final byte[] readBases) { + final byte[] insertedBases = + Arrays.copyOfRange(readBases, zeroBasedPositionInRead, zeroBasedPositionInRead + cigarElementLength); for (int i = 0; i < insertedBases.length; i++) { // Note: when cigarElementLength > 
1, this should use a Bases read feature instead of using n // InsertBases read features, but doing so require a ByteArrayLenEncoding, which requires @@ -177,7 +171,7 @@ private void addInsertion( * @param bases the read bases array * @param baseQualities the quality score array */ - //Visible for testing + // Visible for testing static void addMismatchReadFeatures( final byte[] refBases, final int alignmentStart, @@ -192,27 +186,32 @@ static void addMismatchReadFeatures( byte refBase; for (int i = 0; i < nofReadBases; i++, oneBasedPositionInRead++, refIndex++) { - refBase = refIndex >= refBases.length ? - (byte) 'N' : - refBases[refIndex]; + refBase = refIndex >= refBases.length ? (byte) 'N' : refBases[refIndex]; final byte readBase = bases[i + fromPosInRead]; if (readBase != refBase) { - final boolean isSubstitution = SequenceUtil.isUpperACGTN(readBase) && SequenceUtil.isUpperACGTN(refBase); + final boolean isSubstitution = + SequenceUtil.isUpperACGTN(readBase) && SequenceUtil.isUpperACGTN(refBase); if (isSubstitution) { features.add(new Substitution(oneBasedPositionInRead, readBase, refBase)); } else { - final byte score = - baseQualities.equals(SAMRecord.NULL_QUALS) ? - CRAMCompressionRecord.MISSING_QUALITY_SCORE : - baseQualities[i + fromPosInRead] ; + final byte score = baseQualities.equals(SAMRecord.NULL_QUALS) + ? CRAMCompressionRecord.MISSING_QUALITY_SCORE + : baseQualities[i + fromPosInRead]; features.add(new ReadBase(oneBasedPositionInRead, readBase, score)); } } } } + /** + * Compute the alignment end position from the read features, alignment start, and read length. 
+ * + * @param alignmentStart 1-based alignment start position + * @param readLength length of the read in bases + * @return 1-based alignment end position + */ public int getAlignmentEnd(int alignmentStart, int readLength) { int alignmentSpan = readLength; if (readFeatures != null) { @@ -244,9 +243,10 @@ public int getAlignmentEnd(int alignmentStart, int readLength) { } /** - * Get a Cigar fo this set of read features. - * @param readLength - * @return + * Build a {@link Cigar} from these read features and the given read length. + * + * @param readLength the length of the read in bases + * @return the reconstructed CIGAR */ public Cigar getCigarForReadFeatures(final int readLength) { if (readFeatures == null) { @@ -402,12 +402,7 @@ public static byte[] restoreReadBases( 0, Math.min(bases.length, referenceBases.length + zeroBasedReferenceOffset - alignmentStart)); } else { - System.arraycopy( - referenceBases, - alignmentStart - zeroBasedReferenceOffset, - bases, - 0, - bases.length); + System.arraycopy(referenceBases, alignmentStart - zeroBasedReferenceOffset, bases, 0, bases.length); } return SequenceUtil.toBamReadBasesInPlace(bases); } @@ -421,7 +416,8 @@ public static byte[] restoreReadBases( switch (variation.getOperator()) { case Substitution.operator: - byte refBase = getByteOrDefault(referenceBases, alignmentStart + posInSeq - zeroBasedReferenceOffset, (byte) 'N'); + byte refBase = getByteOrDefault( + referenceBases, alignmentStart + posInSeq - zeroBasedReferenceOffset, (byte) 'N'); // substitution requires ACGTN only: refBase = Utils.normalizeBase(refBase); final Substitution substitution = (Substitution) variation; @@ -459,13 +455,17 @@ public static byte[] restoreReadBases( case Padding.operator: case HardClip.operator: break; // handled by getCigarForReadFeatures - default: throw new CRAMException(String.format("Unrecognized read feature code: %c", variation.getOperator())); + default: + throw new CRAMException( + String.format("Unrecognized read 
feature code: %c", variation.getOperator())); } } if (referenceBases != null) { - for (; posInRead <= readLength - && alignmentStart + posInSeq - zeroBasedReferenceOffset < referenceBases.length; posInRead++, posInSeq++) { + for (; + posInRead <= readLength + && alignmentStart + posInSeq - zeroBasedReferenceOffset < referenceBases.length; + posInRead++, posInSeq++) { bases[posInRead - 1] = referenceBases[alignmentStart + posInSeq - zeroBasedReferenceOffset]; } } @@ -480,11 +480,7 @@ public static byte[] restoreReadBases( case Bases.operator: final Bases basesOp = (Bases) variation; System.arraycopy( - basesOp.getBases(), - 0, - bases, - variation.getPosition() - 1, - basesOp.getBases().length); + basesOp.getBases(), 0, bases, variation.getPosition() - 1, basesOp.getBases().length); break; default: break; @@ -498,9 +494,381 @@ private static byte getByteOrDefault(final byte[] array, final int pos, final by if (array == null) { return outOfBoundsValue; } - return pos >= array.length ? - outOfBoundsValue : - array[pos]; + return pos >= array.length ? outOfBoundsValue : array[pos]; + } + + /** + * Result of the fused single-pass decode: read bases, CIGAR, and optionally MD string + NM count. + */ + public static final class DecodeResult { + public final byte[] readBases; + public final Cigar cigar; + public final String mdString; // null if not computed + public final int nmCount; // -1 if not computed + + DecodeResult(final byte[] readBases, final Cigar cigar, final String mdString, final int nmCount) { + this.readBases = readBases; + this.cigar = cigar; + this.mdString = mdString; + this.nmCount = nmCount; + } + } + + /** + * Fused single-pass decode: restore read bases from the reference + read features, build the CIGAR, + * and optionally compute the MD string and NM edit distance, all in a single iteration through the + * features list. 
This replaces the previous 3-4 pass approach (restoreReadBases + getCigarForReadFeatures + * + calculateMdAndNm + toBamReadBasesInPlace). + * + *

    Base normalization (upper-casing, replacing invalid bases with N) is done inline as bases are + * written, eliminating the need for a separate {@code toBamReadBasesInPlace} pass. + * + * @param readFeatures list of read features (may be null for pure reference matches) + * @param isUnknownBases true if the CF_UNKNOWN_BASES flag is set + * @param readAlignmentStart 1-based alignment start + * @param readLength read length + * @param cramReferenceRegion reference region covering this read's span + * @param substitutionMatrix substitution matrix for base resolution + * @param computeMdNm whether to compute MD string and NM count + * @return DecodeResult containing bases, CIGAR, and optionally MD/NM + */ + public static DecodeResult restoreBasesAndTags( + final List readFeatures, + final boolean isUnknownBases, + final int readAlignmentStart, + final int readLength, + final CRAMReferenceRegion cramReferenceRegion, + final SubstitutionMatrix substitutionMatrix, + final boolean computeMdNm) { + + if (readLength == 0) { + final Cigar cigar = new Cigar(Collections.singletonList(new CigarElement(readLength, CigarOperator.M))); + return new DecodeResult(SAMRecord.NULL_SEQUENCE, cigar, null, -1); + } + + // When isUnknownBases (CF_UNKNOWN_BASES / seq '*'), we still need to process read features + // to reconstruct the CIGAR (e.g. soft clips stored in SC data series), but skip all base + // restoration, reference lookups, and MD/NM computation. + final byte[] bases = isUnknownBases ? null : new byte[readLength]; + final int alignmentStart = readAlignmentStart - 1; // 0-based + final int refOffset = isUnknownBases ? 0 : cramReferenceRegion.getRegionStart(); + final byte[] refBases = isUnknownBases ? null : cramReferenceRegion.getCurrentReferenceBases(); + final boolean doBasesAndMdNm = !isUnknownBases; + + // MD/NM state — mdActive tracks whether we're still within the reference boundary. 
+ // Once we exceed the reference, we stop MD/NM computation (matching calculateMdAndNm's break behavior). + int nmCount = 0; + final boolean actuallyComputeMdNm = computeMdNm && doBasesAndMdNm; + final StringBuilder mdString = actuallyComputeMdNm ? new StringBuilder(readLength) : null; + int mdMatchRun = 0; + boolean mdActive = actuallyComputeMdNm; + + // No features: pure reference match (fast path) + if (readFeatures == null || readFeatures.isEmpty()) { + if (isUnknownBases) { + final Cigar cigar = new Cigar(Collections.singletonList(new CigarElement(readLength, CigarOperator.M))); + return new DecodeResult(SAMRecord.NULL_SEQUENCE, cigar, null, -1); + } + final int srcStart = alignmentStart - refOffset; + final int copyLen = Math.min(readLength, Math.max(0, refBases.length - srcStart)); + if (copyLen < readLength) { + Arrays.fill(bases, (byte) 'N'); + if (copyLen > 0) System.arraycopy(refBases, srcStart, bases, 0, copyLen); + } else { + System.arraycopy(refBases, srcStart, bases, 0, readLength); + } + + // Normalize bases and compute MD/NM — only within reference boundary + for (int i = 0; i < readLength; i++) { + final byte rawRef = bases[i]; + bases[i] = BAM_READ_BASE_LOOKUP[rawRef & 0x7F]; + if (computeMdNm && i < copyLen) { + if (SequenceUtil.basesEqual(bases[i], rawRef) || bases[i] == 0) { + mdMatchRun++; + } else { + mdString.append(mdMatchRun); + mdString.append((char) (rawRef & 0xFF)); + mdMatchRun = 0; + nmCount++; + } + } + } + + if (mdActive) mdString.append(mdMatchRun); + final Cigar cigar = new Cigar(Collections.singletonList(new CigarElement(readLength, CigarOperator.M))); + return new DecodeResult(bases, cigar, computeMdNm ? mdString.toString() : null, computeMdNm ? 
nmCount : -1); + } + + // CIGAR building state + final List cigarElements = new ArrayList<>(); + CigarOperator lastCigOp = CigarOperator.MATCH_OR_MISMATCH; + int lastCigLen = 0; + int lastCigPos = 1; + + // Position tracking (1-based read position, 0-based ref offset from alignment start) + int posInRead = 1; + int posInSeq = 0; + + for (final ReadFeature feature : readFeatures) { + final int featurePos = feature.getPosition(); + + // Fill gap from reference (advance positions; fill bases only when not unknownBases) + if (doBasesAndMdNm) { + while (posInRead < featurePos) { + final int rp = alignmentStart + posInSeq - refOffset; + if (rp >= refBases.length) mdActive = false; + final byte rawRef = getByteOrDefault(refBases, rp, (byte) 'N'); + final byte nb = BAM_READ_BASE_LOOKUP[rawRef & 0x7F]; + bases[posInRead - 1] = nb; + if (mdActive) { + if (SequenceUtil.basesEqual(nb, rawRef) || nb == 0) { + mdMatchRun++; + } else { + mdString.append(mdMatchRun); + mdString.append((char) (rawRef & 0xFF)); + mdMatchRun = 0; + nmCount++; + } + } + posInRead++; + posInSeq++; + } + } else { + final int gap = featurePos - posInRead; + posInSeq += gap; + posInRead = featurePos; + } + + // Deactivate MD/NM if the current reference position is beyond the reference boundary, + // flushing any accumulated match run first + if (mdActive && (alignmentStart + posInSeq - refOffset) >= refBases.length) { + mdString.append(mdMatchRun); + mdMatchRun = 0; + mdActive = false; + } + + // CIGAR gap + final int gap = featurePos - (lastCigPos + lastCigLen); + if (gap > 0) { + if (lastCigOp != CigarOperator.MATCH_OR_MISMATCH) { + cigarElements.add(new CigarElement(lastCigLen, lastCigOp)); + lastCigPos += lastCigLen; + lastCigLen = gap; + } else { + lastCigLen += gap; + } + lastCigOp = CigarOperator.MATCH_OR_MISMATCH; + } + + CigarOperator featureCigOp; + int featureCigLen; + + switch (feature.getOperator()) { + case Substitution.operator: { + if (doBasesAndMdNm) { + final int rp = alignmentStart + 
posInSeq - refOffset; + final byte rawRef = getByteOrDefault(refBases, rp, (byte) 'N'); + final byte normRef = Utils.normalizeBase(rawRef); + bases[posInRead - 1] = BAM_READ_BASE_LOOKUP[ + substitutionMatrix.base(normRef, ((Substitution) feature).getCode()) & 0x7F]; + if (mdActive) { + mdString.append(mdMatchRun); + mdString.append((char) (rawRef & 0xFF)); + mdMatchRun = 0; + nmCount++; + } + } + posInRead++; + posInSeq++; + featureCigOp = CigarOperator.MATCH_OR_MISMATCH; + featureCigLen = 1; + break; + } + case ReadBase.operator: { + if (doBasesAndMdNm) { + final byte readBase = BAM_READ_BASE_LOOKUP[((ReadBase) feature).getBase() & 0x7F]; + bases[posInRead - 1] = readBase; + if (mdActive) { + final int rp = alignmentStart + posInSeq - refOffset; + final byte rawRef = getByteOrDefault(refBases, rp, (byte) 'N'); + if (SequenceUtil.basesEqual(readBase, rawRef)) { + mdMatchRun++; + } else { + mdString.append(mdMatchRun); + mdString.append((char) (rawRef & 0xFF)); + mdMatchRun = 0; + nmCount++; + } + } + } + posInRead++; + posInSeq++; + featureCigOp = CigarOperator.MATCH_OR_MISMATCH; + featureCigLen = 1; + break; + } + case Bases.operator: { + final byte[] fb = ((Bases) feature).getBases(); + if (doBasesAndMdNm) { + for (int i = 0; i < fb.length; i++) { + bases[posInRead - 1 + i] = BAM_READ_BASE_LOOKUP[fb[i] & 0x7F]; + if (mdActive) { + final int rp = alignmentStart + posInSeq + i - refOffset; + final byte rawRef = getByteOrDefault(refBases, rp, (byte) 'N'); + if (SequenceUtil.basesEqual(bases[posInRead - 1 + i], rawRef)) { + mdMatchRun++; + } else { + mdString.append(mdMatchRun); + mdString.append((char) (rawRef & 0xFF)); + mdMatchRun = 0; + nmCount++; + } + } + } + } + posInRead += fb.length; + posInSeq += fb.length; + continue; // Bases are within M region, no CIGAR update + } + case Insertion.operator: { + final byte[] seq = ((Insertion) feature).getSequence(); + if (doBasesAndMdNm) { + for (int i = 0; i < seq.length; i++) + bases[posInRead - 1 + i] = 
BAM_READ_BASE_LOOKUP[seq[i] & 0x7F]; + if (mdActive) nmCount += seq.length; + } + posInRead += seq.length; + featureCigOp = CigarOperator.INSERTION; + featureCigLen = seq.length; + break; + } + case InsertBase.operator: { + if (doBasesAndMdNm) { + bases[posInRead - 1] = BAM_READ_BASE_LOOKUP[((InsertBase) feature).getBase() & 0x7F]; + if (mdActive) nmCount++; + } + posInRead++; + featureCigOp = CigarOperator.INSERTION; + featureCigLen = 1; + break; + } + case SoftClip.operator: { + final byte[] seq = ((SoftClip) feature).getSequence(); + if (doBasesAndMdNm) { + for (int i = 0; i < seq.length; i++) + bases[posInRead - 1 + i] = BAM_READ_BASE_LOOKUP[seq[i] & 0x7F]; + } + posInRead += seq.length; + featureCigOp = CigarOperator.SOFT_CLIP; + featureCigLen = seq.length; + break; + } + case Deletion.operator: { + final int delLen = ((Deletion) feature).getLength(); + if (mdActive) { + mdString.append(mdMatchRun); + mdMatchRun = 0; + mdString.append('^'); + for (int i = 0; i < delLen; i++) { + final int rp = alignmentStart + posInSeq + i - refOffset; + final byte rawRef = getByteOrDefault(refBases, rp, (byte) 'N'); + mdString.append((char) (rawRef & 0xFF)); + } + nmCount += delLen; + } + posInSeq += delLen; + featureCigOp = CigarOperator.DELETION; + featureCigLen = delLen; + break; + } + case RefSkip.operator: + posInSeq += ((RefSkip) feature).getLength(); + featureCigOp = CigarOperator.SKIPPED_REGION; + featureCigLen = ((RefSkip) feature).getLength(); + break; + case HardClip.operator: + featureCigOp = CigarOperator.HARD_CLIP; + featureCigLen = ((HardClip) feature).getLength(); + break; + case Padding.operator: + featureCigOp = CigarOperator.PADDING; + featureCigLen = ((Padding) feature).getLength(); + break; + case Scores.operator: + case BaseQualityScore.operator: + continue; + default: + throw new CRAMException(String.format("Unrecognized read feature code: %c", feature.getOperator())); + } + + // Update CIGAR + if (lastCigOp != featureCigOp) { + if (lastCigLen > 0) 
cigarElements.add(new CigarElement(lastCigLen, lastCigOp)); + lastCigOp = featureCigOp; + lastCigLen = featureCigLen; + lastCigPos = feature.getPosition(); + } else { + lastCigLen += featureCigLen; + } + if (!featureCigOp.consumesReadBases()) lastCigPos -= featureCigLen; + } + + // Fill trailing reference bases (skip when unknownBases -- just advance positions) + if (doBasesAndMdNm) { + while (posInRead <= readLength) { + final int rp = alignmentStart + posInSeq - refOffset; + if (rp >= refBases.length) { + if (mdActive) { + mdString.append(mdMatchRun); + mdMatchRun = 0; + mdActive = false; + } + while (posInRead <= readLength) { + bases[posInRead - 1] = 'N'; + posInRead++; + } + break; + } + final byte rawRef = refBases[rp]; + bases[posInRead - 1] = BAM_READ_BASE_LOOKUP[rawRef & 0x7F]; + if (mdActive) { + if (SequenceUtil.basesEqual(bases[posInRead - 1], rawRef) || bases[posInRead - 1] == 0) { + mdMatchRun++; + } else { + mdString.append(mdMatchRun); + mdString.append((char) (rawRef & 0xFF)); + mdMatchRun = 0; + nmCount++; + } + } + posInRead++; + posInSeq++; + } + } else { + posInRead = readLength + 1; + } + + // Finalize CIGAR + if (lastCigOp != CigarOperator.M) { + if (lastCigLen > 0) cigarElements.add(new CigarElement(lastCigLen, lastCigOp)); + if (readLength >= lastCigPos + lastCigLen) { + cigarElements.add(new CigarElement(readLength - (lastCigLen + lastCigPos) + 1, CigarOperator.M)); + } + } else if (readLength > lastCigPos - 1) { + cigarElements.add(new CigarElement(readLength - lastCigPos + 1, CigarOperator.M)); + } + + final Cigar cigar = cigarElements.isEmpty() + ? new Cigar(Collections.singletonList(new CigarElement(readLength, CigarOperator.M))) + : new Cigar(cigarElements); + + if (mdActive) mdString.append(mdMatchRun); + + return new DecodeResult( + isUnknownBases ? SAMRecord.NULL_SEQUENCE : bases, + cigar, + actuallyComputeMdNm ? mdString.toString() : null, + actuallyComputeMdNm ? 
nmCount : -1); } @Override @@ -510,9 +878,9 @@ public boolean equals(Object o) { CRAMRecordReadFeatures that = (CRAMRecordReadFeatures) o; - return getReadFeaturesList() != null ? - getReadFeaturesList().equals(that.getReadFeaturesList()) : - that.getReadFeaturesList() == null; + return getReadFeaturesList() != null + ? getReadFeaturesList().equals(that.getReadFeaturesList()) + : that.getReadFeaturesList() == null; } @Override diff --git a/src/main/java/htsjdk/samtools/cram/structure/CompressionHeader.java b/src/main/java/htsjdk/samtools/cram/structure/CompressionHeader.java index 9f013662e4..eccc8236e0 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/CompressionHeader.java +++ b/src/main/java/htsjdk/samtools/cram/structure/CompressionHeader.java @@ -25,7 +25,6 @@ import htsjdk.samtools.cram.structure.block.Block; import htsjdk.samtools.cram.structure.block.BlockContentType; import htsjdk.samtools.util.RuntimeIOException; - import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; @@ -52,6 +51,7 @@ public class CompressionHeader { private final Map tagEncodingMap = new TreeMap<>(); private SubstitutionMatrix substitutionMatrix; private byte[][][] tagIDDictionary; + private TagKeyCache tagKeyCache; /** * Create a CompressionHeader using the default {@link CRAMEncodingStrategy} @@ -80,22 +80,22 @@ public CompressionHeader(final CompressionHeaderEncodingMap encodingMap) { } /** - * Read a COMPRESSION_HEADER Block from an InputStream and return its contents as a CompressionHeader. + * Read a COMPRESSION_HEADER Block from an InputStream and populate this CompressionHeader from its contents. 
* * @param cramVersion the CRAM version * @param blockStream the stream to read from - * @return a new CompressionHeader */ public CompressionHeader(final CRAMVersion cramVersion, final InputStream blockStream) { final Block compressionHeaderBlock = Block.read(cramVersion, blockStream); if (compressionHeaderBlock.getContentType() != BlockContentType.COMPRESSION_HEADER) { - throw new RuntimeIOException( - String.format("Compression header block expected, found: %s", - compressionHeaderBlock.getContentType().name())); + throw new RuntimeIOException(String.format( + "Compression header block expected, found: %s", + compressionHeaderBlock.getContentType().name())); } // get raw content since compression headers are always raw... - try (final ByteArrayInputStream internalStream = new ByteArrayInputStream(compressionHeaderBlock.getRawContent())) { + try (final ByteArrayInputStream internalStream = + new ByteArrayInputStream(compressionHeaderBlock.getRawContent())) { internalRead(internalStream); } catch (final IOException e) { throw new RuntimeIOException(e); @@ -106,7 +106,9 @@ public CompressionHeader(final CRAMVersion cramVersion, final InputStream blockS * Get the {@link CompressionHeaderEncodingMap} for this compression header. 
* @return {@link CompressionHeaderEncodingMap} for this {@link CompressionHeader} */ - public CompressionHeaderEncodingMap getEncodingMap() { return encodingMap; } + public CompressionHeaderEncodingMap getEncodingMap() { + return encodingMap; + } /** * Write this CompressionHeader out to an internal OutputStream, wrap it in a Block, and write that @@ -120,8 +122,7 @@ public void write(final CRAMVersion cramVersion, final OutputStream blockStream) internalWrite(internalOutputStream); final Block block = Block.createRawCompressionHeaderBlock(internalOutputStream.toByteArray()); block.write(cramVersion, blockStream); - } - catch (final IOException e) { + } catch (final IOException e) { throw new RuntimeIOException(e); } } @@ -151,8 +152,17 @@ public byte[][][] getTagIDDictionary() { return tagIDDictionary; } - public void setTagIdDictionary(final byte[][][] dictionary) { + public void setTagIdDictionary(final byte[][][] dictionary) { this.tagIDDictionary = dictionary; + this.tagKeyCache = new TagKeyCache(dictionary); + } + + /** + * Returns the {@link TagKeyCache} for looking up pre-computed tag key metadata. + * Built from the tag ID dictionary when the compression header is parsed. 
+ */ + public TagKeyCache getTagKeyCache() { + return tagKeyCache; } public void setSubstitutionMatrix(final SubstitutionMatrix substitutionMatrix) { @@ -228,18 +238,16 @@ private void internalRead(final InputStream is) { final int mapSize = ITF8.readUnsignedITF8(buffer); for (int i = 0; i < mapSize; i++) { - final String key = new String(new byte[]{buffer.get(), buffer.get()}); - if (RN_readNamesIncluded.equals(key)) - preserveReadNames = buffer.get() == 1; - else if (AP_alignmentPositionIsDelta.equals(key)) - APDelta = buffer.get() == 1; - else if (RR_referenceRequired.equals(key)) - referenceRequired = buffer.get() == 1; + final String key = new String(new byte[] {buffer.get(), buffer.get()}); + if (RN_readNamesIncluded.equals(key)) preserveReadNames = buffer.get() == 1; + else if (AP_alignmentPositionIsDelta.equals(key)) APDelta = buffer.get() == 1; + else if (RR_referenceRequired.equals(key)) referenceRequired = buffer.get() == 1; else if (TD_tagIdsDictionary.equals(key)) { final int size = ITF8.readUnsignedITF8(buffer); final byte[] dictionaryBytes = new byte[size]; buffer.get(dictionaryBytes); tagIDDictionary = parseDictionary(dictionaryBytes); + tagKeyCache = new TagKeyCache(tagIDDictionary); } else if (SM_substitutionMatrix.equals(key)) { // parse subs matrix here: final byte[] matrixBytes = new byte[SubstitutionMatrix.BASES_SIZE]; @@ -279,55 +287,56 @@ else if (TD_tagIdsDictionary.equals(key)) { } private void internalWrite(final OutputStream outputStream) throws IOException { + // Each map below is written to outputStream as a length-prefixed byte + // array, so we need to know the full serialized size before writing. + // Buffer to a ByteArrayOutputStream first, then emit [length][bytes]. + // Pre-sized to 16 KB: enough for typical tag sets without reallocation, + // but small enough that we don't waste memory when most of it is unused. 
+ // Rich tag sets (PacBio/Ultima flow-space, ONT mod bases) grow via the + // usual doubling -- a few reallocations are cheap relative to the final + // size. Reused across both blocks via reset(). + final ByteArrayOutputStream mapStream = new ByteArrayOutputStream(16 * 1024); + { // preservation map: - final ByteBuffer mapBuffer = ByteBuffer.allocate(1024 * 100); - ITF8.writeUnsignedITF8(5, mapBuffer); + ITF8.writeUnsignedITF8(5, mapStream); - mapBuffer.put(RN_readNamesIncluded.getBytes()); - mapBuffer.put((byte) (preserveReadNames ? 1 : 0)); + mapStream.write(RN_readNamesIncluded.getBytes()); + mapStream.write(preserveReadNames ? 1 : 0); - mapBuffer.put(AP_alignmentPositionIsDelta.getBytes()); - mapBuffer.put((byte) (APDelta ? 1 : 0)); + mapStream.write(AP_alignmentPositionIsDelta.getBytes()); + mapStream.write(APDelta ? 1 : 0); - mapBuffer.put(RR_referenceRequired.getBytes()); - mapBuffer.put((byte) (referenceRequired ? 1 : 0)); + mapStream.write(RR_referenceRequired.getBytes()); + mapStream.write(referenceRequired ? 
1 : 0); - mapBuffer.put(SM_substitutionMatrix.getBytes()); - mapBuffer.put(substitutionMatrix.getEncodedMatrix()); + mapStream.write(SM_substitutionMatrix.getBytes()); + mapStream.write(substitutionMatrix.getEncodedMatrix()); - mapBuffer.put(TD_tagIdsDictionary.getBytes()); + mapStream.write(TD_tagIdsDictionary.getBytes()); final byte[] dictionaryBytes = dictionaryToByteArray(); - ITF8.writeUnsignedITF8(dictionaryBytes.length, mapBuffer); - mapBuffer.put(dictionaryBytes); + ITF8.writeUnsignedITF8(dictionaryBytes.length, mapStream); + mapStream.write(dictionaryBytes); - mapBuffer.flip(); - final byte[] mapBytes = new byte[mapBuffer.limit()]; - mapBuffer.get(mapBytes); - - ITF8.writeUnsignedITF8(mapBytes.length, outputStream); - outputStream.write(mapBytes); + ITF8.writeUnsignedITF8(mapStream.size(), outputStream); + mapStream.writeTo(outputStream); } encodingMap.write(outputStream); { // tag encoding map: - final ByteBuffer mapBuffer = ByteBuffer.allocate(1024 * 100); - ITF8.writeUnsignedITF8(tagEncodingMap.size(), mapBuffer); + mapStream.reset(); + ITF8.writeUnsignedITF8(tagEncodingMap.size(), mapStream); for (final Integer dataSeries : tagEncodingMap.keySet()) { - ITF8.writeUnsignedITF8(dataSeries, mapBuffer); + ITF8.writeUnsignedITF8(dataSeries, mapStream); final EncodingDescriptor params = tagEncodingMap.get(dataSeries); - mapBuffer.put((byte) (0xFF & params.getEncodingID().getId())); - ITF8.writeUnsignedITF8(params.getEncodingParameters().length, mapBuffer); - mapBuffer.put(params.getEncodingParameters()); + mapStream.write(0xFF & params.getEncodingID().getId()); + ITF8.writeUnsignedITF8(params.getEncodingParameters().length, mapStream); + mapStream.write(params.getEncodingParameters()); } - mapBuffer.flip(); - final byte[] mapBytes = new byte[mapBuffer.limit()]; - mapBuffer.get(mapBytes); - ITF8.writeUnsignedITF8(mapBytes.length, outputStream); - outputStream.write(mapBytes); + ITF8.writeUnsignedITF8(mapStream.size(), outputStream); + 
mapStream.writeTo(outputStream); } } - } diff --git a/src/main/java/htsjdk/samtools/cram/structure/CompressionHeaderEncodingMap.java b/src/main/java/htsjdk/samtools/cram/structure/CompressionHeaderEncodingMap.java index 4394b49eab..b49a72f2f6 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/CompressionHeaderEncodingMap.java +++ b/src/main/java/htsjdk/samtools/cram/structure/CompressionHeaderEncodingMap.java @@ -26,7 +26,10 @@ import htsjdk.samtools.cram.CRAMException; import htsjdk.samtools.cram.compression.ExternalCompressor; -import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Params; +import htsjdk.samtools.cram.compression.TrialCompressor; +import htsjdk.samtools.cram.compression.nametokenisation.NameTokenisationDecode; +import htsjdk.samtools.cram.compression.range.RangeParams; +import htsjdk.samtools.cram.compression.rans.RANSNx16Params; import htsjdk.samtools.cram.encoding.CRAMEncoding; import htsjdk.samtools.cram.encoding.external.ByteArrayStopEncoding; import htsjdk.samtools.cram.encoding.external.ExternalByteEncoding; @@ -36,17 +39,17 @@ import htsjdk.samtools.cram.io.InputStreamUtils; import htsjdk.samtools.cram.structure.block.Block; import htsjdk.samtools.cram.structure.block.BlockCompressionMethod; +import htsjdk.samtools.util.Log; import htsjdk.utils.ValidationUtils; - import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteBuffer; -import htsjdk.samtools.util.Log; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.EnumMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -76,12 +79,15 @@ public class CompressionHeaderEncodingMap { // Set of obsolete DataSeries that are ignored on CRAM read - public static final Set DATASERIES_NOT_READ_BY_HTSJDK = Collections.unmodifiableSet(new LinkedHashSet() {{ - add(DataSeries.TC_TagCount); - add(DataSeries.TN_TagNameAndType); - 
}}); + public static final Set DATASERIES_NOT_READ_BY_HTSJDK = + Collections.unmodifiableSet(new LinkedHashSet() { + { + add(DataSeries.TC_TagCount); + add(DataSeries.TN_TagNameAndType); + } + }); - private final static Log LOG = Log.getInstance(CompressionHeaderEncodingMap.class); + private static final Log LOG = Log.getInstance(CompressionHeaderEncodingMap.class); // Encoding descriptors for each data series. (These encodings can be either EXTERNAL or CORE, although // the spec does not make a clear distinction between EXTERNAL and CODE for encodings; only for blocks. @@ -100,53 +106,88 @@ public class CompressionHeaderEncodingMap { private final CompressorCache compressorCache = new CompressorCache(); /** - * Constructor used to create the default encoding map for writing CRAMs. The encoding strategy - * parameter values are used to set compression levels, etc, but any encoding map embedded is ignored - * since this uses the default strategy. + * Constructor used to create the encoding map for writing CRAMs. The per-DataSeries compressor + * assignments are read from the strategy's compressor map (set by {@link CRAMCompressionProfile}). * - * @param encodingStrategy {@link CRAMEncodingStrategy} containing parameter values to use when creating - * the encoding map + *

    <p>Most data series use a plain external encoding with the specified compressor. Special cases: + *
    <ul> + *
    <li>{@code RN_ReadName} with {@code NAME_TOKENISER}: uses {@code ByteArrayStopEncoding} with + * the name tokeniser separator</li> + *
    <li>{@code IN_Insertion} and {@code SC_SoftClip}: use {@code ByteArrayStopEncoding} with tab delimiter</li> + *
    <li>{@code RN_ReadName} without {@code NAME_TOKENISER}: uses {@code ByteArrayStopEncoding} with tab</li> + *
    </ul>
    + * + * @param encodingStrategy {@link CRAMEncodingStrategy} containing the compressor map and compression levels */ public CompressionHeaderEncodingMap(final CRAMEncodingStrategy encodingStrategy) { ValidationUtils.nonNull(encodingStrategy, "An encoding strategy must be provided"); - ValidationUtils.validateArg( - encodingStrategy.getCustomCompressionHeaderEncodingMap() == null, - "A custom compression map cannot be used with this constructor"); - - // NOTE: all of these encodings use external blocks and compressors for actual CRAM - // data. The only use of core block encodings are as params for other (external) - // encodings, i.e., the ByteArrayLenEncoding used for tag data uses a core (sub-)encoding - // to store the length of the array that is stored in an external block. - putExternalRansOrderZeroEncoding(DataSeries.AP_AlignmentPositionOffset); - putExternalRansOrderOneEncoding(DataSeries.BA_Base); - // the BB data series is not used by this implementation when writing CRAMs - putExternalRansOrderOneEncoding(DataSeries.BF_BitFlags); - putExternalGzipEncoding(encodingStrategy, DataSeries.BS_BaseSubstitutionCode); - putExternalRansOrderOneEncoding(DataSeries.CF_CompressionBitFlags); - putExternalGzipEncoding(encodingStrategy, DataSeries.DL_DeletionLength); - putExternalGzipEncoding(encodingStrategy, DataSeries.FC_FeatureCode); - putExternalGzipEncoding(encodingStrategy, DataSeries.FN_NumberOfReadFeatures); - putExternalGzipEncoding(encodingStrategy, DataSeries.FP_FeaturePosition); - putExternalGzipEncoding(encodingStrategy, DataSeries.HC_HardClip); - putExternalByteArrayStopTabGzipEncoding(encodingStrategy, DataSeries.IN_Insertion); - putExternalGzipEncoding(encodingStrategy, DataSeries.MF_MateBitFlags); - putExternalGzipEncoding(encodingStrategy, DataSeries.MQ_MappingQualityScore); - putExternalGzipEncoding(encodingStrategy, DataSeries.NF_RecordsToNextFragment); - putExternalGzipEncoding(encodingStrategy, DataSeries.NP_NextFragmentAlignmentStart); - 
putExternalRansOrderOneEncoding(DataSeries.NS_NextFragmentReferenceSequenceID); - putExternalGzipEncoding(encodingStrategy, DataSeries.PD_padding); - // the QQ data series is not used by this implementation when writing CRAMs - putExternalRansOrderOneEncoding(DataSeries.QS_QualityScore); - putExternalRansOrderOneEncoding(DataSeries.RG_ReadGroup); - putExternalRansOrderZeroEncoding(DataSeries.RI_RefId); - putExternalRansOrderOneEncoding(DataSeries.RL_ReadLength); - putExternalByteArrayStopTabGzipEncoding(encodingStrategy, DataSeries.RN_ReadName); - putExternalGzipEncoding(encodingStrategy, DataSeries.RS_RefSkip); - putExternalByteArrayStopTabGzipEncoding(encodingStrategy, DataSeries.SC_SoftClip); - // the TC data series is obsolete - putExternalGzipEncoding(encodingStrategy, DataSeries.TL_TagIdList); - // the TN data series is obsolete - putExternalRansOrderOneEncoding(DataSeries.TS_InsertSize); + final EnumMap compressorMap = encodingStrategy.getCompressorMap(); + ValidationUtils.nonNull(compressorMap, "Encoding strategy must have a compressor map"); + final EnumMap> trialMap = + encodingStrategy.getTrialCandidatesMap(); + + for (final Map.Entry entry : compressorMap.entrySet()) { + final DataSeries ds = entry.getKey(); + final CompressorDescriptor desc = entry.getValue(); + + // Build the compressor, potentially wrapping in TrialCompressor if trial candidates exist + final ExternalCompressor compressor = buildCompressor(ds, desc, trialMap); + + // Data series with special encoding types + if (ds == DataSeries.RN_ReadName) { + if (desc.method() == BlockCompressionMethod.NAME_TOKENISER) { + putExternalEncoding( + ds, + new ByteArrayStopEncoding( + NameTokenisationDecode.NAME_SEPARATOR, ds.getExternalBlockContentId()) + .toEncodingDescriptor(), + compressor); + } else { + putExternalEncoding( + ds, + new ByteArrayStopEncoding((byte) '\t', ds.getExternalBlockContentId()) + .toEncodingDescriptor(), + compressor); + } + } else if (ds == DataSeries.IN_Insertion || ds == 
DataSeries.SC_SoftClip) { + putExternalEncoding( + ds, + new ByteArrayStopEncoding((byte) '\t', ds.getExternalBlockContentId()).toEncodingDescriptor(), + compressor); + } else { + putExternalEncoding(ds, compressor); + } + } + } + + /** + * Build a compressor for a data series, wrapping in {@link TrialCompressor} if additional + * trial candidates are configured for that series. + */ + /** + * Build a compressor for a data series, wrapping in {@link TrialCompressor} if additional + * trial candidates are configured for that series. + */ + private ExternalCompressor buildCompressor( + final DataSeries ds, + final CompressorDescriptor primaryDesc, + final EnumMap> trialMap) { + final ExternalCompressor primary = + compressorCache.getCompressorForMethod(primaryDesc.method(), primaryDesc.arg()); + + if (trialMap != null && trialMap.containsKey(ds)) { + final java.util.List trialDescs = trialMap.get(ds); + if (trialDescs != null && !trialDescs.isEmpty()) { + final java.util.List candidates = new java.util.ArrayList<>(); + candidates.add(primary); + for (final CompressorDescriptor td : trialDescs) { + candidates.add(compressorCache.getCompressorForMethod(td.method(), td.arg())); + } + return new TrialCompressor(candidates); + } + } + + return primary; } /** @@ -162,7 +203,7 @@ public CompressionHeaderEncodingMap(final InputStream inputStream) { final int mapSize = ITF8.readUnsignedITF8(buffer); for (int i = 0; i < mapSize; i++) { - final String dataSeriesAbbreviation = new String(new byte[]{buffer.get(), buffer.get()}); + final String dataSeriesAbbreviation = new String(new byte[] {buffer.get(), buffer.get()}); final DataSeries dataSeries = DataSeries.byCanonicalName(dataSeriesAbbreviation); final EncodingID id = EncodingID.values()[buffer.get()]; @@ -173,8 +214,7 @@ public CompressionHeaderEncodingMap(final InputStream inputStream) { // if TC, TN DataSeries are present, log a warning and ignore on CRAM read if (DATASERIES_NOT_READ_BY_HTSJDK.contains(dataSeries)) { 
LOG.warn("Ignoring obsolete CRAM dataseries: " + dataSeries.getCanonicalName()); - } - else { + } else { // NOTE: the compression associated with this DataSeries is a property of the BLOCK in which it // resides, not of the encoding, so the externalCompressors map isn't populated when reading a @@ -191,7 +231,8 @@ public CompressionHeaderEncodingMap(final InputStream inputStream) { */ public void putTagBlockCompression(final int tagId, final ExternalCompressor compressor) { ValidationUtils.validateArg( - Arrays.asList(DataSeries.values()).stream().noneMatch(ds -> ds.getExternalBlockContentId().intValue() == tagId), + Arrays.asList(DataSeries.values()).stream() + .noneMatch(ds -> ds.getExternalBlockContentId().intValue() == tagId), String.format("tagID %d overlaps with data series content ID", tagId)); externalCompressors.put(tagId, compressor); } @@ -209,7 +250,9 @@ public EncodingDescriptor getEncodingDescriptorForDataSeries(final DataSeries da * Get a list of all external IDs for this encoding map * @return list of all external IDs for this encoding map */ - public List getExternalIDs() { return new ArrayList(externalCompressors.keySet()); } + public List getExternalIDs() { + return new ArrayList(externalCompressors.keySet()); + } /** * Given a content ID, return a {@link Block} for that ID by obtaining the contents of the stream, @@ -218,14 +261,29 @@ public EncodingDescriptor getEncodingDescriptorForDataSeries(final DataSeries da * @param outputStream stream to compress * @return Block containing the compressed contends of the stream */ - public Block createCompressedBlockForStream(final CRAMCodecModelContext contextModel, final Integer contentId, final ByteArrayOutputStream outputStream) { + public Block createCompressedBlockForStream( + final CRAMCodecModelContext contextModel, + final Integer contentId, + final ByteArrayOutputStream outputStream) { final ExternalCompressor compressor = externalCompressors.get(contentId); final byte[] rawContent = 
outputStream.toByteArray(); - return Block.createExternalBlock( - compressor.getMethod(), - contentId, - compressor.compress(rawContent, contextModel), - rawContent.length); + // Compress first, then query the method — TrialCompressor determines its method + // during the first call to compress(). + final byte[] compressedContent = compressor.compress(rawContent, contextModel); + return Block.createExternalBlock(compressor.getMethod(), contentId, compressedContent, rawContent.length); + } + + /** + * Same as {@link #createCompressedBlockForStream} but accepts a {@link htsjdk.samtools.cram.io.CRAMByteWriter}. + */ + public Block createCompressedBlockForWriter( + final CRAMCodecModelContext contextModel, + final Integer contentId, + final htsjdk.samtools.cram.io.CRAMByteWriter writer) { + final ExternalCompressor compressor = externalCompressors.get(contentId); + final byte[] rawContent = writer.toByteArray(); + final byte[] compressedContent = compressor.compress(rawContent, contextModel); + return Block.createExternalBlock(compressor.getMethod(), contentId, compressedContent, rawContent.length); } /** @@ -280,20 +338,18 @@ public void write(final OutputStream outputStream) throws IOException { * @param encodingStrategy encoding strategy parameters to use * @return the best {@link ExternalCompressor} to use for this data */ - public ExternalCompressor getBestExternalCompressor(final byte[] data, final CRAMEncodingStrategy encodingStrategy) { + public ExternalCompressor getBestExternalCompressor( + final byte[] data, final CRAMEncodingStrategy encodingStrategy) { final ExternalCompressor gzip = compressorCache.getCompressorForMethod( - BlockCompressionMethod.GZIP, - encodingStrategy.getGZIPCompressionLevel()); + BlockCompressionMethod.GZIP, encodingStrategy.getGZIPCompressionLevel()); final int gzipLen = gzip.compress(data, null).length; final ExternalCompressor rans0 = compressorCache.getCompressorForMethod( - BlockCompressionMethod.RANS, - 
RANS4x8Params.ORDER.ZERO.ordinal()); - final int rans0Len = rans0.compress(data,null).length; + BlockCompressionMethod.RANSNx16, RANSNx16Params.ORDER.ZERO.ordinal()); + final int rans0Len = rans0.compress(data, null).length; final ExternalCompressor rans1 = compressorCache.getCompressorForMethod( - BlockCompressionMethod.RANS, - RANS4x8Params.ORDER.ONE.ordinal()); + BlockCompressionMethod.RANSNx16, RANSNx16Params.ORDER.ONE.ordinal()); final int rans1Len = rans1.compress(data, null).length; // find the best of general purpose codecs: @@ -311,7 +367,7 @@ public ExternalCompressor getBestExternalCompressor(final byte[] data, final CRA // encoding map that contains the handful of data series that htsjdk generally doesn't use // when writing, since there is no code to add those data series to the map as part of the // CRAM write implementation. - //VisibleForTesting + // VisibleForTesting void putExternalEncoding(final DataSeries dataSeries, final ExternalCompressor compressor) { // This spins up a CRAMEncoding temporarily in order to retrieve its EncodingDescriptor. 
// In reality, the encoding descriptor/parameters for each of these external encoding @@ -346,9 +402,10 @@ void putExternalEncoding(final DataSeries dataSeries, final ExternalCompressor c * @param dataSeries data series to add * @param encodingDescriptor encoding descriptor to use */ - //VisibleForTesting + // VisibleForTesting void putCoreEncoding(final DataSeries dataSeries, final EncodingDescriptor encodingDescriptor) { - ValidationUtils.validateArg(!encodingDescriptor.getEncodingID().isExternalEncoding(), + ValidationUtils.validateArg( + !encodingDescriptor.getEncodingID().isExternalEncoding(), "Attempt to use an external encoding as a core encoding"); if (externalCompressors.containsKey(dataSeries.getExternalBlockContentId())) { externalCompressors.remove(dataSeries.getExternalBlockContentId()); @@ -368,11 +425,19 @@ private void putEncoding(final DataSeries dataSeries, final EncodingDescriptor e encodingMap.put(dataSeries, encodingDescriptor); } - // add an external encoding and corresponding compressor - public void putExternalEncoding(final DataSeries dataSeries, - final EncodingDescriptor encodingDescriptor, - final ExternalCompressor compressor) { - ValidationUtils.validateArg(encodingDescriptor.getEncodingID().isExternalEncoding(), + /** + * Add an external encoding and its corresponding compressor to the encoding map. 
+ * + * @param dataSeries the data series to encode + * @param encodingDescriptor the encoding descriptor (must be an external encoding) + * @param compressor the external compressor to use for this data series' block + */ + public void putExternalEncoding( + final DataSeries dataSeries, + final EncodingDescriptor encodingDescriptor, + final ExternalCompressor compressor) { + ValidationUtils.validateArg( + encodingDescriptor.getEncodingID().isExternalEncoding(), "Attempt to use an external encoding as a core encoding"); putEncoding(dataSeries, encodingDescriptor); // add the external compressor after the call to putEncoding, since putEncoding removes @@ -380,31 +445,68 @@ public void putExternalEncoding(final DataSeries dataSeries, externalCompressors.put(dataSeries.getExternalBlockContentId(), compressor); } - private void putExternalByteArrayStopTabGzipEncoding(final CRAMEncodingStrategy encodingStrategy, final DataSeries dataSeries) { - putExternalEncoding(dataSeries, + private void putExternalByteArrayStopTabGzipEncoding( + final CRAMEncodingStrategy encodingStrategy, final DataSeries dataSeries) { + putExternalEncoding( + dataSeries, new ByteArrayStopEncoding((byte) '\t', dataSeries.getExternalBlockContentId()).toEncodingDescriptor(), - compressorCache.getCompressorForMethod(BlockCompressionMethod.GZIP, encodingStrategy.getGZIPCompressionLevel())); + compressorCache.getCompressorForMethod( + BlockCompressionMethod.GZIP, encodingStrategy.getGZIPCompressionLevel())); + } + + private void putByteArrayStopNameTokEncoding( + final CRAMEncodingStrategy encodingStrategy, final DataSeries dataSeries) { + // ByteArrayStopEncoding is paired with name tokenisation since using it with the + // NameTokenisationDecode.NAME_SEPARATOR conveniently writes the read name data in the NAME_SEPARATOR + // delimited/terminated format that is expected by the downstream tokenisation compressor code + putExternalEncoding( + dataSeries, + new 
ByteArrayStopEncoding(NameTokenisationDecode.NAME_SEPARATOR, dataSeries.getExternalBlockContentId()) + .toEncodingDescriptor(), + compressorCache.getCompressorForMethod(BlockCompressionMethod.NAME_TOKENISER, 0)); } // add an external encoding appropriate for the dataSeries value type, with a GZIP compressor private void putExternalGzipEncoding(final CRAMEncodingStrategy encodingStrategy, final DataSeries dataSeries) { putExternalEncoding( dataSeries, - compressorCache.getCompressorForMethod(BlockCompressionMethod.GZIP, encodingStrategy.getGZIPCompressionLevel())); + compressorCache.getCompressorForMethod( + BlockCompressionMethod.GZIP, encodingStrategy.getGZIPCompressionLevel())); } // add an external encoding appropriate for the dataSeries value type, with a RANS order 1 compressor private void putExternalRansOrderOneEncoding(final DataSeries dataSeries) { putExternalEncoding( dataSeries, - compressorCache.getCompressorForMethod(BlockCompressionMethod.RANS, RANS4x8Params.ORDER.ONE.ordinal())); + compressorCache.getCompressorForMethod( + BlockCompressionMethod.RANSNx16, RANSNx16Params.ORDER.ONE.ordinal())); } // add an external encoding appropriate for the dataSeries value type, with a RANS order 0 compressor private void putExternalRansOrderZeroEncoding(final DataSeries dataSeries) { putExternalEncoding( dataSeries, - compressorCache.getCompressorForMethod(BlockCompressionMethod.RANS, RANS4x8Params.ORDER.ZERO.ordinal())); + compressorCache.getCompressorForMethod( + BlockCompressionMethod.RANSNx16, RANSNx16Params.ORDER.ZERO.ordinal())); + } + + // add an external encoding appropriate for the dataSeries value type, with a FQZComp quality score compressor + private void putExternalFQZCompEncoding(final DataSeries dataSeries) { + putExternalEncoding(dataSeries, compressorCache.getCompressorForMethod(BlockCompressionMethod.FQZCOMP, 0)); + } + + // add an external encoding appropriate for the dataSeries value type, with a Range (arithmetic) order 1 compressor + private 
void putExternalRangeOrderOneEncoding(final DataSeries dataSeries) { + putExternalEncoding( + dataSeries, + compressorCache.getCompressorForMethod( + BlockCompressionMethod.ADAPTIVE_ARITHMETIC, RangeParams.ORDER_FLAG_MASK)); + } + + // add an external encoding appropriate for the dataSeries value type, with a Range (arithmetic) order 0 compressor + private void putExternalRangeOrderZeroEncoding(final DataSeries dataSeries) { + putExternalEncoding( + dataSeries, compressorCache.getCompressorForMethod(BlockCompressionMethod.ADAPTIVE_ARITHMETIC, 0)); } @Override @@ -424,5 +526,4 @@ public int hashCode() { result = 31 * result + externalCompressors.hashCode(); return result; } - -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java b/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java index a7e28511d8..a314557060 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java +++ b/src/main/java/htsjdk/samtools/cram/structure/CompressorCache.java @@ -27,15 +27,14 @@ import htsjdk.samtools.cram.compression.ExternalCompressor; import htsjdk.samtools.cram.compression.RANS4x8ExternalCompressor; import htsjdk.samtools.cram.compression.RANSNx16ExternalCompressor; -import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Decode; -import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Encode; -import htsjdk.samtools.cram.compression.rans.rans4x8.RANS4x8Params; -import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Decode; -import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Encode; -import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Params; +import htsjdk.samtools.cram.compression.rans.RANS4x8Decode; +import htsjdk.samtools.cram.compression.rans.RANS4x8Encode; +import htsjdk.samtools.cram.compression.rans.RANS4x8Params; +import htsjdk.samtools.cram.compression.rans.RANSNx16Decode; +import htsjdk.samtools.cram.compression.rans.RANSNx16Encode; +import 
htsjdk.samtools.cram.compression.rans.RANSNx16Params; import htsjdk.samtools.cram.structure.block.BlockCompressionMethod; import htsjdk.utils.ValidationUtils; - import java.util.HashMap; /** @@ -45,8 +44,9 @@ */ public class CompressorCache { private final String argErrorMessage = "Invalid compression arg (%d) requested for CRAM %s compressor"; - //keep track of the compressors we have cached - private record CompressorCacheRecord(BlockCompressionMethod method, int compressorArg) { } + // keep track of the compressors we have cached + private record CompressorCacheRecord(BlockCompressionMethod method, int compressorArg) {} + private final HashMap compressorCache = new HashMap<>(); private RANS4x8Encode sharedRANS4x8Encode; private RANS4x8Decode sharedRANS4x8Decode; @@ -61,8 +61,7 @@ private record CompressorCacheRecord(BlockCompressionMethod method, int compress * @return a cached compressor instance */ public ExternalCompressor getCompressorForMethod( - final BlockCompressionMethod compressionMethod, - final int compressorSpecificArg) { + final BlockCompressionMethod compressionMethod, final int compressorSpecificArg) { switch (compressionMethod) { case GZIP: case ADAPTIVE_ARITHMETIC: @@ -79,14 +78,13 @@ public ExternalCompressor getCompressorForMethod( return getCachedCompressorForMethod(compressionMethod, compressorSpecificArg); case RANS: { - // for efficiency, we want to share the same underlying RANS object with both order-0 and - // order-1 ExternalCompressors - final int ransArg = compressorSpecificArg == ExternalCompressor.NO_COMPRESSION_ARG ? 
- RANS4x8Params.ORDER.ZERO.ordinal() : - compressorSpecificArg; - final CompressorCacheRecord compressorRec = new CompressorCacheRecord( - BlockCompressionMethod.RANS, - ransArg); + // in previous implementations, we would cache separate order-0 and order-1 compressors for performance + // reasons; we no longer NEED to do so but retain this structure for now + final int ransArg = compressorSpecificArg == ExternalCompressor.NO_COMPRESSION_ARG + ? RANS4x8Params.ORDER.ZERO.ordinal() + : compressorSpecificArg; + final CompressorCacheRecord compressorRec = + new CompressorCacheRecord(BlockCompressionMethod.RANS, ransArg); if (!compressorCache.containsKey(compressorRec)) { if (sharedRANS4x8Encode == null) { sharedRANS4x8Encode = new RANS4x8Encode(); @@ -96,21 +94,19 @@ public ExternalCompressor getCompressorForMethod( } compressorCache.put( compressorRec, - new RANS4x8ExternalCompressor(ransArg, sharedRANS4x8Encode, sharedRANS4x8Decode) - ); + new RANS4x8ExternalCompressor(ransArg, sharedRANS4x8Encode, sharedRANS4x8Decode)); } return getCachedCompressorForMethod(compressorRec.method, compressorRec.compressorArg); } case RANSNx16: { - // for efficiency, we want to share the same underlying RANSNx16 object with both order-0 and - // order-1 ExternalCompressors - final int ransArg = compressorSpecificArg == ExternalCompressor.NO_COMPRESSION_ARG ? - RANSNx16Params.ORDER.ZERO.ordinal() : - compressorSpecificArg; - final CompressorCacheRecord compressorRec = new CompressorCacheRecord( - BlockCompressionMethod.RANSNx16, - ransArg); + // in previous implementations, we would cache separate order-0 and order-1 compressors for performance + // reasons; we no longer NEED to do so but retain this structure for now + final int ransArg = compressorSpecificArg == ExternalCompressor.NO_COMPRESSION_ARG + ? 
RANSNx16Params.ORDER.ZERO.ordinal() + : compressorSpecificArg; + final CompressorCacheRecord compressorRec = + new CompressorCacheRecord(BlockCompressionMethod.RANSNx16, ransArg); if (!compressorCache.containsKey(compressorRec)) { if (sharedRANSNx16Encode == null) { sharedRANSNx16Encode = new RANSNx16Encode(); @@ -120,8 +116,7 @@ public ExternalCompressor getCompressorForMethod( } compressorCache.put( compressorRec, - new RANSNx16ExternalCompressor(ransArg, sharedRANSNx16Encode, sharedRANSNx16Decode) - ); + new RANSNx16ExternalCompressor(ransArg, sharedRANSNx16Encode, sharedRANSNx16Decode)); } return getCachedCompressorForMethod(compressorRec.method, compressorRec.compressorArg); } @@ -131,13 +126,10 @@ public ExternalCompressor getCompressorForMethod( } } - private ExternalCompressor getCachedCompressorForMethod(final BlockCompressionMethod method, final int compressorSpecificArg) { + private ExternalCompressor getCachedCompressorForMethod( + final BlockCompressionMethod method, final int compressorSpecificArg) { return compressorCache.computeIfAbsent( new CompressorCacheRecord(method, compressorSpecificArg), - k -> ExternalCompressor.getCompressorForMethod( - method, - compressorSpecificArg) - ); + k -> ExternalCompressor.getCompressorForMethod(method, compressorSpecificArg)); } - -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/samtools/cram/structure/CompressorDescriptor.java b/src/main/java/htsjdk/samtools/cram/structure/CompressorDescriptor.java new file mode 100644 index 0000000000..be8b1935e7 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/structure/CompressorDescriptor.java @@ -0,0 +1,26 @@ +package htsjdk.samtools.cram.structure; + +import htsjdk.samtools.cram.compression.ExternalCompressor; +import htsjdk.samtools.cram.structure.block.BlockCompressionMethod; + +/** + * Describes which compression method and parameters to use for a CRAM data series block. 
+ * Pairs a {@link BlockCompressionMethod} with an optional compressor-specific integer argument + * (e.g., GZIP compression level, rANS order). Maps 1:1 to + * {@link ExternalCompressor#getCompressorForMethod(BlockCompressionMethod, int)}. + * + * @param method the block compression method + * @param arg compressor-specific argument, or {@link ExternalCompressor#NO_COMPRESSION_ARG} if none + */ +public record CompressorDescriptor(BlockCompressionMethod method, int arg) { + + /** + * Create a descriptor for a compression method that takes no argument + * (e.g., RAW, BZIP2, LZMA, NAME_TOKENISER, FQZCOMP). + * + * @param method the block compression method + */ + public CompressorDescriptor(final BlockCompressionMethod method) { + this(method, ExternalCompressor.NO_COMPRESSION_ARG); + } +} diff --git a/src/main/java/htsjdk/samtools/cram/structure/Container.java b/src/main/java/htsjdk/samtools/cram/structure/Container.java index 890fb9db81..902748488b 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/Container.java +++ b/src/main/java/htsjdk/samtools/cram/structure/Container.java @@ -31,7 +31,6 @@ import htsjdk.samtools.util.BufferedLineReader; import htsjdk.samtools.util.LineReader; import htsjdk.samtools.util.RuntimeIOException; - import java.io.*; import java.nio.ByteBuffer; import java.nio.ByteOrder; @@ -96,12 +95,8 @@ public Container( baseCount += slice.getBaseCount(); } - this.containerHeader = new ContainerHeader( - alignmentContext, - blockCount, - recordCount, - globalRecordCounter, - baseCount); + this.containerHeader = + new ContainerHeader(alignmentContext, blockCount, recordCount, globalRecordCounter, baseCount); checkSliceReferenceContexts(commonRefContext.getReferenceContextID()); } @@ -151,11 +146,7 @@ public Container(final CRAMVersion cramVersion, final InputStream inputStream, f this.slices = new ArrayList<>(); for (int sliceCounter = 0; sliceCounter < containerHeader.getLandmarks().size(); sliceCounter++) { - final Slice slice = new Slice( 
- cramVersion, - compressionHeader, - inputStream, - containerByteOffset); + final Slice slice = new Slice(cramVersion, compressionHeader, inputStream, containerByteOffset); slices.add(slice); } @@ -194,7 +185,8 @@ public int write(final CRAMVersion cramVersion, final OutputStream outputStream) } getContainerHeader().setLandmarks(landmarks); - // compression header plus all slices, if any (EOF Containers do not; File Header Containers are handled above) + // compression header plus all slices, if any (EOF Containers do not; File Header Containers are handled + // above) getContainerHeader().setContainerBlocksByteSize(tempOutputStream.size()); // Slices require the Container's landmarks and containerBlocksByteSize in case we're indexing @@ -221,9 +213,8 @@ public int write(final CRAMVersion cramVersion, final OutputStream outputStream) * @param id id from the cram header, for error reporting * @return the {@link SAMFileHeader} for this CRAM stream */ - public static SAMFileHeader readSAMFileHeaderContainer(final CRAMVersion cramVersion, - final InputStream inputStream, - final String id) { + public static SAMFileHeader readSAMFileHeaderContainer( + final CRAMVersion cramVersion, final InputStream inputStream, final String id) { final ContainerHeader containerHeader = new ContainerHeader(cramVersion, inputStream); final Block block; if (cramVersion.compatibleWith(CramVersions.CRAM_v3)) { @@ -233,8 +224,7 @@ public static SAMFileHeader readSAMFileHeaderContainer(final CRAMVersion cramVer block = Block.read(cramVersion, bais); // ignore any remaining blocks that we may have consumed from this container (i.e., samtools adds a // second 10,000 byte (raw) block of 0s as expansion padding) - } - else { + } else { // The version 2.1 test files appear to have header containers that have a container block size that is // (2 or 4 bytes) shorter than the size of the actual embedded block containing the header. 
To compensate, // this code path relies on the block size value instead. It unclear where these files came from, or what @@ -246,7 +236,8 @@ public static SAMFileHeader readSAMFileHeaderContainer(final CRAMVersion cramVer // Use a temporary, single-use compressor cache for this block, since the SAMFileHeader block // is prescribed by the spec to be gzipped only and thus not perf sensitive. - try (final InputStream blockStream = new ByteArrayInputStream(block.getUncompressedContent(new CompressorCache()))) { + try (final InputStream blockStream = + new ByteArrayInputStream(block.getUncompressedContent(new CompressorCache()))) { final ByteBuffer buffer = ByteBuffer.allocate(4); buffer.order(ByteOrder.LITTLE_ENDIAN); @@ -262,7 +253,7 @@ public static SAMFileHeader readSAMFileHeaderContainer(final CRAMVersion cramVer dataInputStream.readFully(bytes); final SAMTextHeaderCodec codec = new SAMTextHeaderCodec(); try (final InputStream byteStream = new ByteArrayInputStream(bytes); - final LineReader lineReader = new BufferedLineReader(byteStream)) { + final LineReader lineReader = new BufferedLineReader(byteStream)) { return codec.decode(lineReader, id); } } catch (final IOException e) { @@ -277,7 +268,8 @@ public static SAMFileHeader readSAMFileHeaderContainer(final CRAMVersion cramVer * @param os stream to which the header container should be written * @return the number of bytes written to the stream */ - public static long writeSAMFileHeaderContainer(final CRAMVersion cramVersion, final SAMFileHeader samFileHeader, final OutputStream os) { + public static long writeSAMFileHeaderContainer( + final CRAMVersion cramVersion, final SAMFileHeader samFileHeader, final OutputStream os) { final byte[] samFileHeaderBytes = CramIO.samHeaderToByteArray(samFileHeader); // The spec recommends "reserving" 50% more space than is required by the header, buts not // clear how to do that if you compress the block. 
Samtools appears to add a second empty @@ -317,11 +309,11 @@ public List getSAMRecords( final SAMFileHeader samFileHeader) { final List samRecords = new ArrayList<>(getContainerHeader().getNumberOfRecords()); for (final Slice slice : getSlices()) { - final List cramCompressionRecords = slice.deserializeCRAMRecords(compressorCache, validationStringency); // before we convert to SAMRecord, we need to normalize the CRAMCompressionRecord in each Slice - slice.normalizeCRAMRecords( - cramCompressionRecords, - cramReferenceRegion); + final List cramCompressionRecords = + slice.deserializeCRAMRecords(compressorCache, validationStringency); + slice.normalizeCRAMRecords(cramCompressionRecords, cramReferenceRegion); + for (final CRAMCompressionRecord cramCompressionRecord : cramCompressionRecords) { final SAMRecord samRecord = cramCompressionRecord.toSAMRecord(samFileHeader); samRecord.setValidationStringency(validationStringency); @@ -331,11 +323,26 @@ public List getSAMRecords( return samRecords; } - public ContainerHeader getContainerHeader() { return containerHeader; } - public CompressionHeader getCompressionHeader() { return compressionHeader; } - public AlignmentContext getAlignmentContext() { return containerHeader.getAlignmentContext(); } - public long getContainerByteOffset() { return containerByteOffset; } - public List getSlices() { return slices; } + public ContainerHeader getContainerHeader() { + return containerHeader; + } + + public CompressionHeader getCompressionHeader() { + return compressionHeader; + } + + public AlignmentContext getAlignmentContext() { + return containerHeader.getAlignmentContext(); + } + + public long getContainerByteOffset() { + return containerByteOffset; + } + + public List getSlices() { + return slices; + } + public boolean isEOF() { return containerHeader.isEOF() && (getSlices() == null || getSlices().size() == 0); } @@ -380,28 +387,34 @@ public List getBAIEntries(final CompressorCache compressorCache) { * @throws CRAMException 
when the Container is in an invalid state */ private void distributeIndexingParametersToSlices() { + if (slices.isEmpty()) { + return; + } final int lastSliceIndex = slices.size() - 1; for (int i = 0; i < lastSliceIndex; i++) { final Slice slice = slices.get(i); slice.setLandmarkIndex(i); slice.setByteOffsetOfSliceHeaderBlock(containerHeader.getLandmarks().get(i)); - slice.setByteSizeOfSliceBlocks(containerHeader.getLandmarks().get(i + 1) - slice.getByteOffsetOfSliceHeaderBlock()); + slice.setByteSizeOfSliceBlocks( + containerHeader.getLandmarks().get(i + 1) - slice.getByteOffsetOfSliceHeaderBlock()); } // get the last slice in the list, and final Slice lastSlice = slices.get(lastSliceIndex); lastSlice.setLandmarkIndex(lastSliceIndex); lastSlice.setByteOffsetOfSliceHeaderBlock(containerHeader.getLandmarks().get(lastSliceIndex)); - lastSlice.setByteSizeOfSliceBlocks(containerHeader.getContainerBlocksByteSize() - lastSlice.getByteOffsetOfSliceHeaderBlock()); + lastSlice.setByteSizeOfSliceBlocks( + containerHeader.getContainerBlocksByteSize() - lastSlice.getByteOffsetOfSliceHeaderBlock()); } private void checkSliceReferenceContexts(final int actualReferenceContextID) { if (actualReferenceContextID == ReferenceContext.MULTIPLE_REFERENCE_ID) { for (final Slice slice : getSlices()) { - if (slice.getAlignmentContext().getReferenceContext().getReferenceContextID() != ReferenceContext.MULTIPLE_REFERENCE_ID) { - throw new CRAMException( - String.format("Found slice with reference context (%d). Multi-reference container can only contain multi-ref slices.", - slice.getAlignmentContext().getReferenceContext().getReferenceContextID())); + if (slice.getAlignmentContext().getReferenceContext().getReferenceContextID() + != ReferenceContext.MULTIPLE_REFERENCE_ID) { + throw new CRAMException(String.format( + "Found slice with reference context (%d). 
Multi-reference container can only contain multi-ref slices.", + slice.getAlignmentContext().getReferenceContext().getReferenceContextID())); } } } @@ -421,7 +434,8 @@ private final AlignmentContext getDerivedAlignmentContext(final ReferenceContext for (final Slice slice : slices) { final AlignmentContext alignmentContext = slice.getAlignmentContext(); start = Math.min(start, alignmentContext.getAlignmentStart()); - endPlusOne = Math.max(endPlusOne, alignmentContext.getAlignmentStart() + alignmentContext.getAlignmentSpan()); + endPlusOne = Math.max( + endPlusOne, alignmentContext.getAlignmentStart() + alignmentContext.getAlignmentSpan()); } alignmentStart = start; alignmentSpan = endPlusOne - start; @@ -442,8 +456,7 @@ private static ReferenceContext getDerivedReferenceContextFromSlices(final List< .collect(Collectors.toSet()); if (sliceRefContexts.isEmpty()) { throw new CRAMException("Cannot construct a container without any slices"); - } - else if (sliceRefContexts.size() > 1) { + } else if (sliceRefContexts.size() > 1) { return ReferenceContext.MULTIPLE_REFERENCE_CONTEXT; } @@ -452,12 +465,10 @@ else if (sliceRefContexts.size() > 1) { @Override public String toString() { - return String.format("%s offset %d nSlices %d", + return String.format( + "%s offset %d nSlices %d", containerHeader.toString(), getContainerByteOffset(), - getSlices() == null ? - -1 : - getSlices().size()); + getSlices() == null ? 
-1 : getSlices().size()); } - } diff --git a/src/main/java/htsjdk/samtools/cram/structure/ContainerHeader.java b/src/main/java/htsjdk/samtools/cram/structure/ContainerHeader.java index fd4cac79d7..0183ba7b36 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/ContainerHeader.java +++ b/src/main/java/htsjdk/samtools/cram/structure/ContainerHeader.java @@ -29,7 +29,6 @@ import htsjdk.samtools.cram.io.*; import htsjdk.samtools.cram.ref.ReferenceContext; import htsjdk.samtools.util.RuntimeIOException; - import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -64,6 +63,7 @@ public class ContainerHeader { * - Slice 1 has offset 109000 and size 14456 (123456 - 109000) */ private List landmarks; + private int checksum = 0; private int containerBlocksByteSize; @@ -113,22 +113,15 @@ public ContainerHeader( final int recordCount, final long globalRecordCounter, final int baseCount) { - this(alignmentContext, - blockCount, - 0, - recordCount, - globalRecordCounter, - baseCount, - new ArrayList<>(), - 0); + this(alignmentContext, blockCount, 0, recordCount, globalRecordCounter, baseCount, new ArrayList<>(), 0); } /** - * Create a container header from an {@link InputStream}. + * Create a container header from an {@link InputStream}. Populates the container header values + * but leaves the body empty (no slices and blocks). * * @param cramVersion the CRAM version to assume * @param inputStream the input stream from which to read - * @return a new {@link ContainerHeader} object with container header values filled out but empty body (no slices and blocks). 
*/ public ContainerHeader(final CRAMVersion cramVersion, final InputStream inputStream) { this.containerBlocksByteSize = CramInt.readInt32(inputStream); @@ -149,16 +142,16 @@ public ContainerHeader(final CRAMVersion cramVersion, final InputStream inputStr * @param containerBlocksByteSize size of the SAMFileHeader block to be embedded in this container */ public static ContainerHeader makeSAMFileHeaderContainer(final int containerBlocksByteSize) { - return new ContainerHeader( - // we need to assign SOME alignment context for this bogus/special header container... - AlignmentContext.UNMAPPED_UNPLACED_CONTEXT, - 1, // block count - containerBlocksByteSize, - 0, // record count - 0, // global record count - 0, // base count - Collections.emptyList(), // landmarks - 0); // checksum + return new ContainerHeader( + // we need to assign SOME alignment context for this bogus/special header container... + AlignmentContext.UNMAPPED_UNPLACED_CONTEXT, + 1, // block count + containerBlocksByteSize, + 0, // record count + 0, // global record count + 0, // base count + Collections.emptyList(), // landmarks + 0); // checksum } public int getContainerBlocksByteSize() { @@ -169,7 +162,9 @@ public void setContainerBlocksByteSize(int containerBlocksByteSize) { this.containerBlocksByteSize = containerBlocksByteSize; } - public AlignmentContext getAlignmentContext() { return alignmentContext; } + public AlignmentContext getAlignmentContext() { + return alignmentContext; + } public int getNumberOfRecords() { return recordCount; @@ -209,7 +204,10 @@ public int write(final CRAMVersion cramVersion, final OutputStream outputStream) final CRC32OutputStream crc32OutputStream = new CRC32OutputStream(outputStream); int length = (CramInt.writeInt32(getContainerBlocksByteSize(), crc32OutputStream) + 7) / 8; - length += (ITF8.writeUnsignedITF8(alignmentContext.getReferenceContext().getReferenceContextID(), crc32OutputStream) + 7) / 8; + length += (ITF8.writeUnsignedITF8( + 
alignmentContext.getReferenceContext().getReferenceContextID(), crc32OutputStream) + + 7) + / 8; length += (ITF8.writeUnsignedITF8(alignmentContext.getAlignmentStart(), crc32OutputStream) + 7) / 8; length += (ITF8.writeUnsignedITF8(alignmentContext.getAlignmentSpan(), crc32OutputStream) + 7) / 8; length += (ITF8.writeUnsignedITF8(getNumberOfRecords(), crc32OutputStream) + 7) / 8; @@ -224,7 +222,7 @@ public int write(final CRAMVersion cramVersion, final OutputStream outputStream) } catch (final IOException e) { throw new RuntimeIOException(e); } - length += 4 ; + length += 4; } return length; @@ -234,19 +232,22 @@ public int write(final CRAMVersion cramVersion, final OutputStream outputStream) public String toString() { return String.format( "%s, nRecords=%d, nBlocks=%d, nBases=%d, globalCounter=%d", - alignmentContext, recordCount, blockCount, baseCount, globalRecordCounter); + alignmentContext, recordCount, blockCount, baseCount, globalRecordCounter); } public boolean isEOF() { - final boolean v3 = containerBlocksByteSize == CramIO.EOF_BLOCK_SIZE_V3 && alignmentContext.getReferenceContext().isUnmappedUnplaced() - && alignmentContext.getAlignmentStart() == CramIO.EOF_ALIGNMENT_START && blockCount == 1 + final boolean v3 = containerBlocksByteSize == CramIO.EOF_BLOCK_SIZE_V3 + && alignmentContext.getReferenceContext().isUnmappedUnplaced() + && alignmentContext.getAlignmentStart() == CramIO.EOF_ALIGNMENT_START + && blockCount == 1 && recordCount == 0; - final boolean v2 = containerBlocksByteSize == CramIO.EOF_BLOCK_SIZE_V2 && alignmentContext.getReferenceContext().isUnmappedUnplaced() - && alignmentContext.getAlignmentStart() == CramIO.EOF_ALIGNMENT_START && blockCount == 1 + final boolean v2 = containerBlocksByteSize == CramIO.EOF_BLOCK_SIZE_V2 + && alignmentContext.getReferenceContext().isUnmappedUnplaced() + && alignmentContext.getAlignmentStart() == CramIO.EOF_ALIGNMENT_START + && blockCount == 1 && recordCount == 0; return v3 || v2; } - } diff --git 
a/src/main/java/htsjdk/samtools/cram/structure/CramHeader.java b/src/main/java/htsjdk/samtools/cram/structure/CramHeader.java index 78942c78dc..781a8d8b94 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/CramHeader.java +++ b/src/main/java/htsjdk/samtools/cram/structure/CramHeader.java @@ -18,7 +18,6 @@ package htsjdk.samtools.cram.structure; import htsjdk.samtools.cram.common.CRAMVersion; - import java.util.Arrays; import java.util.Objects; @@ -49,7 +48,7 @@ public CramHeader(final CRAMVersion cramVersion, final String id) { this.id = new byte[CRAM_ID_LENGTH]; Arrays.fill(this.id, (byte) 0); if (id != null) { - System.arraycopy(id.getBytes(),0, this.id, 0, Math.min(id.length(), this.id.length)); + System.arraycopy(id.getBytes(), 0, this.id, 0, Math.min(id.length(), this.id.length)); } } @@ -66,8 +65,7 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; final CramHeader that = (CramHeader) o; - return Objects.equals(cramVersion, that.cramVersion) && - Arrays.equals(id, that.id); + return Objects.equals(cramVersion, that.cramVersion) && Arrays.equals(id, that.id); } @Override diff --git a/src/main/java/htsjdk/samtools/cram/structure/DataSeries.java b/src/main/java/htsjdk/samtools/cram/structure/DataSeries.java index 354d50cff2..20e0f223a2 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/DataSeries.java +++ b/src/main/java/htsjdk/samtools/cram/structure/DataSeries.java @@ -18,7 +18,6 @@ package htsjdk.samtools.cram.structure; import htsjdk.samtools.cram.CRAMException; - import java.util.Collections; import java.util.Map; import java.util.Optional; @@ -34,73 +33,75 @@ */ public enum DataSeries { - // in rough encoding/decoding order, by group + // Content IDs match htslib's cram_DS_ID enum (cram_structs.h) for easier cross-implementation + // debugging. 
These IDs are written into each container's compression header encoding map and + // are only used for newly written files — existing files encode their own ID mapping. // Main - BF_BitFlags (DataSeriesType.INT, "BF", 1), - CF_CompressionBitFlags (DataSeriesType.INT, "CF", 2), + BF_BitFlags(DataSeriesType.INT, "BF", 15), + CF_CompressionBitFlags(DataSeriesType.INT, "CF", 16), // Positional - RI_RefId (DataSeriesType.INT, "RI", 3), - RL_ReadLength (DataSeriesType.INT, "RL", 4), - AP_AlignmentPositionOffset (DataSeriesType.INT, "AP", 5), - RG_ReadGroup (DataSeriesType.INT, "RG", 6), + RI_RefId(DataSeriesType.INT, "RI", 33), + RL_ReadLength(DataSeriesType.INT, "RL", 25), + AP_AlignmentPositionOffset(DataSeriesType.INT, "AP", 17), + RG_ReadGroup(DataSeriesType.INT, "RG", 18), // Read Name - RN_ReadName (DataSeriesType.BYTE_ARRAY, "RN", 7), + RN_ReadName(DataSeriesType.BYTE_ARRAY, "RN", 11), // Mate Record - NF_RecordsToNextFragment (DataSeriesType.INT, "NF", 8), - MF_MateBitFlags (DataSeriesType.INT, "MF", 9), - NS_NextFragmentReferenceSequenceID (DataSeriesType.INT, "NS", 10), - NP_NextFragmentAlignmentStart (DataSeriesType.INT, "NP", 11), - TS_InsertSize (DataSeriesType.INT, "TS", 12), + NF_RecordsToNextFragment(DataSeriesType.INT, "NF", 24), + MF_MateBitFlags(DataSeriesType.INT, "MF", 21), + NS_NextFragmentReferenceSequenceID(DataSeriesType.INT, "NS", 20), + NP_NextFragmentAlignmentStart(DataSeriesType.INT, "NP", 23), + TS_InsertSize(DataSeriesType.INT, "TS", 22), // Auxiliary Tags - TL_TagIdList (DataSeriesType.INT, "TL", 13), + TL_TagIdList(DataSeriesType.INT, "TL", 32), // Retained for backward compatibility on CRAM read. 
See https://github.com/samtools/hts-specs/issues/598 // https://github.com/samtools/htsjdk/issues/1571 - TC_TagCount (DataSeriesType.INT, "TC", 14), - TN_TagNameAndType (DataSeriesType.INT, "TN", 15), + TC_TagCount(DataSeriesType.INT, "TC", 44), + TN_TagNameAndType(DataSeriesType.INT, "TN", 39), // Mapped Reads - MQ_MappingQualityScore (DataSeriesType.INT, "MQ", 16), + MQ_MappingQualityScore(DataSeriesType.INT, "MQ", 19), // Read Feature Records - FN_NumberOfReadFeatures (DataSeriesType.INT, "FN", 17), - FP_FeaturePosition (DataSeriesType.INT, "FP", 18), - FC_FeatureCode (DataSeriesType.BYTE, "FC", 19), + FN_NumberOfReadFeatures(DataSeriesType.INT, "FN", 26), + FP_FeaturePosition(DataSeriesType.INT, "FP", 28), + FC_FeatureCode(DataSeriesType.BYTE, "FC", 27), // Read Feature Codes - BB_Bases (DataSeriesType.BYTE_ARRAY, "BB", 20), - QQ_scores (DataSeriesType.BYTE_ARRAY, "QQ", 21), - BA_Base (DataSeriesType.BYTE, "BA", 22), + BB_Bases(DataSeriesType.BYTE_ARRAY, "BB", 37), + QQ_scores(DataSeriesType.BYTE_ARRAY, "QQ", 38), + BA_Base(DataSeriesType.BYTE, "BA", 30), // NOTE: the CramRecordReader and CramRecordWriter split the QS_QualityScore into two separate // DataSeriesReader/Writer(s), one uses the params described here (BYTE) and one uses BYTE_ARRAY - QS_QualityScore (DataSeriesType.BYTE, "QS", 23), - BS_BaseSubstitutionCode (DataSeriesType.BYTE, "BS", 24), - IN_Insertion (DataSeriesType.BYTE_ARRAY, "IN", 25), - DL_DeletionLength (DataSeriesType.INT, "DL", 26), - RS_RefSkip (DataSeriesType.INT, "RS", 27), - SC_SoftClip (DataSeriesType.BYTE_ARRAY, "SC", 28), - PD_padding (DataSeriesType.INT, "PD", 29), - HC_HardClip (DataSeriesType.INT, "HC", 30), + QS_QualityScore(DataSeriesType.BYTE, "QS", 12), + BS_BaseSubstitutionCode(DataSeriesType.BYTE, "BS", 31), + IN_Insertion(DataSeriesType.BYTE_ARRAY, "IN", 13), + DL_DeletionLength(DataSeriesType.INT, "DL", 29), + RS_RefSkip(DataSeriesType.INT, "RS", 34), + SC_SoftClip(DataSeriesType.BYTE_ARRAY, "SC", 14), + 
PD_padding(DataSeriesType.INT, "PD", 35), + HC_HardClip(DataSeriesType.INT, "HC", 36), - // For Testing Only + // For Testing Only — IDs match htslib's DS_TM=45, DS_TV=46 // NOTE: these are not listed in the spec - TM_TestMark (DataSeriesType.INT, "TM", 31), - TV_TestMark (DataSeriesType.INT, "TV", 32); + TM_TestMark(DataSeriesType.INT, "TM", 45), + TV_TestMark(DataSeriesType.INT, "TV", 46); private final DataSeriesType type; private final String canonicalName; @@ -139,11 +140,13 @@ public Integer getExternalBlockContentId() { */ public static DataSeries byCanonicalName(final String dataSeriesAbbreviation) { if (dataSeriesAbbreviation.length() != 2) { - throw new CRAMException("Data Series Canonical Name should be exactly two characters: " + dataSeriesAbbreviation); + throw new CRAMException( + "Data Series Canonical Name should be exactly two characters: " + dataSeriesAbbreviation); } return Optional.ofNullable(CANONICAL_NAME_MAP.get(dataSeriesAbbreviation)) - .orElseThrow(() -> new CRAMException("Could not find Data Series Encoding for: " + dataSeriesAbbreviation)); + .orElseThrow( + () -> new CRAMException("Could not find Data Series Encoding for: " + dataSeriesAbbreviation)); } private static final Map CANONICAL_NAME_MAP = diff --git a/src/main/java/htsjdk/samtools/cram/structure/EncodingDescriptor.java b/src/main/java/htsjdk/samtools/cram/structure/EncodingDescriptor.java index 62904ae72d..f82c7bf009 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/EncodingDescriptor.java +++ b/src/main/java/htsjdk/samtools/cram/structure/EncodingDescriptor.java @@ -18,7 +18,6 @@ package htsjdk.samtools.cram.structure; import htsjdk.samtools.util.StringUtil; - import java.util.Arrays; /** @@ -61,8 +60,7 @@ public String toString() { "%s: (%s)", getEncodingID().name(), StringUtil.bytesToHexString( - Arrays.copyOfRange( - getEncodingParameters(),0, Math.max(20, getEncodingParameters().length)))); + Arrays.copyOfRange(getEncodingParameters(), 0, Math.max(20, 
getEncodingParameters().length)))); } @Override @@ -74,7 +72,6 @@ public boolean equals(Object o) { if (getEncodingID() != that.getEncodingID()) return false; return Arrays.equals(getEncodingParameters(), that.getEncodingParameters()); - } @Override @@ -83,5 +80,4 @@ public int hashCode() { result = 31 * result + Arrays.hashCode(getEncodingParameters()); return result; } - } diff --git a/src/main/java/htsjdk/samtools/cram/structure/ReadTag.java b/src/main/java/htsjdk/samtools/cram/structure/ReadTag.java index 9d1a44127d..0404e3642f 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/ReadTag.java +++ b/src/main/java/htsjdk/samtools/cram/structure/ReadTag.java @@ -20,7 +20,6 @@ import htsjdk.samtools.*; import htsjdk.samtools.SAMRecord.SAMTagAndValue; import htsjdk.samtools.util.StringUtil; - import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.charset.Charset; @@ -48,9 +47,16 @@ public class ReadTag implements Comparable { private short code; private byte index; + /** + * Construct a ReadTag from a 3-byte tag ID and raw value bytes. + * + * @param id the tag ID packed as an int (2 bytes tag name + 1 byte type) + * @param dataAsByteArray the raw tag value bytes + * @param validationStringency validation stringency for parsing + */ public ReadTag(final int id, final byte[] dataAsByteArray, ValidationStringency validationStringency) { this.type = (char) (0xFF & id); - key = new String(new char[]{(char) ((id >> 16) & 0xFF), (char) ((id >> 8) & 0xFF)}); + key = new String(new char[] {(char) ((id >> 16) & 0xFF), (char) ((id >> 8) & 0xFF)}); value = restoreValueFromByteArray(type, dataAsByteArray, validationStringency); keyType3Bytes = this.key + this.type; @@ -59,11 +65,28 @@ public ReadTag(final int id, final byte[] dataAsByteArray, ValidationStringency code = SAMTag.makeBinaryTag(this.key); } + /** + * Construct a ReadTag using pre-cached key metadata to avoid repeated String allocation. 
+ * + * @param cached pre-computed key metadata from the {@link TagKeyCache} + * @param dataAsByteArray the raw tag value bytes + * @param validationStringency validation stringency for parsing + */ + public ReadTag( + final TagKeyCache.TagKeyInfo cached, + final byte[] dataAsByteArray, + ValidationStringency validationStringency) { + this.type = cached.type; + this.key = cached.key; + this.keyType3Bytes = cached.keyType3Bytes; + this.keyType3BytesAsInt = cached.keyType3BytesAsInt; + this.code = cached.code; + this.value = restoreValueFromByteArray(type, dataAsByteArray, validationStringency); + } + private ReadTag(final String key, final char type, final Object value) { - if (key == null) - throw new NullPointerException("Tag key cannot be null."); - if (value == null) - throw new NullPointerException("Tag value cannot be null."); + if (key == null) throw new NullPointerException("Tag key cannot be null."); + if (value == null) throw new NullPointerException("Tag value cannot be null."); this.value = value; @@ -83,7 +106,12 @@ private ReadTag(final String key, final char type, final Object value) { code = SAMTag.makeBinaryTag(this.key); } - // two bytes are tag name and one byte is type + /** + * Pack a 3-byte tag ID (2 bytes name + 1 byte type) into an int. + * + * @param name byte array of length 3 (tag name char 1, char 2, type char) + * @return the packed int representation + */ public static int name3BytesToInt(final byte[] name) { int value = 0xFF & name[0]; value <<= 8; @@ -94,6 +122,13 @@ public static int name3BytesToInt(final byte[] name) { return value; } + /** + * Pack a 2-character tag name and a type character into a 3-byte int. + * + * @param name two-character tag name (e.g. "NM") + * @param type single-character type code (e.g. 
'i', 'Z') + * @return the packed int representation + */ public static int nameType3BytesToInt(final String name, final char type) { int value = 0xFF & name.charAt(0); value <<= 8; @@ -104,38 +139,59 @@ public static int nameType3BytesToInt(final String name, final char type) { return value; } - // two bytes are tag name and one byte is type - public static String intToNameType3Bytes(final int value) { + /** + * Unpack a 3-byte tag ID int into a String. If {@code withColon} is false, returns a + * 3-character string like "NMi"; if true, returns a 4-character string like "NM:i". + * + * @param value the packed int + * @param withColon if true, insert ':' between the 2-char name and the type char + * @return unpacked tag ID string + */ + public static String intToNameType(final int value, final boolean withColon) { final byte b3 = (byte) (0xFF & value); final byte b2 = (byte) (0xFF & (value >> 8)); final byte b1 = (byte) (0xFF & (value >> 16)); - return new String(new byte[]{b1, b2, b3}); + return withColon ? new String(new byte[] {b1, b2, ':', b3}) : new String(new byte[] {b1, b2, b3}); } - //TODO: consolidate this with the method above, and add some tests - public static String intToNameType4Bytes(final int value) { - final byte b3 = (byte) (0xFF & value); - final byte b2 = (byte) (0xFF & (value >> 8)); - final byte b1 = (byte) (0xFF & (value >> 16)); + /** Shorthand for {@link #intToNameType(int, boolean) intToNameType(value, false)}. */ + public static String intToNameType3Bytes(final int value) { + return intToNameType(value, false); + } - return new String(new byte[]{b1, b2, ':', b3}); + /** Shorthand for {@link #intToNameType(int, boolean) intToNameType(value, true)}. */ + public static String intToNameType4Bytes(final int value) { + return intToNameType(value, true); } + /** Create a {@link SAMTagAndValue} from this ReadTag's key and value. 
*/ public SAMTagAndValue createSAMTag() { return new SAMTagAndValue(key, value); } + /** + * Create a ReadTag from a 4-character "XX:T" key-and-type string and a value. + * + * @param keyAndType 4-character string in "XX:T" format (e.g. "NM:i") + * @param value the tag value + * @return a new ReadTag + */ public static ReadTag deriveTypeFromKeyAndType(final String keyAndType, final Object value) { - if (keyAndType.length() != 4) - throw new RuntimeException("Tag key and type must be 4 char long: " + keyAndType); + if (keyAndType.length() != 4) throw new RuntimeException("Tag key and type must be 4 char long: " + keyAndType); return new ReadTag(keyAndType.substring(0, 2), keyAndType.charAt(3), value); } + /** + * Create a ReadTag by inferring the CRAM type code from the Java type of the value. + * + * @param key two-character tag name (e.g. "NM") + * @param value the tag value (String, Character, Number, or array) + * @return a new ReadTag + */ public static ReadTag deriveTypeFromValue(final String key, final Object value) { - if (key.length() != 2) - throw new RuntimeException("Tag key must be 2 char long: " + key); + if (key.length() != 2) throw new RuntimeException("Tag key must be 2 char long: " + key); return new ReadTag(key, getTagValueType(value), value); } @@ -161,11 +217,13 @@ public String getKeyAndType() { return keyAndType; } + /** Serialize this tag's value to a byte array using CRAM/BAM binary encoding. 
*/ public byte[] getValueAsByteArray() { return writeSingleValue((byte) type, value, false); } - private static Object restoreValueFromByteArray(final char type, final byte[] array, ValidationStringency validationStringency) { + private static Object restoreValueFromByteArray( + final char type, final byte[] array, ValidationStringency validationStringency) { final ByteBuffer buffer = ByteBuffer.wrap(array); buffer.order(ByteOrder.LITTLE_ENDIAN); return readSingleValue((byte) type, buffer, validationStringency); @@ -180,29 +238,32 @@ private static char getTagValueType(final Object value) { } else if (value instanceof Float) { return 'f'; } else if (value instanceof Number) { - if (!(value instanceof Byte || value instanceof Short - || value instanceof Integer || value instanceof Long)) { - throw new IllegalArgumentException("Unrecognized tag type " - + value.getClass().getName()); + if (!(value instanceof Byte + || value instanceof Short + || value instanceof Integer + || value instanceof Long)) { + throw new IllegalArgumentException( + "Unrecognized tag type " + value.getClass().getName()); } return getIntegerType(((Number) value).longValue()); } /* - * Note that H tag type is never written anymore, because B style is - * more compact. else if (value instanceof byte[]) { return 'H'; } - */ else if (value instanceof byte[] || value instanceof short[] - || value instanceof int[] || value instanceof float[]) { + * Note that H tag type is never written anymore, because B style is + * more compact. 
else if (value instanceof byte[]) { return 'H'; } + */ else if (value instanceof byte[] + || value instanceof short[] + || value instanceof int[] + || value instanceof float[]) { return 'B'; } else { - throw new IllegalArgumentException( - "When writing BAM, unrecognized tag type " - + value.getClass().getName()); + throw new IllegalArgumentException("When writing BAM, unrecognized tag type " + + value.getClass().getName()); } } // copied from net.sf.samtools.BinaryTagCodec: - static private char getIntegerType(final long val) { + private static char getIntegerType(final long val) { if (val > MAX_UINT) { - throw new IllegalArgumentException("Integer attribute value too large: "+val); + throw new IllegalArgumentException("Integer attribute value too large: " + val); } if (val > MAX_INT) { return 'I'; @@ -228,8 +289,7 @@ static private char getIntegerType(final long val) { if (val >= Integer.MIN_VALUE) { return 'i'; } - throw new IllegalArgumentException( - "Integer attribute value too negative to be encoded in BAM"); + throw new IllegalArgumentException("Integer attribute value too negative to be encoded in BAM"); } public void setIndex(final byte i) { @@ -253,8 +313,15 @@ protected ByteBuffer initialValue() { private static final Charset charset = Charset.forName("US-ASCII"); - public static byte[] writeSingleValue(final byte tagType, final Object value, - final boolean isUnsignedArray) { + /** + * Serialize a single tag value to a byte array in BAM binary format. + * + * @param tagType the BAM type code (e.g. 
'i', 'Z', 'B') + * @param value the value to serialize + * @param isUnsignedArray if true and the value is an array, use unsigned array sub-type codes + * @return the serialized bytes + */ + public static byte[] writeSingleValue(final byte tagType, final Object value, final boolean isUnsignedArray) { final ByteBuffer buffer = bufferLocal.get(); buffer.clear(); switch (tagType) { @@ -302,8 +369,7 @@ public static byte[] writeSingleValue(final byte tagType, final Object value, writeArray(value, isUnsignedArray, buffer); break; default: - throw new SAMFormatException("Unrecognized tag type: " - + (char) tagType); + throw new SAMFormatException("Unrecognized tag type: " + (char) tagType); } buffer.flip(); @@ -313,43 +379,44 @@ public static byte[] writeSingleValue(final byte tagType, final Object value, return bytes; } - private static void writeArray(final Object value, - final boolean isUnsignedArray, final ByteBuffer buffer) { + private static void writeArray(final Object value, final boolean isUnsignedArray, final ByteBuffer buffer) { if (value instanceof byte[]) { buffer.put((byte) (isUnsignedArray ? 'C' : 'c')); final byte[] array = (byte[]) value; buffer.putInt(array.length); - for (final byte element : array) - buffer.put(element); + for (final byte element : array) buffer.put(element); } else if (value instanceof short[]) { buffer.put((byte) (isUnsignedArray ? 'S' : 's')); final short[] array = (short[]) value; buffer.putInt(array.length); - for (final short element : array) - buffer.putShort(element); + for (final short element : array) buffer.putShort(element); } else if (value instanceof int[]) { buffer.put((byte) (isUnsignedArray ? 
'I' : 'i')); final int[] array = (int[]) value; buffer.putInt(array.length); - for (final int element : array) - buffer.putInt(element); + for (final int element : array) buffer.putInt(element); } else if (value instanceof float[]) { buffer.put((byte) 'f'); final float[] array = (float[]) value; buffer.putInt(array.length); - for (final float element : array) - buffer.putFloat(element); + for (final float element : array) buffer.putFloat(element); - } else - throw new SAMException("Unrecognized array value type: " - + value.getClass()); + } else throw new SAMException("Unrecognized array value type: " + value.getClass()); } - public static Object readSingleValue(final byte tagType, - final ByteBuffer byteBuffer, ValidationStringency validationStringency) { + /** + * Read a single tag value from a ByteBuffer in BAM binary format. + * + * @param tagType the BAM type code (e.g. 'i', 'Z', 'B') + * @param byteBuffer little-endian ByteBuffer positioned at the start of the value + * @param validationStringency validation stringency for error handling + * @return the deserialized value as the appropriate Java type + */ + public static Object readSingleValue( + final byte tagType, final ByteBuffer byteBuffer, ValidationStringency validationStringency) { switch (tagType) { case 'Z': return readNullTerminatedString(byteBuffer); @@ -358,13 +425,17 @@ public static Object readSingleValue(final byte tagType, case 'I': final long val = byteBuffer.getInt() & 0xffffffffL; if (val <= Integer.MAX_VALUE) { - return (int)val; + return (int) val; } // If it won't fit into a signed integer, but is within range for an unsigned 32-bit integer, // return it directly as a long - if (! 
SAMUtils.isValidUnsignedIntegerAttribute(val)) { - SAMUtils.processValidationError(new SAMValidationError(SAMValidationError.Type.TAG_VALUE_TOO_LARGE, - "Unsigned integer is out of range for a 32-bit unsigned value: " + val, null), validationStringency); + if (!SAMUtils.isValidUnsignedIntegerAttribute(val)) { + SAMUtils.processValidationError( + new SAMValidationError( + SAMValidationError.Type.TAG_VALUE_TOO_LARGE, + "Unsigned integer is out of range for a 32-bit unsigned value: " + val, + null), + validationStringency); } return val; case 'i': @@ -385,12 +456,10 @@ public static Object readSingleValue(final byte tagType, final String hexRep = readNullTerminatedString(byteBuffer); return StringUtil.hexStringToBytes(hexRep); case 'B': - final TagValueAndUnsignedArrayFlag valueAndFlag = readArray( - byteBuffer); + final TagValueAndUnsignedArrayFlag valueAndFlag = readArray(byteBuffer); return valueAndFlag.value; default: - throw new SAMFormatException("Unrecognized tag type: " - + (char) tagType); + throw new SAMFormatException("Unrecognized tag type: " + (char) tagType); } } @@ -401,8 +470,7 @@ public static Object readSingleValue(final byte tagType, * @return CVO containing the value in in-memory Object form, and a flag * indicating whether it is unsigned or not. 
*/ - private static TagValueAndUnsignedArrayFlag readArray( - final ByteBuffer byteBuffer) { + private static TagValueAndUnsignedArrayFlag readArray(final ByteBuffer byteBuffer) { final byte arrayType = byteBuffer.get(); final boolean isUnsigned = Character.isUpperCase(arrayType); final int length = byteBuffer.getInt(); @@ -442,8 +510,7 @@ private static TagValueAndUnsignedArrayFlag readArray( } default: - throw new SAMFormatException("Unrecognized tag array type: " - + (char) arrayType); + throw new SAMFormatException("Unrecognized tag array type: " + (char) arrayType); } return new TagValueAndUnsignedArrayFlag(value, isUnsigned); } @@ -453,7 +520,8 @@ private static String readNullTerminatedString(final ByteBuffer byteBuffer) { byteBuffer.mark(); final int startPosition = byteBuffer.position(); //noinspection StatementWithEmptyBody - while (byteBuffer.get() != 0) ; + while (byteBuffer.get() != 0) + ; final int endPosition = byteBuffer.position(); // Don't count null terminator @@ -471,14 +539,14 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; final ReadTag readTag = (ReadTag) o; - return keyType3BytesAsInt == readTag.keyType3BytesAsInt && - type == readTag.type && - code == readTag.code && - index == readTag.index && - Objects.equals(key, readTag.key) && - Objects.equals(keyAndType, readTag.keyAndType) && - Objects.equals(keyType3Bytes, readTag.keyType3Bytes) && - Objects.equals(value, readTag.value); + return keyType3BytesAsInt == readTag.keyType3BytesAsInt + && type == readTag.type + && code == readTag.code + && index == readTag.index + && Objects.equals(key, readTag.key) + && Objects.equals(keyAndType, readTag.keyAndType) + && Objects.equals(keyType3Bytes, readTag.keyType3Bytes) + && Objects.equals(value, readTag.value); } @Override diff --git a/src/main/java/htsjdk/samtools/cram/structure/Slice.java b/src/main/java/htsjdk/samtools/cram/structure/Slice.java index 99b2e0dd02..0cfffe25af 
100644 --- a/src/main/java/htsjdk/samtools/cram/structure/Slice.java +++ b/src/main/java/htsjdk/samtools/cram/structure/Slice.java @@ -24,7 +24,6 @@ import htsjdk.samtools.cram.build.CRAMReferenceRegion; import htsjdk.samtools.cram.common.CRAMVersion; import htsjdk.samtools.cram.common.CramVersions; -import htsjdk.samtools.cram.digest.ContentDigests; import htsjdk.samtools.cram.encoding.reader.CramRecordReader; import htsjdk.samtools.cram.encoding.writer.CramRecordWriter; import htsjdk.samtools.cram.io.CramIntArray; @@ -32,15 +31,14 @@ import htsjdk.samtools.cram.io.InputStreamUtils; import htsjdk.samtools.cram.io.LTF8; import htsjdk.samtools.cram.ref.ReferenceContext; -import htsjdk.samtools.cram.structure.block.Block; import htsjdk.samtools.cram.ref.ReferenceContextType; +import htsjdk.samtools.cram.structure.block.Block; import htsjdk.samtools.cram.structure.block.BlockContentType; import htsjdk.samtools.util.BinaryCodec; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.RuntimeIOException; import htsjdk.samtools.util.SequenceUtil; import htsjdk.utils.ValidationUtils; - import java.io.*; import java.math.BigInteger; import java.util.*; @@ -69,7 +67,7 @@ public class Slice { private final AlignmentContext alignmentContext; // ref sequence, alignment start and span private final int nRecords; private final long globalRecordCounter; - private final int nSliceBlocks; // includes the core block and external blocks, but not the header block + private final int nSliceBlocks; // includes the core block and external blocks, but not the header block private List contentIDs; private int embeddedReferenceBlockContentID = EMBEDDED_REFERENCE_ABSENT_CONTENT_ID; private byte[] referenceMD5 = new byte[MD5_BYTE_SIZE]; @@ -98,7 +96,7 @@ public class Slice { // not part of the stream, and the individual records are not decoded until they're requested (they are // not decoded during indexing, with the exception of MULTI_REF slices, where its required that the slice // be 
resolved into individual reference contexts for inclusion in the index). - private int mappedReadsCount = 0; // mapped (rec.getReadUnmappedFlag() != true) + private int mappedReadsCount = 0; // mapped (rec.getReadUnmappedFlag() != true) private int unmappedReadsCount = 0; // unmapped (rec.getReadUnmappedFlag() == true) private int unplacedReadsCount = 0; // nocoord (alignmentStart == SAMRecord.NO_ALIGNMENT_START) @@ -123,7 +121,8 @@ public Slice( this.cramVersion = cramVersion; sliceHeaderBlock = Block.read(cramVersion, inputStream); if (sliceHeaderBlock.getContentType() != BlockContentType.MAPPED_SLICE) { - throw new RuntimeException("Slice Header Block expected, found: " + sliceHeaderBlock.getContentType().name()); + throw new RuntimeException("Slice Header Block expected, found: " + + sliceHeaderBlock.getContentType().name()); } final InputStream parseInputStream = new ByteArrayInputStream(sliceHeaderBlock.getRawContent()); @@ -151,7 +150,7 @@ public Slice( readTagBytes, 0, readTagBytes.length, ValidationStringency.DEFAULT_STRINGENCY)); } - //NOTE: this reads the underlying blocks from the stream, but doesn't decode them because we don't want + // NOTE: this reads the underlying blocks from the stream, but doesn't decode them because we don't want // to do this automatically since there are case where we want to iterate through containers or slices // (i.e., during indexing, or when satisfying index queries) when we want to consume the underlying blocks, // but not actually decode them @@ -171,7 +170,6 @@ public Slice( * @param compressionHeader the enclosing {@link Container}'s Compression Header * @param containerByteOffset * @param globalRecordCounter - * @return a Slice corresponding to the given records * * Determines whether the slice is single ref, unmapped or multi reference, and derives alignment * boundaries for the slice if single ref. 
@@ -201,7 +199,9 @@ public Slice( this.compressionHeader = compressionHeader; this.byteOffsetOfContainer = containerByteOffset; - final ContentDigests hasher = ContentDigests.create(ContentDigests.ALL); + // htslib does not write content digest tags (BD/SD/B5/S5/B1/S1) into slice headers. + // These are optional per the spec, and computing SHA-512 + SHA-1 per record is very expensive. + // Block-level CRC32 (required by CRAM 3.0+) provides data integrity verification. final Set referenceContexts = new HashSet<>(); // ignore these values if we later determine this Slice is not single-ref int singleRefAlignmentStart = Integer.MAX_VALUE; @@ -209,7 +209,6 @@ public Slice( int baseCount = 0; for (final CRAMCompressionRecord record : records) { - hasher.add(record); baseCount += record.getReadLength(); if (record.isPlaced()) { @@ -234,16 +233,22 @@ public Slice( } } - this.alignmentContext = getDerivedAlignmentContext( - referenceContexts, - singleRefAlignmentStart, - singleRefAlignmentEnd); + this.alignmentContext = + getDerivedAlignmentContext(referenceContexts, singleRefAlignmentStart, singleRefAlignmentEnd); - sliceTags = hasher.getAsTags(); + sliceTags = null; nRecords = records.size(); this.baseCount = baseCount; this.globalRecordCounter = globalRecordCounter; + // Populate context model with per-record metadata needed by codecs like FQZComp + contextModel.populateFromRecords(records); + + // Link mate pairs within this slice as "attached" instead of "detached". + // Attached mates only store a record offset (NF) instead of full mate info (MF, NS, NP, TS), + // significantly reducing the compressed size for coordinate-sorted paired-end data. 
+ linkMatesWithinSlice(records); + final CramRecordWriter writer = new CramRecordWriter(this); sliceBlocks = writer.writeToSliceBlocks(contextModel, records, alignmentContext.getAlignmentStart()); @@ -251,16 +256,86 @@ public Slice( nSliceBlocks = caclulateNumberOfBlocks(); } - public CRAMVersion getCramVersion() { return cramVersion; } + /** + * Scan records in this slice for mate pairs and link them as "attached" instead of "detached". + * For each pair of records with the same read name that are both paired and neither is + * secondary/supplementary, the earlier record is marked with {@code CF_HAS_MATE_DOWNSTREAM} + * and the NF (records-to-next-fragment) offset is set. The later record remains detached + * but will have its mate info restored from the linked record during decode. + * + *

    Records without a mate in this slice, or that are secondary/supplementary, remain detached. + * + * @param records the CRAM records in this slice + */ + private static void linkMatesWithinSlice(final List records) { + // Map read name → index of first occurrence (for mate pairing) + final java.util.HashMap readNameToIndex = new java.util.HashMap<>(records.size()); + + for (int i = 0; i < records.size(); i++) { + final CRAMCompressionRecord record = records.get(i); + if (!record.isReadPaired() || record.isSecondaryAlignment() || record.isSupplementary()) { + // Unpaired, secondary, or supplementary reads stay detached + continue; + } + + final String readName = record.getReadName(); + final Integer previousIndex = readNameToIndex.get(readName); + + if (previousIndex == null) { + // First occurrence of this read name — remember it + readNameToIndex.put(readName, i); + } else { + // Second occurrence — attempt to link as attached mate pair + final CRAMCompressionRecord previous = records.get(previousIndex); + + // Validate that TLEN is consistent — if the recomputed insert size + // would differ from the original, keep both records detached to preserve + // the original TLEN values (matching htslib's cross-validation behavior) + final int computedTlen = CRAMCompressionRecord.computeInsertSize(previous, record); + if (previous.getTemplateSize() != computedTlen || record.getTemplateSize() != -computedTlen) { + // TLEN mismatch — keep both detached + readNameToIndex.remove(readName); + continue; + } + + // Mark the earlier record as having its mate downstream + previous.setDetached(false); + previous.setHasMateDownStream(true); + previous.setRecordsToNextFragment(i - previousIndex - 1); + + // The later record is the downstream mate — it's not detached but also + // doesn't have a mate downstream (it IS the downstream mate) + record.setDetached(false); + record.setHasMateDownStream(false); + + // Remove from map so we don't match a third record with the same name + 
// (supplementary/secondary reads are already filtered above) + readNameToIndex.remove(readName); + } + } + } + + public CRAMVersion getCramVersion() { + return cramVersion; + } // May be null - public Block getSliceHeaderBlock() { return sliceHeaderBlock; } + public Block getSliceHeaderBlock() { + return sliceHeaderBlock; + } + + public AlignmentContext getAlignmentContext() { + return alignmentContext; + } + + public SliceBlocks getSliceBlocks() { + return sliceBlocks; + } - public AlignmentContext getAlignmentContext() { return alignmentContext; } - public SliceBlocks getSliceBlocks() { return sliceBlocks; } public int getNumberOfRecords() { return nRecords; } + public long getGlobalRecordCounter() { return globalRecordCounter; } @@ -269,7 +344,10 @@ public long getGlobalRecordCounter() { * @return the number of blocks as defined by the CRAM spec; this is 1 for the * core block plus the number of external blocks (does not include the slice header block); */ - public int getNumberOfBlocks() { return nSliceBlocks; } + public int getNumberOfBlocks() { + return nSliceBlocks; + } + public List getContentIDs() { return contentIDs; } @@ -277,7 +355,10 @@ public List getContentIDs() { private void setContentIDs(final List contentIDs) { this.contentIDs = contentIDs; } - public byte[] getReferenceMD5() { return referenceMD5; } + + public byte[] getReferenceMD5() { + return referenceMD5; + } /** * The Slice's offset in bytes from the beginning of the Container's Compression Header @@ -343,20 +424,18 @@ private int getUnplacedReadsCount() { * @param embeddedReferenceBlockContentID */ public void setEmbeddedReferenceContentID(final int embeddedReferenceBlockContentID) { - if (this.embeddedReferenceBlockContentID != EMBEDDED_REFERENCE_ABSENT_CONTENT_ID && - this.embeddedReferenceBlockContentID != embeddedReferenceBlockContentID) { - throw new CRAMException( - String.format("Can't reset embedded reference content ID (old %d new %d)", - this.embeddedReferenceBlockContentID, 
embeddedReferenceBlockContentID)); - - } - if (this.embeddedReferenceBlock != null && - this.embeddedReferenceBlock.getContentId() != embeddedReferenceBlockContentID) { - throw new CRAMException( - String.format("Attempt to set embedded reference block content ID (%d) that is in conflict" + - "with the content ID (%d) of the existing reference block ID", - embeddedReferenceBlockContentID, - this.embeddedReferenceBlock.getContentId())); + if (this.embeddedReferenceBlockContentID != EMBEDDED_REFERENCE_ABSENT_CONTENT_ID + && this.embeddedReferenceBlockContentID != embeddedReferenceBlockContentID) { + throw new CRAMException(String.format( + "Can't reset embedded reference content ID (old %d new %d)", + this.embeddedReferenceBlockContentID, embeddedReferenceBlockContentID)); + } + if (this.embeddedReferenceBlock != null + && this.embeddedReferenceBlock.getContentId() != embeddedReferenceBlockContentID) { + throw new CRAMException(String.format( + "Attempt to set embedded reference block content ID (%d) that is in conflict" + + "with the content ID (%d) of the existing reference block ID", + embeddedReferenceBlockContentID, this.embeddedReferenceBlock.getContentId())); } this.embeddedReferenceBlockContentID = embeddedReferenceBlockContentID; } @@ -373,19 +452,20 @@ public int getEmbeddedReferenceContentID() { public void setEmbeddedReferenceBlock(final Block embeddedReferenceBlock) { ValidationUtils.nonNull(embeddedReferenceBlock, "Embedded reference block must be non-null"); - ValidationUtils.validateArg(embeddedReferenceBlock.getContentId() != EMBEDDED_REFERENCE_ABSENT_CONTENT_ID, - String.format("Invalid content ID (%d) for embedded reference block", embeddedReferenceBlock.getContentId())); - ValidationUtils.validateArg(embeddedReferenceBlock.getContentType() == BlockContentType.EXTERNAL, + ValidationUtils.validateArg( + embeddedReferenceBlock.getContentId() != EMBEDDED_REFERENCE_ABSENT_CONTENT_ID, + String.format( + "Invalid content ID (%d) for embedded reference 
block", embeddedReferenceBlock.getContentId())); + ValidationUtils.validateArg( + embeddedReferenceBlock.getContentType() == BlockContentType.EXTERNAL, String.format("Invalid embedded reference block type (%s)", embeddedReferenceBlock.getContentType())); if (this.embeddedReferenceBlock != null) { throw new CRAMException("Can't reset the slice embedded reference block"); - } else if (this.embeddedReferenceBlockContentID != EMBEDDED_REFERENCE_ABSENT_CONTENT_ID && - embeddedReferenceBlock.getContentId() != this.embeddedReferenceBlockContentID) { - throw new CRAMException( - String.format( - "Embedded reference block content id (%d) conflicts with existing block if (%d)", - embeddedReferenceBlock.getContentId(), - this.embeddedReferenceBlockContentID)); + } else if (this.embeddedReferenceBlockContentID != EMBEDDED_REFERENCE_ABSENT_CONTENT_ID + && embeddedReferenceBlock.getContentId() != this.embeddedReferenceBlockContentID) { + throw new CRAMException(String.format( + "Embedded reference block content id (%d) conflicts with existing block if (%d)", + embeddedReferenceBlock.getContentId(), this.embeddedReferenceBlockContentID)); } setEmbeddedReferenceContentID(embeddedReferenceBlock.getContentId()); @@ -397,9 +477,13 @@ public void setEmbeddedReferenceBlock(final Block embeddedReferenceBlock) { * @return embedded reference block. May be null. */ // Unused because embedded reference isn't implemented for write - public Block getEmbeddedReferenceBlock() { return embeddedReferenceBlock; } + public Block getEmbeddedReferenceBlock() { + return embeddedReferenceBlock; + } - public CompressionHeader getCompressionHeader() { return compressionHeader; } + public CompressionHeader getCompressionHeader() { + return compressionHeader; + } /** * Reads and decodes the underlying blocks and returns a list of CRAMCompressionRecord. 
This isn't done initially @@ -415,8 +499,7 @@ public void setEmbeddedReferenceBlock(final Block embeddedReferenceBlock) { * @return list of raw (not normalized) CRAMCompressionRecord for this Slice ({@link #normalizeCRAMRecords}) */ public ArrayList deserializeCRAMRecords( - final CompressorCache compressorCache, - final ValidationStringency validationStringency) { + final CompressorCache compressorCache, final ValidationStringency validationStringency) { final CramRecordReader cramRecordReader = new CramRecordReader(this, compressorCache, validationStringency); final ArrayList cramCompressionRecords = new ArrayList<>(nRecords); @@ -425,7 +508,8 @@ public ArrayList deserializeCRAMRecords( int prevAlignmentStart = alignmentContext.getAlignmentStart(); for (int i = 0; i < nRecords; i++) { // read the new record and update the running prevAlignmentStart - final CRAMCompressionRecord cramCompressionRecord = cramRecordReader.readCRAMRecord(globalRecordCounter + i, prevAlignmentStart); + final CRAMCompressionRecord cramCompressionRecord = + cramRecordReader.readCRAMRecord(globalRecordCounter + i, prevAlignmentStart); prevAlignmentStart = cramCompressionRecord.getAlignmentStart(); cramCompressionRecords.add(cramCompressionRecord); } @@ -446,8 +530,8 @@ public ArrayList deserializeCRAMRecords( * @param cramCompressionRecords CRAMCompressionRecords to normalize * @param cramReferenceRegion the reference region for this slice */ - public void normalizeCRAMRecords(final List cramCompressionRecords, - final CRAMReferenceRegion cramReferenceRegion) { + public void normalizeCRAMRecords( + final List cramCompressionRecords, final CRAMReferenceRegion cramReferenceRegion) { boolean hasEmbeddedReference = false; if (compressionHeader.isReferenceRequired()) { // get the reference bases required for the entire slice and validate the reference MD5 @@ -471,14 +555,15 @@ public void normalizeCRAMRecords(final List cramCompressi // restore mate pairing first: for (final 
CRAMCompressionRecord record : cramCompressionRecords) { - if (record.isReadPaired() && - !record.isDetached() && - record.isHasMateDownStream()) { + if (record.isReadPaired() && !record.isDetached() && record.isHasMateDownStream()) { final CRAMCompressionRecord downMate = cramCompressionRecords.get( // getRecordsToNextFragment returns the value from the NF ("next fragment") data series, // which is interpreted as the number of records to skip within this slice to find the next // mate for this fragment - (int) (record.getSequentialIndex() + record.getRecordsToNextFragment() + 1L - globalRecordCounter)); + (int) (record.getSequentialIndex() + + record.getRecordsToNextFragment() + + 1L + - globalRecordCounter)); record.setNextSegment(downMate); downMate.setPreviousSegment(record); } @@ -498,21 +583,20 @@ public void normalizeCRAMRecords(final List cramCompressi // resolve bases: for (final CRAMCompressionRecord record : cramCompressionRecords) { if (!record.isSegmentUnmapped()) { // read bases for unmapped are restored directly from the input stream - if (compressionHeader.isReferenceRequired() && - getAlignmentContext().getReferenceContext().isMultiRef() && - !record.isUnknownBases() && - !hasEmbeddedReference) { + if (compressionHeader.isReferenceRequired() + && getAlignmentContext().getReferenceContext().isMultiRef() + && !record.isUnknownBases() + && !hasEmbeddedReference) { // if the slice is a multi-ref slice because the reads are not coordinate-sorted, this code // can wind up needing to re-resolve the reference bases for *each* record in the slice, which // can in turn be pathologically slow, especially if the reference source is remote cramReferenceRegion.fetchReferenceBasesByRegion( record.getReferenceIndex(), - record.getAlignmentStart() - 1, // 1 based to 0-based + record.getAlignmentStart() - 1, // 1 based to 0-based record.getAlignmentEnd() - record.getAlignmentStart() + 1); } - record.restoreReadBases( - cramReferenceRegion, - 
getCompressionHeader().getSubstitutionMatrix()); + record.restoreBasesAndTags( + cramReferenceRegion, getCompressionHeader().getSubstitutionMatrix()); } } @@ -523,7 +607,7 @@ public void normalizeCRAMRecords(final List cramCompressi // in this last pass, set all records as normalized record.setIsNormalized(); } - } + } private int caclulateNumberOfBlocks() { // Each Slice has 1 core data block, plus zero or more external data blocks. @@ -543,7 +627,8 @@ public void write(final CRAMVersion cramVersion, final OutputStream outputStream private byte[] createSliceHeaderBlockContent(final CRAMVersion cramVersion) { final ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); - ITF8.writeUnsignedITF8(getAlignmentContext().getReferenceContext().getReferenceContextID(), byteArrayOutputStream); + ITF8.writeUnsignedITF8( + getAlignmentContext().getReferenceContext().getReferenceContextID(), byteArrayOutputStream); ITF8.writeUnsignedITF8(getAlignmentContext().getAlignmentStart(), byteArrayOutputStream); ITF8.writeUnsignedITF8(getAlignmentContext().getAlignmentSpan(), byteArrayOutputStream); ITF8.writeUnsignedITF8(getNumberOfRecords(), byteArrayOutputStream); @@ -586,11 +671,13 @@ private void baiIndexInitializationCheck() { final StringBuilder error = new StringBuilder(); if (byteOffsetOfSliceHeaderBlock == UNINITIALIZED_INDEXING_PARAMETER) { - error.append("Cannot index this Slice for BAI because its byteOffsetFromCompressionHeaderStart is unknown.").append(System.lineSeparator()); + error.append("Cannot index this Slice for BAI because its byteOffsetFromCompressionHeaderStart is unknown.") + .append(System.lineSeparator()); } if (landmarkIndex == UNINITIALIZED_INDEXING_PARAMETER) { - error.append("Cannot index this Slice for BAI because its index is unknown.").append(System.lineSeparator()); + error.append("Cannot index this Slice for BAI because its index is unknown.") + .append(System.lineSeparator()); } if (error.length() > 0) { @@ -606,11 +693,14 @@ 
private void craiIndexInitializationCheck() { final StringBuilder error = new StringBuilder(); if (byteOffsetOfSliceHeaderBlock == UNINITIALIZED_INDEXING_PARAMETER) { - error.append("Cannot index this Slice for CRAI because its byteOffsetFromCompressionHeaderStart is unknown.").append(System.lineSeparator()); + error.append( + "Cannot index this Slice for CRAI because its byteOffsetFromCompressionHeaderStart is unknown.") + .append(System.lineSeparator()); } if (byteSizeOfSliceBlocks == UNINITIALIZED_INDEXING_PARAMETER) { - error.append("Cannot index this Slice for CRAI because its byteSize is unknown.").append(System.lineSeparator()); + error.append("Cannot index this Slice for CRAI because its byteSize is unknown.") + .append(System.lineSeparator()); } if (error.length() > 0) { @@ -639,13 +729,12 @@ private static final AlignmentContext getDerivedAlignmentContext( if (referenceContext.isMappedSingleRef()) { AlignmentContext.validateAlignmentContext( - true, referenceContext, - singleRefAlignmentStart, - singleRefAlignmentEnd - singleRefAlignmentStart + 1); - return new AlignmentContext( + true, referenceContext, singleRefAlignmentStart, singleRefAlignmentEnd - singleRefAlignmentStart + 1); + return new AlignmentContext( + referenceContext, singleRefAlignmentStart, singleRefAlignmentEnd - singleRefAlignmentStart + 1); } else if (referenceContext.isUnmappedUnplaced()) { return AlignmentContext.UNMAPPED_UNPLACED_CONTEXT; } else { @@ -659,17 +748,17 @@ private void validateAlignmentSpanForReference(final CRAMReferenceRegion cramRef return; } - if (referenceBases == null && - alignmentContext.getAlignmentStart() > 0 && - alignmentContext.getReferenceContext().isMappedSingleRef()) { - throw new CRAMException ("No reference bases found for mapped slice ."); + if (referenceBases == null + && alignmentContext.getAlignmentStart() > 0 + && alignmentContext.getReferenceContext().isMappedSingleRef()) { + throw new CRAMException("No reference bases found for mapped slice ."); 
} - //TODO: CRAMComplianceTest/c1#bounds triggers this (the reads are mapped beyond reference length), - // and CRAMEdgeCasesTest.testNullsAndBeyondRef seems to deliberately test that reads that extend - // beyond the reference length should be ok ? - if (((alignmentContext.getAlignmentStart()-1) < cramReferenceRegion.getRegionStart()) || - (alignmentContext.getAlignmentSpan() > cramReferenceRegion.getRegionLength())) { + // Reads are permitted to extend beyond the reference length (tested by CRAMComplianceTest/c1#bounds + // and CRAMEdgeCasesTest.testNullsAndBeyondRef). This matches samtools/htslib behavior. Log a warning + // but don't fail, since BAMs produced by some aligners contain such reads. + if (((alignmentContext.getAlignmentStart() - 1) < cramReferenceRegion.getRegionStart()) + || (alignmentContext.getAlignmentSpan() > cramReferenceRegion.getRegionLength())) { log.warn(String.format( "Slice mapped outside of reference bases length %d: slice reference context=%s, start=%d, span=%d, counter=%d.", cramReferenceRegion.getFullContigLength(), @@ -680,17 +769,14 @@ private void validateAlignmentSpanForReference(final CRAMReferenceRegion cramRef } } - //VisibleForTesting + // VisibleForTesting void validateReferenceBases(final CRAMReferenceRegion cramReferenceRegion) { if (alignmentContext.getReferenceContext().isMappedSingleRef() && compressionHeader.isReferenceRequired()) { validateAlignmentSpanForReference(cramReferenceRegion); - if (!referenceMD5IsValid( - cramReferenceRegion, - alignmentContext.getAlignmentSpan(), - referenceMD5)) { + if (!referenceMD5IsValid(cramReferenceRegion, alignmentContext.getAlignmentSpan(), referenceMD5)) { throw new CRAMException( String.format( - "The MD5 for the reference failed to validate against the expected value %032x. %s.", + "The MD5 for the reference failed to validate against the expected value %032x. 
%s.", new BigInteger(1, referenceMD5), "This indicates that the supplied reference is not the one originally used to create the CRAM.")); } @@ -698,9 +784,7 @@ void validateReferenceBases(final CRAMReferenceRegion cramReferenceRegion) { } private static boolean referenceMD5IsValid( - final CRAMReferenceRegion cramReferenceRegion, - final int alignmentSpan, - final byte[] expectedMD5) { + final CRAMReferenceRegion cramReferenceRegion, final int alignmentSpan, final byte[] expectedMD5) { final byte[] referenceBases = cramReferenceRegion.getCurrentReferenceBases(); final int span = Math.min(alignmentSpan, referenceBases.length); // use offset 0 here, based on the assumption that we're always using a CRAMReferenceRegion that @@ -730,8 +814,9 @@ public void setReferenceMD5(final CRAMReferenceRegion cramReferenceRegion) { validateAlignmentSpanForReference(cramReferenceRegion); final byte[] referenceBases = cramReferenceRegion.getCurrentReferenceBases(); - //TODO: how can an alignment context have a start "< 1" ? - if (! alignmentContext.getReferenceContext().isMappedSingleRef() && alignmentContext.getAlignmentStart() < 1) { + // Multi-ref and unmapped/unplaced slices can have alignmentStart < 1 (e.g. 0 for unmapped). + // In that case there's no meaningful reference span, so use a zeroed MD5. 
+ if (!alignmentContext.getReferenceContext().isMappedSingleRef() && alignmentContext.getAlignmentStart() < 1) { referenceMD5 = new byte[MD5_BYTE_SIZE]; } else { final int span = Math.min(alignmentContext.getAlignmentSpan(), referenceBases.length); @@ -747,10 +832,9 @@ public void setReferenceMD5(final CRAMReferenceRegion cramReferenceRegion) { * * @param validationStringency how strict to be when reading CRAM records */ - //VisibleForTesting + // VisibleForTesting public Map getMultiRefAlignmentSpans( - final CompressorCache compressorCache, - final ValidationStringency validationStringency) { + final CompressorCache compressorCache, final ValidationStringency validationStringency) { if (!getAlignmentContext().getReferenceContext().isMultiRef()) { throw new IllegalStateException("can only create multiref span reader for multiref context slice"); } @@ -761,14 +845,16 @@ public Map getMultiRefAlignmentSpans( // See https://github.com/samtools/htsjdk/issues/1347. // Note that this doesn't normalize the CRAMCompressionRecord, which bypasses resolution of bases // against the reference. - final List cramCompressionRecords = deserializeCRAMRecords(compressorCache, validationStringency); + final List cramCompressionRecords = + deserializeCRAMRecords(compressorCache, validationStringency); final Map spans = new HashMap<>(); cramCompressionRecords.forEach(r -> mergeRecordSpan(r, spans)); return Collections.unmodifiableMap(spans); } - private void mergeRecordSpan(final CRAMCompressionRecord cramCompressionRecord, final Map spans) { + private void mergeRecordSpan( + final CRAMCompressionRecord cramCompressionRecord, final Map spans) { // if unplaced: create or replace the current spans map entry. 
// we don't need to combine entries for different records because // we count them elsewhere and span is irrelevant @@ -777,21 +863,12 @@ private void mergeRecordSpan(final CRAMCompressionRecord cramCompressionRecord, if (cramCompressionRecord.isSegmentUnmapped()) { if (cramCompressionRecord.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) { // count it as both unmapped *and* unplaced, since for BAI we distinguish between them - final AlignmentSpan span = new AlignmentSpan( - SAMRecord.NO_ALIGNMENT_START, - 0, - 0, - 1, - 1); + final AlignmentSpan span = new AlignmentSpan(SAMRecord.NO_ALIGNMENT_START, 0, 0, 1, 1); spans.merge(ReferenceContext.UNMAPPED_UNPLACED_CONTEXT, span, AlignmentSpan::combine); } else { // merge it in with the reference context its mapped to final AlignmentSpan span = new AlignmentSpan( - cramCompressionRecord.getAlignmentStart(), - cramCompressionRecord.getReadLength(), - 0, - 1, - 0); + cramCompressionRecord.getAlignmentStart(), cramCompressionRecord.getReadLength(), 0, 1, 0); final int refIndex = cramCompressionRecord.getReferenceIndex(); spans.merge(new ReferenceContext(refIndex), span, AlignmentSpan::combine); } @@ -825,9 +902,8 @@ public List getCRAIEntries(final CompressorCache compressorCache) { craiIndexInitializationCheck(); if (alignmentContext.getReferenceContext().isMultiRef()) { - final Map spans = getMultiRefAlignmentSpans( - compressorCache, - ValidationStringency.DEFAULT_STRINGENCY); + final Map spans = + getMultiRefAlignmentSpans(compressorCache, ValidationStringency.DEFAULT_STRINGENCY); return spans.entrySet().stream() .map(e -> new CRAIEntry( @@ -836,22 +912,19 @@ public List getCRAIEntries(final CompressorCache compressorCache) { e.getValue().getAlignmentSpan(), byteOffsetOfContainer, byteOffsetOfSliceHeaderBlock, - byteSizeOfSliceBlocks) - ) + byteSizeOfSliceBlocks)) .sorted() .collect(Collectors.toList()); } else { // single ref or unmapped final int sequenceId = 
alignmentContext.getReferenceContext().getReferenceContextID(); - return Collections.singletonList( - new CRAIEntry( - sequenceId, - alignmentContext.getAlignmentStart(), - alignmentContext.getAlignmentSpan(), - byteOffsetOfContainer, - byteOffsetOfSliceHeaderBlock, - byteSizeOfSliceBlocks) - ); + return Collections.singletonList(new CRAIEntry( + sequenceId, + alignmentContext.getAlignmentStart(), + alignmentContext.getAlignmentSpan(), + byteOffsetOfContainer, + byteOffsetOfSliceHeaderBlock, + byteSizeOfSliceBlocks)); } } @@ -867,20 +940,17 @@ public List getBAIEntries(final CompressorCache compressorCache) { final List baiEntries = new ArrayList<>(); switch (getAlignmentContext().getReferenceContext().getType()) { case UNMAPPED_UNPLACED_TYPE: - baiEntries.add( - new BAIEntry( - getAlignmentContext().getReferenceContext(), - new AlignmentSpan( - 0, - 0, - mappedReadsCount, //aligned - unmappedReadsCount, - unplacedReadsCount), - byteOffsetOfContainer, - byteOffsetOfSliceHeaderBlock, - landmarkIndex - ) - ); + baiEntries.add(new BAIEntry( + getAlignmentContext().getReferenceContext(), + new AlignmentSpan( + 0, + 0, + mappedReadsCount, // aligned + unmappedReadsCount, + unplacedReadsCount), + byteOffsetOfContainer, + byteOffsetOfSliceHeaderBlock, + landmarkIndex)); break; case MULTIPLE_REFERENCE_TYPE: @@ -888,57 +958,47 @@ public List getBAIEntries(final CompressorCache compressorCache) { // i.e., there might be only one record per reference context, and thus not enough of any one // to warrant a separate slice) // unmapped span needs to go last - final Map sliceSpanMap = getMultiRefAlignmentSpans( - compressorCache, - ValidationStringency.LENIENT); - sliceSpanMap.entrySet().stream().filter(as -> !as.getKey().equals(ReferenceContext.UNMAPPED_UNPLACED_CONTEXT)).forEach( - entry -> baiEntries.add( - new BAIEntry( - entry.getKey(), - new AlignmentSpan( - entry.getValue().getAlignmentStart(), - entry.getValue().getAlignmentSpan(), - entry.getValue().getMappedCount(), - 
entry.getValue().getUnmappedCount(), - entry.getValue().getUnmappedUnplacedCount()), - byteOffsetOfContainer, - byteOffsetOfSliceHeaderBlock, - landmarkIndex) - ) - ); + final Map sliceSpanMap = + getMultiRefAlignmentSpans(compressorCache, ValidationStringency.LENIENT); + sliceSpanMap.entrySet().stream() + .filter(as -> !as.getKey().equals(ReferenceContext.UNMAPPED_UNPLACED_CONTEXT)) + .forEach(entry -> baiEntries.add(new BAIEntry( + entry.getKey(), + new AlignmentSpan( + entry.getValue().getAlignmentStart(), + entry.getValue().getAlignmentSpan(), + entry.getValue().getMappedCount(), + entry.getValue().getUnmappedCount(), + entry.getValue().getUnmappedUnplacedCount()), + byteOffsetOfContainer, + byteOffsetOfSliceHeaderBlock, + landmarkIndex))); final AlignmentSpan unmappedSpan = sliceSpanMap.get(ReferenceContext.UNMAPPED_UNPLACED_CONTEXT); if (unmappedSpan != null) { - baiEntries.add( - new BAIEntry( - ReferenceContext.UNMAPPED_UNPLACED_CONTEXT, - unmappedSpan, - byteOffsetOfContainer, - byteOffsetOfSliceHeaderBlock, - landmarkIndex - ) - ); + baiEntries.add(new BAIEntry( + ReferenceContext.UNMAPPED_UNPLACED_CONTEXT, + unmappedSpan, + byteOffsetOfContainer, + byteOffsetOfSliceHeaderBlock, + landmarkIndex)); } break; default: - baiEntries.add( - new BAIEntry( - getAlignmentContext().getReferenceContext(), - new AlignmentSpan( - getAlignmentContext().getAlignmentStart(), - getAlignmentContext().getAlignmentSpan(), - getMappedReadsCount(), - getUnmappedReadsCount(), - getUnplacedReadsCount()), - byteOffsetOfContainer, - byteOffsetOfSliceHeaderBlock, - landmarkIndex - ) - ); + baiEntries.add(new BAIEntry( + getAlignmentContext().getReferenceContext(), + new AlignmentSpan( + getAlignmentContext().getAlignmentStart(), + getAlignmentContext().getAlignmentSpan(), + getMappedReadsCount(), + getUnmappedReadsCount(), + getUnplacedReadsCount()), + byteOffsetOfContainer, + byteOffsetOfSliceHeaderBlock, + landmarkIndex)); break; } return baiEntries; } - } diff --git 
a/src/main/java/htsjdk/samtools/cram/structure/SliceBlocks.java b/src/main/java/htsjdk/samtools/cram/structure/SliceBlocks.java index c15e1c7ae2..ef93808053 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/SliceBlocks.java +++ b/src/main/java/htsjdk/samtools/cram/structure/SliceBlocks.java @@ -29,7 +29,6 @@ import htsjdk.samtools.cram.structure.block.Block; import htsjdk.samtools.cram.structure.block.BlockContentType; import htsjdk.utils.ValidationUtils; - import java.io.InputStream; import java.io.OutputStream; import java.util.*; @@ -83,7 +82,8 @@ public SliceBlocks(final CRAMVersion cramVersion, final int numberOfBlocks, fina break; default: - throw new RuntimeException("Not a slice block, content type id " + block.getContentType().name()); + throw new RuntimeException("Not a slice block, content type id " + + block.getContentType().name()); } } if (getCoreBlock() == null) { @@ -94,9 +94,11 @@ public SliceBlocks(final CRAMVersion cramVersion, final int numberOfBlocks, fina /** * Return the core block for this Slice. May be null. */ - public Block getCoreBlock() { return coreBlock; } + public Block getCoreBlock() { + return coreBlock; + } - /** + /** * Get the external block corresponding to a contentID. * @param contentID contentID identifying the external block * @return external block for the contentID. May be null. @@ -118,7 +120,9 @@ public List getExternalContentIDs() { * Number of external locks present in this SliceBlocks object (does not include the core block). * @return number of external blocks, including any embedded reference block, but excluding the core block */ - public int getNumberOfExternalBlocks() { return externalBlocks.size(); } + public int getNumberOfExternalBlocks() { + return externalBlocks.size(); + } /** * Write the coreBlock and each external block out to a CRAM stream. There is no predefined @@ -156,17 +160,16 @@ private void setCoreBlock(final Block coreBlock) { * @param externalBlock An external block. 
May not be null, and must not already be present in this SliceBlocks. */ private void addExternalBlock(final Block externalBlock) { - ValidationUtils.validateArg(externalBlock.getContentType() == BlockContentType.EXTERNAL, "Invalid external block"); + ValidationUtils.validateArg( + externalBlock.getContentType() == BlockContentType.EXTERNAL, "Invalid external block"); if (externalBlocks.containsKey(externalBlock.getContentId())) { - throw new CRAMException( - String.format( - "Attempt to add a duplicate block (id %d of type %s) to compression header encoding map. " + - "Existing block is of type %s.", - externalBlock.getContentId(), - externalBlock.getContentType(), - externalBlocks.get(externalBlock.getContentId()).getContentType())); + throw new CRAMException(String.format( + "Attempt to add a duplicate block (id %d of type %s) to compression header encoding map. " + + "Existing block is of type %s.", + externalBlock.getContentId(), + externalBlock.getContentType(), + externalBlocks.get(externalBlock.getContentId()).getContentType())); } externalBlocks.put(externalBlock.getContentId(), externalBlock); } - } diff --git a/src/main/java/htsjdk/samtools/cram/structure/SliceBlocksReadStreams.java b/src/main/java/htsjdk/samtools/cram/structure/SliceBlocksReadStreams.java index 4befa833a9..48b78fd782 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/SliceBlocksReadStreams.java +++ b/src/main/java/htsjdk/samtools/cram/structure/SliceBlocksReadStreams.java @@ -1,35 +1,11 @@ -/* - * Copyright (c) 2019 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following 
- * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ package htsjdk.samtools.cram.structure; import htsjdk.samtools.cram.CRAMException; import htsjdk.samtools.cram.io.BitInputStream; +import htsjdk.samtools.cram.io.CRAMByteReader; import htsjdk.samtools.cram.io.DefaultBitInputStream; import htsjdk.samtools.cram.structure.block.Block; import htsjdk.utils.ValidationUtils; - import java.io.ByteArrayInputStream; import java.util.HashMap; import java.util.List; @@ -39,37 +15,41 @@ * Provides a layer over a {@link SliceBlocks} object and acts as a bridge between the DataSeries codecs * and their underlying blocks when reading a CRAM stream by presenting a bit (core) or byte (external) stream * for each block. + * + *

    External block streams use unsynchronized {@link CRAMByteReader} instead of + * {@link ByteArrayInputStream} to eliminate synchronized method call overhead in the hot decode path. */ public class SliceBlocksReadStreams { // bit input stream for the core block private final BitInputStream coreBlockInputStream; - // Map of ByteArrayInputStreams for all external contentIDs, including tag blocks, by content ID - private final Map externalInputStreams = new HashMap<>(); + // Map of CRAMByteReaders for all external contentIDs, including tag blocks, by content ID + private final Map externalReaders = new HashMap<>(); /** * @param sliceBlocks {@link SliceBlocks} that have been populated from a CRAM stream */ public SliceBlocksReadStreams(final SliceBlocks sliceBlocks, final CompressorCache compressorCache) { ValidationUtils.nonNull(sliceBlocks.getCoreBlock(), "sliceBlocks must have been initialized"); - ValidationUtils.validateArg(sliceBlocks.getNumberOfExternalBlocks() > 0, "sliceBlocks must have been initialized"); + ValidationUtils.validateArg( + sliceBlocks.getNumberOfExternalBlocks() > 0, "sliceBlocks must have been initialized"); if (sliceBlocks.getCoreBlock() == null || sliceBlocks.getNumberOfExternalBlocks() == 0) { throw new CRAMException("slice blocks must be initialized before being used with a reader"); } + // Core block still uses DefaultBitInputStream (bit-level access needed for Huffman codecs) coreBlockInputStream = new DefaultBitInputStream( - new ByteArrayInputStream( - sliceBlocks.getCoreBlock().getUncompressedContent(compressorCache))); + new ByteArrayInputStream(sliceBlocks.getCoreBlock().getUncompressedContent(compressorCache))); final List externalContentIDs = sliceBlocks.getExternalContentIDs(); for (final Integer contentID : externalContentIDs) { final Block block = sliceBlocks.getExternalBlock(contentID); - externalInputStreams.put(contentID, new ByteArrayInputStream(block.getUncompressedContent(compressorCache))); + 
externalReaders.put(contentID, new CRAMByteReader(block.getUncompressedContent(compressorCache))); } } /** - * Get the {@link BitInputStream} for this {@link SliceBlocks} core block + * Get the {@link BitInputStream} for this {@link SliceBlocks} core block. * @return {@link BitInputStream} for the core block */ public BitInputStream getCoreBlockInputStream() { @@ -77,9 +57,11 @@ public BitInputStream getCoreBlockInputStream() { } /** - * Get the ByteArrayInputStream for the given contentID. - * @param contentID - * @return ByteArrayInputStream for contentID + * Get the {@link CRAMByteReader} for the given content ID. + * @param contentID the external block content ID + * @return CRAMByteReader for the content ID */ - public ByteArrayInputStream getExternalInputStream(final Integer contentID) { return externalInputStreams.get(contentID); } + public CRAMByteReader getExternalReader(final Integer contentID) { + return externalReaders.get(contentID); + } } diff --git a/src/main/java/htsjdk/samtools/cram/structure/SliceBlocksWriteStreams.java b/src/main/java/htsjdk/samtools/cram/structure/SliceBlocksWriteStreams.java index 6209e18f3d..7723c37337 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/SliceBlocksWriteStreams.java +++ b/src/main/java/htsjdk/samtools/cram/structure/SliceBlocksWriteStreams.java @@ -1,35 +1,11 @@ -/* - * Copyright (c) 2019 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ package htsjdk.samtools.cram.structure; import htsjdk.samtools.cram.CRAMException; import htsjdk.samtools.cram.io.BitOutputStream; +import htsjdk.samtools.cram.io.CRAMByteWriter; import htsjdk.samtools.cram.io.DefaultBitOutputStream; import htsjdk.samtools.cram.structure.block.Block; import htsjdk.samtools.util.RuntimeIOException; - import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.ArrayList; @@ -41,6 +17,9 @@ * Provides a layer over a {@link SliceBlocks} object and acts as a bridge between the DataSeries codecs * and their underlying blocks when writing a CRAM stream by presenting a bit (core) or byte (external) stream * for each block. + * + *

    External block streams use unsynchronized {@link CRAMByteWriter} instead of + * {@link ByteArrayOutputStream} to eliminate synchronized method call overhead in the hot encode path. */ public class SliceBlocksWriteStreams { @@ -48,8 +27,8 @@ public class SliceBlocksWriteStreams { private final ByteArrayOutputStream coreBlockByteOutputStream; private final BitOutputStream coreBlockBitOutputStream; - // content ID to ByteArrayOutputStream - private final Map externalOutputStreams = new TreeMap<>(); + // content ID to CRAMByteWriter + private final Map externalWriters = new TreeMap<>(); /** * @param compressionHeader {@link CompressionHeader} for the container containing the slice @@ -57,58 +36,58 @@ public class SliceBlocksWriteStreams { public SliceBlocksWriteStreams(final CompressionHeader compressionHeader) { this.compressionHeader = compressionHeader; + // Core block still uses DefaultBitOutputStream (bit-level access needed for Huffman codecs) coreBlockByteOutputStream = new ByteArrayOutputStream(); coreBlockBitOutputStream = new DefaultBitOutputStream(coreBlockByteOutputStream); - // Create an output stream for each external content ID in the encoding map + // Create a writer for each external content ID in the encoding map for (final Integer contentID : compressionHeader.getEncodingMap().getExternalIDs()) { - externalOutputStreams.put(contentID, new ByteArrayOutputStream()); + externalWriters.put(contentID, new CRAMByteWriter()); } } /** - * @return the {@link BitOutputStream} for the core block + * @return the {@link BitOutputStream} for the core block */ - public BitOutputStream getCoreOutputStream() { return coreBlockBitOutputStream; } + public BitOutputStream getCoreOutputStream() { + return coreBlockBitOutputStream; + } /** - * Get the ByteArrayOutputStream corresponding to the requested contentID + * Get the {@link CRAMByteWriter} corresponding to the requested content ID. 
* @param contentID ID of content being requested - * @return ByteArrayOutputStream for contentID + * @return CRAMByteWriter for the content ID */ - public ByteArrayOutputStream getExternalOutputStream(final Integer contentID) { return externalOutputStreams.get(contentID); } + public CRAMByteWriter getExternalWriter(final Integer contentID) { + return externalWriters.get(contentID); + } /** - * Compress and write each stream to a corresponding Block (note that this does not write - * the blocks themselves to a container output stream - that can't happen until the slice is aggregated - * into a container. + * Compress and write each stream to a corresponding Block. */ public SliceBlocks flushStreamsToBlocks(final CRAMCodecModelContext contextModel) { - closeAllStreams(); + closeCoreStream(); // core block is raw (no compression) and must be written first (prescribed by the spec) final Block coreBlock = Block.createRawCoreDataBlock(coreBlockByteOutputStream.toByteArray()); final List externalBlocks = new ArrayList<>(); - externalOutputStreams.forEach((contentId, contentStream) -> { + externalWriters.forEach((contentId, writer) -> { if (contentId.equals(Block.NO_CONTENT_ID)) { throw new CRAMException("A valid content ID is required. 
Given: " + contentId); } - externalBlocks.add(compressionHeader.getEncodingMap().createCompressedBlockForStream(contextModel, contentId, contentStream)); - }); + externalBlocks.add( + compressionHeader.getEncodingMap().createCompressedBlockForWriter(contextModel, contentId, writer)); + }); return new SliceBlocks(coreBlock, externalBlocks); - } + } - private void closeAllStreams() { + private void closeCoreStream() { try { getCoreOutputStream().close(); - for (ByteArrayOutputStream baos : externalOutputStreams.values()) { - baos.close(); - } } catch (final IOException e) { throw new RuntimeIOException(e); } } - } diff --git a/src/main/java/htsjdk/samtools/cram/structure/SubstitutionBase.java b/src/main/java/htsjdk/samtools/cram/structure/SubstitutionBase.java index 5bb933f0a3..809913c556 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/SubstitutionBase.java +++ b/src/main/java/htsjdk/samtools/cram/structure/SubstitutionBase.java @@ -14,11 +14,15 @@ enum SubstitutionBase { // The base this substitution represents private final byte base; - SubstitutionBase() { this.base = (byte) name().charAt(0); } + SubstitutionBase() { + this.base = (byte) name().charAt(0); + } /** * The base this substitution represents * @return the underlying base */ - public byte getBase() { return base; } + public byte getBase() { + return base; + } }; diff --git a/src/main/java/htsjdk/samtools/cram/structure/SubstitutionMatrix.java b/src/main/java/htsjdk/samtools/cram/structure/SubstitutionMatrix.java index 31b9ebafad..cc87dbf478 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/SubstitutionMatrix.java +++ b/src/main/java/htsjdk/samtools/cram/structure/SubstitutionMatrix.java @@ -20,7 +20,6 @@ import htsjdk.samtools.cram.encoding.readfeatures.ReadFeature; import htsjdk.samtools.cram.encoding.readfeatures.Substitution; import htsjdk.samtools.util.Log; - import java.util.Arrays; import java.util.Comparator; import java.util.List; @@ -38,7 +37,7 @@ * handled for upper case) 
although it does not *generate* substitutions for lower case reference * bases. */ - public class SubstitutionMatrix { +public class SubstitutionMatrix { private static final Log log = Log.getInstance(Substitution.class); // substitution bases, in the order in which they're stored in the substitution matrix @@ -157,12 +156,14 @@ public SubstitutionMatrix(final byte[] matrix) { */ public byte code(final byte refBase, final byte readBase) { if (refBase <= 0 || Character.isLowerCase((char) refBase)) { - throw new IllegalArgumentException( - String.format("CRAM: Attempt to generate a substitution code for invalid or lower case reference base '%c'", (char) refBase)); + throw new IllegalArgumentException(String.format( + "CRAM: Attempt to generate a substitution code for invalid or lower case reference base '%c'", + (char) refBase)); } if (readBase <= 0) { - throw new IllegalArgumentException( - String.format("CRAM: Attempt to generate a substitution code for an invalid read base value '%c'", (char) readBase)); + throw new IllegalArgumentException(String.format( + "CRAM: Attempt to generate a substitution code for an invalid read base value '%c'", + (char) readBase)); } return codeByBase[refBase][readBase]; } @@ -175,13 +176,14 @@ public byte code(final byte refBase, final byte readBase) { */ public byte base(final byte refBase, final byte code) { if (refBase <= 0) { - throw new IllegalArgumentException( - String.format("CRAM: Attempt to generate a substitution code for invalid reference base '%c'", (char) refBase)); + throw new IllegalArgumentException(String.format( + "CRAM: Attempt to generate a substitution code for invalid reference base '%c'", (char) refBase)); } final byte base = baseByCode[refBase][code]; if (base == NO_BASE) { // attempt to retrieve a code for a reference base that isn't in the substitution matrix - throw new IllegalArgumentException(String.format("CRAM: Attempt to retrieve a substitution base for invalid base '%c'", (char) refBase)); + throw 
new IllegalArgumentException(String.format( + "CRAM: Attempt to retrieve a substitution base for invalid base '%c'", (char) refBase)); } return base; } @@ -208,7 +210,7 @@ public String toString() { } // lower case substitutions for (final SubstitutionBase r : BASES) { - char lowerCaseBase = Character.toLowerCase((char)r.getBase()); + char lowerCaseBase = Character.toLowerCase((char) r.getBase()); stringBuilder.append(lowerCaseBase); stringBuilder.append(':'); for (int i = 0; i < CODES_PER_BASE; i++) { @@ -230,8 +232,9 @@ private static long[][] buildFrequencies(final List cramC final byte refBase = substitution.getReferenceBase(); final byte base = substitution.getBase(); if (refBase <= 0 || base <= 0) { - throw new IllegalArgumentException( - String.format("CRAM: Attempt to generate a substitution code for invalid reference base with value '%d'", refBase)); + throw new IllegalArgumentException(String.format( + "CRAM: Attempt to generate a substitution code for invalid reference base with value '%d'", + refBase)); } frequencies[refBase][base]++; } @@ -252,15 +255,14 @@ public SubstitutionFrequency(final SubstitutionBase substituteBase, final long f } } - private static final Comparator COMPARATOR = - (o1, o2) -> { - // primary sort by frequency - if (o1.freq != o2.freq) { - return (int) (o2.freq - o1.freq); - } - // same frequency; compare based on spec tie-breaking rule (use base order prescribed by the spec) - return o1.substituteBase.ordinal() - o2.substituteBase.ordinal(); - }; + private static final Comparator COMPARATOR = (o1, o2) -> { + // primary sort by frequency + if (o1.freq != o2.freq) { + return (int) (o2.freq - o1.freq); + } + // same frequency; compare based on spec tie-breaking rule (use base order prescribed by the spec) + return o1.substituteBase.ordinal() - o2.substituteBase.ordinal(); + }; // For the given base, return a packed substitution vector containing the possible // substitution codes given the set of substitution frequencies for that 
base. @@ -307,5 +309,4 @@ private byte substitutionCodeVector(final byte refBase, final long[] frequencies return codeVector; } - } diff --git a/src/main/java/htsjdk/samtools/cram/structure/TagKeyCache.java b/src/main/java/htsjdk/samtools/cram/structure/TagKeyCache.java new file mode 100644 index 0000000000..dc5c6f8c55 --- /dev/null +++ b/src/main/java/htsjdk/samtools/cram/structure/TagKeyCache.java @@ -0,0 +1,105 @@ +package htsjdk.samtools.cram.structure; + +import htsjdk.samtools.SAMTag; + +/** + * Caches the per-tag-ID metadata that is invariant across all records in a slice. + * + *

    In CRAM, each tag is identified by a 3-byte ID (2 bytes tag name + 1 byte type) packed + * into an int. The tag ID dictionary in the compression header defines the small set of + * unique tag IDs used in a slice (typically 5-20). This class pre-computes and caches + * the derived String keys, binary tag codes, and type characters so they can be reused + * across millions of records without repeated allocation.

    + * + *

    Internally uses parallel arrays with linear scan lookup, which is optimal for the + * small number of entries typical in CRAM slices (fits in 1-2 cache lines).

    + */ +public final class TagKeyCache { + + /** Pre-computed metadata for a single tag ID. */ + public static final class TagKeyInfo { + /** Two-character tag name, e.g. "NM", "MD", "RG". */ + public final String key; + /** Three-character tag name + type, e.g. "NMi", "MDZ". */ + public final String keyType3Bytes; + /** The 3-byte tag ID packed as an int (name high bytes, type low byte). */ + public final int keyType3BytesAsInt; + /** Binary tag code as computed by {@link SAMTag#makeBinaryTag}. */ + public final short code; + /** The single-character type code, e.g. 'i', 'Z', 'A'. */ + public final char type; + + private TagKeyInfo(final int id) { + final char c1 = (char) ((id >> 16) & 0xFF); + final char c2 = (char) ((id >> 8) & 0xFF); + this.type = (char) (id & 0xFF); + this.key = new String(new char[] {c1, c2}); + this.keyType3Bytes = new String(new char[] {c1, c2, this.type}); + this.keyType3BytesAsInt = id; + this.code = SAMTag.makeBinaryTag(this.key); + } + } + + private final int[] ids; + private final TagKeyInfo[] infos; + private final int size; + + /** + * Creates a TagKeyCache from a tag ID dictionary. + * + * @param tagIDDictionary the tag ID dictionary from the compression header, where each + * entry in the outer array is a combination of tag IDs (as 3-byte arrays) + * that appear together on records + */ + public TagKeyCache(final byte[][][] tagIDDictionary) { + // Collect unique tag IDs across all dictionary entries + // Use a simple approach: accumulate into oversized arrays, then we'll use them directly. + // Worst case there are ~50 unique tags; typical is 5-20. 
+ int capacity = 0; + for (final byte[][] entry : tagIDDictionary) { + capacity += entry.length; + } + + final int[] tempIds = new int[capacity]; + final TagKeyInfo[] tempInfos = new TagKeyInfo[capacity]; + int count = 0; + + for (final byte[][] entry : tagIDDictionary) { + for (final byte[] tagBytes : entry) { + final int id = ReadTag.name3BytesToInt(tagBytes); + // Check if we already have this ID (linear scan is fine for small N) + boolean found = false; + for (int i = 0; i < count; i++) { + if (tempIds[i] == id) { + found = true; + break; + } + } + if (!found) { + tempIds[count] = id; + tempInfos[count] = new TagKeyInfo(id); + count++; + } + } + } + + this.ids = tempIds; + this.infos = tempInfos; + this.size = count; + } + + /** + * Looks up the cached metadata for the given 3-byte tag ID. + * + * @param id the tag ID as a packed int (2 bytes name + 1 byte type) + * @return the cached metadata, or {@code null} if the ID is not in the cache + */ + public TagKeyInfo get(final int id) { + for (int i = 0; i < size; i++) { + if (ids[i] == id) { + return infos[i]; + } + } + return null; + } +} diff --git a/src/main/java/htsjdk/samtools/cram/structure/block/Block.java b/src/main/java/htsjdk/samtools/cram/structure/block/Block.java index 4acc4f4aa6..e5b6e27068 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/block/Block.java +++ b/src/main/java/htsjdk/samtools/cram/structure/block/Block.java @@ -26,7 +26,6 @@ import htsjdk.samtools.cram.structure.CompressorCache; import htsjdk.samtools.util.RuntimeIOException; import htsjdk.utils.ValidationUtils; - import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -80,11 +79,12 @@ public class Block { * @param compressedContent the compressed form of the data to be stored in this block * @param uncompressedLength the length of the content stored in this block when uncompressed */ - protected Block(final BlockCompressionMethod compressionMethod, - final BlockContentType contentType, - final 
int contentId, - final byte[] compressedContent, - final int uncompressedLength) { + protected Block( + final BlockCompressionMethod compressionMethod, + final BlockContentType contentType, + final int contentId, + final byte[] compressedContent, + final int uncompressedLength) { this.compressionMethod = compressionMethod; this.contentType = contentType; this.contentId = contentId; @@ -94,9 +94,9 @@ protected Block(final BlockCompressionMethod compressionMethod, // There are quite a few htsjdk and GATk test files around that contain external blocks that violate this // (that is they have contentID==0). So we may have to leave this out, and only validate that we don't violate // this on write. See https://github.com/samtools/htsjdk/issues/1232 - //if (type == BlockContentType.EXTERNAL && getContentId() == Block.NO_CONTENT_ID) { + // if (type == BlockContentType.EXTERNAL && getContentId() == Block.NO_CONTENT_ID) { // throw new CRAMException("Valid Content ID required for external blocks."); - //} + // } if (contentType != BlockContentType.EXTERNAL && contentId != Block.NO_CONTENT_ID) { throw new CRAMException("Cannot set a Content ID for non-external blocks."); @@ -128,7 +128,8 @@ private static Block createRawNonExternalBlock(final BlockContentType contentTyp public static Block createGZIPFileHeaderBlock(final byte[] rawContent) { return new Block( BlockCompressionMethod.GZIP, - BlockContentType.FILE_HEADER, NO_CONTENT_ID, + BlockContentType.FILE_HEADER, + NO_CONTENT_ID, (new GZIPExternalCompressor()).compress(rawContent, null), rawContent.length); } @@ -175,13 +176,14 @@ public static Block createRawCoreDataBlock(final byte[] rawContent) { * @param compressedContent the content of this block, in compressed mode * @param uncompressedLength the length of the content stored in this block when uncompressed */ - public static Block createExternalBlock(final BlockCompressionMethod compressionMethod, - final int contentId, - final byte[] compressedContent, - final int 
uncompressedLength) { + public static Block createExternalBlock( + final BlockCompressionMethod compressionMethod, + final int contentId, + final byte[] compressedContent, + final int uncompressedLength) { ValidationUtils.validateArg(contentId >= 0, "Invalid external block content id"); - return new Block(compressionMethod, BlockContentType.EXTERNAL, - contentId, compressedContent, uncompressedLength); + return new Block( + compressionMethod, BlockContentType.EXTERNAL, contentId, compressedContent, uncompressedLength); } public final BlockCompressionMethod getCompressionMethod() { @@ -214,7 +216,8 @@ public int getContentId() { * @throws IllegalArgumentException if the block is not {@link BlockCompressionMethod#RAW}. */ public final byte[] getRawContent() { - ValidationUtils.validateArg(getCompressionMethod() == BlockCompressionMethod.RAW, + ValidationUtils.validateArg( + getCompressionMethod() == BlockCompressionMethod.RAW, "getRawContent should only be called on blocks with RAW compression method"); return compressedContent; } @@ -228,13 +231,13 @@ public final byte[] getRawContent() { */ public final byte[] getUncompressedContent(final CompressorCache compressorCache) { // when uncompressing, no compressor-specific args are required since any variant of the compressor will do - final ExternalCompressor compressor = compressorCache.getCompressorForMethod(compressionMethod, ExternalCompressor.NO_COMPRESSION_ARG); + final ExternalCompressor compressor = + compressorCache.getCompressorForMethod(compressionMethod, ExternalCompressor.NO_COMPRESSION_ARG); final byte[] uncompressedContent = compressor.uncompress(compressedContent); if (uncompressedContent.length != uncompressedLength) { throw new CRAMException(String.format( "Block uncompressed length did not match expected length: %04x vs %04x", - uncompressedLength, - uncompressedContent.length)); + uncompressedLength, uncompressedContent.length)); } return uncompressedContent; } @@ -286,13 +289,13 @@ public static 
Block read(final CRAMVersion cramVersion, InputStream inputStream) final int actualChecksum = ((CRC32InputStream) inputStream).getCRC32(); final int checksum = CramInt.readInt32(inputStream); if (checksum != actualChecksum) { - throw new RuntimeException(String.format("Block CRC32 mismatch, actual: %04x expected: %04x", checksum, actualChecksum)); + throw new RuntimeException(String.format( + "Block CRC32 mismatch, actual: %04x expected: %04x", checksum, actualChecksum)); } } return new Block(compressionMethod, contentType, contentId, compressedContent, uncompressedSize); - } - catch (final IOException e) { + } catch (final IOException e) { throw new RuntimeIOException(e); } } @@ -313,8 +316,7 @@ public final void write(final CRAMVersion cramVersion, final OutputStream output } else { doWrite(outputStream); } - } - catch (final IOException e) { + } catch (final IOException e) { throw new RuntimeIOException(e); } } @@ -332,12 +334,12 @@ private void doWrite(final OutputStream outputStream) throws IOException { @Override public String toString() { - return String.format("method=%s, type=%s, id=%d, raw size=%d, compressed size=%d", + return String.format( + "method=%s, type=%s, id=%d, raw size=%d, compressed size=%d", getCompressionMethod().name(), getContentType().name(), getContentId(), getUncompressedContentSize(), getCompressedContentSize()); } - } diff --git a/src/main/java/htsjdk/samtools/cram/structure/block/BlockCompressionMethod.java b/src/main/java/htsjdk/samtools/cram/structure/block/BlockCompressionMethod.java index ee18f40334..74f57832ce 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/block/BlockCompressionMethod.java +++ b/src/main/java/htsjdk/samtools/cram/structure/block/BlockCompressionMethod.java @@ -18,7 +18,6 @@ package htsjdk.samtools.cram.structure.block; import htsjdk.samtools.cram.CRAMException; - import java.util.*; import java.util.function.Function; import java.util.stream.Collectors; @@ -69,4 +68,4 @@ public static 
BlockCompressionMethod byId(final int id) { private static final Map ID_MAP = Collections.unmodifiableMap(Stream.of(BlockCompressionMethod.values()) .collect(Collectors.toMap(BlockCompressionMethod::getMethodId, Function.identity()))); -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/samtools/cram/structure/block/BlockContentType.java b/src/main/java/htsjdk/samtools/cram/structure/block/BlockContentType.java index 66e5132506..179bd5a732 100644 --- a/src/main/java/htsjdk/samtools/cram/structure/block/BlockContentType.java +++ b/src/main/java/htsjdk/samtools/cram/structure/block/BlockContentType.java @@ -18,7 +18,6 @@ package htsjdk.samtools.cram.structure.block; import htsjdk.samtools.cram.CRAMException; - import java.util.Collections; import java.util.Map; import java.util.Optional; diff --git a/src/main/java/htsjdk/samtools/example/ExampleSamUsage.java b/src/main/java/htsjdk/samtools/example/ExampleSamUsage.java index 925e05330c..de23e821c5 100644 --- a/src/main/java/htsjdk/samtools/example/ExampleSamUsage.java +++ b/src/main/java/htsjdk/samtools/example/ExampleSamUsage.java @@ -32,7 +32,6 @@ import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.seekablestream.SeekableStream; - import java.io.File; import java.io.IOException; import java.net.MalformedURLException; @@ -53,43 +52,42 @@ public void openSamExamples() throws MalformedURLException { /** * With different reader options */ - final SamReader readerFromConfiguredFactory = - SamReaderFactory.make() - .enable(SamReaderFactory.Option.DONT_MEMORY_MAP_INDEX) - .validationStringency(ValidationStringency.SILENT) - .samRecordFactory(DefaultSAMRecordFactory.getInstance()) - .open(new File("/my.bam")); + final SamReader readerFromConfiguredFactory = SamReaderFactory.make() + .enable(SamReaderFactory.Option.DONT_MEMORY_MAP_INDEX) + .validationStringency(ValidationStringency.SILENT) + .samRecordFactory(DefaultSAMRecordFactory.getInstance()) + 
.open(new File("/my.bam")); /** - * With a more complicated source + * With a more complicated source */ - final SamReader complicatedReader = - SamReaderFactory.makeDefault() - .open( - SamInputResource.of(new URL("http://broadinstitute.org/my.bam")).index(myIndexSeekableStream()) - ); + final SamReader complicatedReader = SamReaderFactory.makeDefault() + .open(SamInputResource.of(new URL("http://broadinstitute.org/my.bam")) + .index(myIndexSeekableStream())); /** * Broken down */ - final SamReaderFactory factory = - SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.LENIENT); + final SamReaderFactory factory = SamReaderFactory.makeDefault() + .enable(SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS) + .validationStringency(ValidationStringency.LENIENT); - final SamInputResource resource = SamInputResource.of(new File("/my.bam")).index(new URL("http://broadinstitute.org/my.bam.bai")); + final SamInputResource resource = + SamInputResource.of(new File("/my.bam")).index(new URL("http://broadinstitute.org/my.bam.bai")); final SamReader myReader = factory.open(resource); for (final SAMRecord samRecord : myReader) { System.err.print(samRecord); } - } /** * Read a SAM or BAM file, convert each read name to upper case, and write a new * SAM or BAM file. */ - public void convertReadNamesToUpperCase(final File inputSamOrBamFile, final File outputSamOrBamFile) throws IOException { + public void convertReadNamesToUpperCase(final File inputSamOrBamFile, final File outputSamOrBamFile) + throws IOException { final SamReader reader = SamReaderFactory.makeDefault().open(inputSamOrBamFile); @@ -103,8 +101,8 @@ public void convertReadNamesToUpperCase(final File inputSamOrBamFile, final File // can be written to the output file directly rather than being written to a temporary file // and sorted after all records have been sent to outputSam. 
- final SAMFileWriter outputSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), - true, outputSamOrBamFile); + final SAMFileWriter outputSam = + new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, outputSamOrBamFile); for (final SAMRecord samRecord : reader) { // Convert read name to upper case. diff --git a/src/main/java/htsjdk/samtools/example/PrintReadsExample.java b/src/main/java/htsjdk/samtools/example/PrintReadsExample.java index 7bbec0ae5d..9e38dc0ee3 100755 --- a/src/main/java/htsjdk/samtools/example/PrintReadsExample.java +++ b/src/main/java/htsjdk/samtools/example/PrintReadsExample.java @@ -25,7 +25,6 @@ import htsjdk.samtools.*; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.ProgressLogger; - import java.io.File; import java.io.IOException; import java.net.InetAddress; @@ -33,7 +32,6 @@ import java.util.List; import java.util.stream.Collectors; - /** * This is a example program showing how to use SAM readers and (optionally) writers. * It's also useful for measuring time. @@ -45,18 +43,19 @@ * - the third argument is optional and is the name of the output file (nothing gets written if this argument is missing) */ public final class PrintReadsExample { - private PrintReadsExample() { - } + private PrintReadsExample() {} private static final Log log = Log.getInstance(PrintReadsExample.class); public static void main(String[] args) throws IOException { if (args.length < 2) { - System.out.println("Usage: " + PrintReadsExample.class.getCanonicalName() + " inFile eagerDecode [outFile]"); + System.out.println( + "Usage: " + PrintReadsExample.class.getCanonicalName() + " inFile eagerDecode [outFile]"); System.exit(1); } final File inputFile = new File(args[0]); - final boolean eagerDecode = Boolean.parseBoolean(args[1]); //useful to test (realistic) scenarios in which every record is always fully decoded. 
+ final boolean eagerDecode = Boolean.parseBoolean( + args[1]); // useful to test (realistic) scenarios in which every record is always fully decoded. final File outputFile = args.length >= 3 ? new File(args[2]) : null; final long start = System.currentTimeMillis(); @@ -64,14 +63,16 @@ public static void main(String[] args) throws IOException { log.info("Start with args:" + Arrays.toString(args)); printConfigurationInfo(); - SamReaderFactory readerFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT); + SamReaderFactory readerFactory = + SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT); if (eagerDecode) { readerFactory = readerFactory.enable(SamReaderFactory.Option.EAGERLY_DECODE); } try (final SamReader reader = readerFactory.open(inputFile)) { final SAMFileHeader header = reader.getFileHeader(); - try (final SAMFileWriter writer = outputFile != null ? new SAMFileWriterFactory().makeBAMWriter(header, true, outputFile) : null) { + try (final SAMFileWriter writer = + outputFile != null ? 
new SAMFileWriterFactory().makeBAMWriter(header, true, outputFile) : null) { final ProgressLogger pl = new ProgressLogger(log, 1000000); for (final SAMRecord record : reader) { if (writer != null) { @@ -86,13 +87,15 @@ public static void main(String[] args) throws IOException { } private static void printConfigurationInfo() throws IOException { - log.info("Executing as " + - System.getProperty("user.name") + '@' + InetAddress.getLocalHost().getHostName() + - " on " + System.getProperty("os.name") + ' ' + System.getProperty("os.version") + - ' ' + System.getProperty("os.arch") + "; " + System.getProperty("java.vm.name") + - ' ' + System.getProperty("java.runtime.version")); + log.info("Executing as " + System.getProperty("user.name") + + '@' + InetAddress.getLocalHost().getHostName() + " on " + + System.getProperty("os.name") + ' ' + System.getProperty("os.version") + ' ' + + System.getProperty("os.arch") + "; " + System.getProperty("java.vm.name") + ' ' + + System.getProperty("java.runtime.version")); - final List list = Defaults.allDefaults().entrySet().stream().map(e -> e.getKey() + ':' + e.getValue()).collect(Collectors.toList()); + final List list = Defaults.allDefaults().entrySet().stream() + .map(e -> e.getKey() + ':' + e.getValue()) + .collect(Collectors.toList()); log.info(String.join(" ", list)); } } diff --git a/src/main/java/htsjdk/samtools/fastq/AsyncFastqWriter.java b/src/main/java/htsjdk/samtools/fastq/AsyncFastqWriter.java index dee0a1612b..a93ab11332 100644 --- a/src/main/java/htsjdk/samtools/fastq/AsyncFastqWriter.java +++ b/src/main/java/htsjdk/samtools/fastq/AsyncFastqWriter.java @@ -14,7 +14,18 @@ public AsyncFastqWriter(final FastqWriter out, final int queueSize) { this.writer = out; } - @Override protected String getThreadNamePrefix() { return "FastqWriterThread-"; } - @Override protected void synchronouslyWrite(final FastqRecord item) { this.writer.write(item); } - @Override protected void synchronouslyClose() { this.writer.close(); } + 
@Override + protected String getThreadNamePrefix() { + return "FastqWriterThread-"; + } + + @Override + protected void synchronouslyWrite(final FastqRecord item) { + this.writer.write(item); + } + + @Override + protected void synchronouslyClose() { + this.writer.close(); + } } diff --git a/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java b/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java index 271401dbfe..1df25a52ff 100644 --- a/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java +++ b/src/main/java/htsjdk/samtools/fastq/BasicFastqWriter.java @@ -25,7 +25,6 @@ import htsjdk.samtools.SAMException; import htsjdk.samtools.util.IOUtil; - import java.io.File; import java.io.Flushable; import java.io.OutputStream; @@ -35,7 +34,7 @@ * In general FastqWriterFactory should be used so that AsyncFastqWriter can be enabled, but there are some * cases in which that behavior is explicitly not wanted. */ -public class BasicFastqWriter implements FastqWriter,Flushable { +public class BasicFastqWriter implements FastqWriter, Flushable { private final String path; private final PrintStream writer; @@ -48,7 +47,7 @@ public BasicFastqWriter(final File file, final boolean createMd5) { } private BasicFastqWriter(final File file, final PrintStream writer) { - this.path = (file != null? file.getAbsolutePath(): ""); + this.path = (file != null ? 
file.getAbsolutePath() : ""); this.writer = writer; } diff --git a/src/main/java/htsjdk/samtools/fastq/FastqConstants.java b/src/main/java/htsjdk/samtools/fastq/FastqConstants.java index 4e9b95e5b8..ab63b70f21 100644 --- a/src/main/java/htsjdk/samtools/fastq/FastqConstants.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqConstants.java @@ -27,8 +27,8 @@ * @author alecw@broadinstitute.org */ public class FastqConstants { - public static final String SEQUENCE_HEADER = "@" ; - public static final String QUALITY_HEADER = "+" ; + public static final String SEQUENCE_HEADER = "@"; + public static final String QUALITY_HEADER = "+"; public static final String FIRST_OF_PAIR = "/1"; public static final String SECOND_OF_PAIR = "/2"; @@ -38,14 +38,15 @@ public enum FastqExtensions { FQ(".fq"), FQ_GZ(".fq.gz"), BFQ(".bfq"); - + private final String extension; - + private FastqExtensions(final String extension) { this.extension = extension; } - - public String getExtension() { return this.extension; } - + + public String getExtension() { + return this.extension; + } } } diff --git a/src/main/java/htsjdk/samtools/fastq/FastqEncoder.java b/src/main/java/htsjdk/samtools/fastq/FastqEncoder.java index 696f53b137..bb42eb09b1 100755 --- a/src/main/java/htsjdk/samtools/fastq/FastqEncoder.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqEncoder.java @@ -29,7 +29,6 @@ import htsjdk.samtools.SAMTag; import htsjdk.samtools.TextTagCodec; import htsjdk.samtools.util.SequenceUtil; - import java.io.IOException; import java.util.Map; import java.util.function.BiConsumer; @@ -49,7 +48,7 @@ private FastqEncoder() {} */ public static String encode(final FastqRecord record) { // reserve some memory based on the read length - int capacity = record.getReadLength() * 2 + 5; + int capacity = record.getReadLength() * 2 + 5; // reserve some memory based on the read name if (record.getReadName() != null) { capacity += record.getReadName().length(); @@ -61,17 +60,20 @@ public static String encode(final 
FastqRecord record) { * Writes a FastqRecord into the Appendable output. * @throws SAMException if any I/O error occurs. */ - public static Appendable write(final Appendable out,final FastqRecord record) { + public static Appendable write(final Appendable out, final FastqRecord record) { final String readName = record.getReadName(); final String readString = record.getReadString(); final String qualHeader = record.getBaseQualityHeader(); final String qualityString = record.getBaseQualityString(); try { return out.append(FastqConstants.SEQUENCE_HEADER) - .append(readName == null ? "" : readName).append('\n') - .append(readString == null ? "" : readString).append('\n') + .append(readName == null ? "" : readName) + .append('\n') + .append(readString == null ? "" : readString) + .append('\n') .append(FastqConstants.QUALITY_HEADER) - .append(qualHeader == null ? "" : qualHeader).append('\n') + .append(qualHeader == null ? "" : qualHeader) + .append('\n') .append(qualityString == null ? "" : qualityString); } catch (IOException e) { throw new SAMException(e); @@ -92,10 +94,11 @@ public static String encode(final SAMRecord record) { */ public static FastqRecord asFastqRecord(final SAMRecord record) { String readName = record.getReadName(); - if(record.getReadPairedFlag() && (record.getFirstOfPairFlag() || record.getSecondOfPairFlag())) { + if (record.getReadPairedFlag() && (record.getFirstOfPairFlag() || record.getSecondOfPairFlag())) { readName += (record.getFirstOfPairFlag()) ? FastqConstants.FIRST_OF_PAIR : FastqConstants.SECOND_OF_PAIR; } - return new FastqRecord(readName, record.getReadString(), record.getStringAttribute(SAMTag.CO), record.getBaseQualityString()); + return new FastqRecord( + readName, record.getReadString(), record.getStringAttribute(SAMTag.CO), record.getBaseQualityString()); } /** @@ -115,7 +118,8 @@ public static SAMRecord asSAMRecord(final FastqRecord record, final SAMFileHeade * @param header header for the returned object. 
* @param custom function to customize encoding. Note that default information might be overriden. */ - public static SAMRecord asSAMRecord(final FastqRecord record, final SAMFileHeader header, final BiConsumer custom) { + public static SAMRecord asSAMRecord( + final FastqRecord record, final SAMFileHeader header, final BiConsumer custom) { // construct the SAMRecord and set the unmapped flag final SAMRecord samRecord = new SAMRecord(header); samRecord.setReadUnmappedFlag(true); @@ -135,13 +139,12 @@ public static SAMRecord asSAMRecord(final FastqRecord record, final SAMFileHeade *

    Note that all tabs present in the quality header are replaced by spaces. */ public static final BiConsumer QUALITY_HEADER_TO_COMMENT_TAG = (record, samRecord) -> - samRecord.setAttribute(SAMTag.CO, record.getBaseQualityHeader().replaceAll("\t", " ")); - + samRecord.setAttribute(SAMTag.CO, record.getBaseQualityHeader().replaceAll("\t", " ")); public static final BiConsumer QUALITY_HEADER_PARSE_SAM_TAGS = (record, samRecord) -> { final String[] tokens = record.getBaseQualityHeader().split("\t"); final TextTagCodec codec = new TextTagCodec(); - for (final String token: tokens) { + for (final String token : tokens) { final Map.Entry tagValue = codec.decode(token); samRecord.setAttribute(tagValue.getKey(), tagValue.getValue()); } diff --git a/src/main/java/htsjdk/samtools/fastq/FastqReader.java b/src/main/java/htsjdk/samtools/fastq/FastqReader.java index c5d52f8dce..b62904b4b7 100755 --- a/src/main/java/htsjdk/samtools/fastq/FastqReader.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqReader.java @@ -26,7 +26,6 @@ import htsjdk.samtools.SAMException; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.StringUtil; - import java.io.BufferedReader; import java.io.Closeable; import java.io.File; @@ -54,20 +53,23 @@ protected enum LineType { this.printable = printable; } - @Override public String toString() { return this.printable; } + @Override + public String toString() { + return this.printable; + } } - final private File fastqFile; - final private BufferedReader reader; + private final File fastqFile; + private final BufferedReader reader; private FastqRecord nextRecord; - private int line=1; + private int line = 1; - final private boolean skipBlankLines; + private final boolean skipBlankLines; public FastqReader(final File file) { - this(file,false); + this(file, false); } - + /** * Constructor * @param file of FASTQ to read read. 
Will be opened with htsjdk.samtools.util.IOUtil.openFileForBufferedReading @@ -87,7 +89,7 @@ public FastqReader(final BufferedReader reader) { * @param reader input reader . Will be closed by the close method * @param skipBlankLines should we skip blank lines ? */ - public FastqReader(final File file, final BufferedReader reader,boolean skipBlankLines) { + public FastqReader(final File file, final BufferedReader reader, boolean skipBlankLines) { this.fastqFile = file; this.reader = reader; this.skipBlankLines = skipBlankLines; @@ -95,19 +97,20 @@ public FastqReader(final File file, final BufferedReader reader,boolean skipBlan } public FastqReader(final File file, final BufferedReader reader) { - this(file,reader,false); + this(file, reader, false); } private FastqRecord readNextRecord() { try { // Read sequence header final String seqHeader = readLineConditionallySkippingBlanks(); - if (seqHeader == null) return null ; + if (seqHeader == null) return null; if (StringUtil.isBlank(seqHeader)) { throw new SAMException(error("Missing sequence header")); } if (!seqHeader.startsWith(FastqConstants.SEQUENCE_HEADER)) { - throw new SAMException(error("Sequence header must start with " + FastqConstants.SEQUENCE_HEADER + ": " + seqHeader)); + throw new SAMException( + error("Sequence header must start with " + FastqConstants.SEQUENCE_HEADER + ": " + seqHeader)); } // Read sequence line @@ -118,7 +121,8 @@ private FastqRecord readNextRecord() { final String qualHeader = readLineConditionallySkippingBlanks(); checkLine(qualHeader, LineType.QualityHeader); if (!qualHeader.startsWith(FastqConstants.QUALITY_HEADER)) { - throw new SAMException(error("Quality header must start with " + FastqConstants.QUALITY_HEADER + ": "+ qualHeader)); + throw new SAMException( + error("Quality header must start with " + FastqConstants.QUALITY_HEADER + ": " + qualHeader)); } // Read quality line @@ -130,10 +134,13 @@ private FastqRecord readNextRecord() { throw new SAMException(error("Sequence and 
quality line must be the same length")); } - final FastqRecord frec = new FastqRecord(seqHeader.substring(1, seqHeader.length()), seqLine, - qualHeader.substring(1, qualHeader.length()), qualLine); - line += 4 ; - return frec ; + final FastqRecord frec = new FastqRecord( + seqHeader.substring(1, seqHeader.length()), + seqLine, + qualHeader.substring(1, qualHeader.length()), + qualLine); + line += 4; + return frec; } catch (IOException e) { throw new SAMException(error(e.getMessage()), e); @@ -141,7 +148,9 @@ private FastqRecord readNextRecord() { } @Override - public boolean hasNext() { return nextRecord != null; } + public boolean hasNext() { + return nextRecord != null; + } @Override public FastqRecord next() { @@ -154,7 +163,9 @@ public FastqRecord next() { } @Override - public void remove() { throw new UnsupportedOperationException("Unsupported operation"); } + public void remove() { + throw new UnsupportedOperationException("Unsupported operation"); + } /** * WARNING: Despite the fact that this class implements Iterable, calling iterator() method does not @@ -162,15 +173,20 @@ public FastqRecord next() { * directly. It is provided so that this class can be used in Java for-each loop. */ @Override - public Iterator iterator() { return this; } - - public int getLineNumber() { return line ; } + public Iterator iterator() { + return this; + } + public int getLineNumber() { + return line; + } /** * @return Name of FASTQ being read, or null if not known. 
*/ - public File getFile() { return fastqFile ; } + public File getFile() { + return fastqFile; + } @Override public void close() { @@ -206,7 +222,7 @@ private String readLineConditionallySkippingBlanks() throws IOException { do { line = reader.readLine(); if (line == null) return line; - } while(skipBlankLines && StringUtil.isBlank(line)); + } while (skipBlankLines && StringUtil.isBlank(line)); return line; } diff --git a/src/main/java/htsjdk/samtools/fastq/FastqRecord.java b/src/main/java/htsjdk/samtools/fastq/FastqRecord.java index 4439c0d51b..aa691299be 100755 --- a/src/main/java/htsjdk/samtools/fastq/FastqRecord.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqRecord.java @@ -26,7 +26,6 @@ import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMUtils; import htsjdk.samtools.util.StringUtil; - import java.io.Serializable; /** @@ -47,7 +46,8 @@ public class FastqRecord implements Serializable { * @param qualityHeader the quality header (without {@link FastqConstants#SEQUENCE_HEADER}) * @param baseQualities the base quality scores */ - public FastqRecord(final String readName, final String readBases, final String qualityHeader, final String baseQualities) { + public FastqRecord( + final String readName, final String readBases, final String qualityHeader, final String baseQualities) { if (readName != null && !readName.isEmpty()) { this.readName = readName; } else { @@ -70,7 +70,8 @@ public FastqRecord(final String readName, final String readBases, final String q * @param qualityHeader the quality header (without {@link FastqConstants#SEQUENCE_HEADER}) * @param baseQualities the base qualities as binary PHRED scores (not ASCII) */ - public FastqRecord(final String readName, final byte[] readBases, final String qualityHeader, final byte[] baseQualities) { + public FastqRecord( + final String readName, final byte[] readBases, final String qualityHeader, final byte[] baseQualities) { this(readName, StringUtil.bytesToString(readBases), qualityHeader, 
SAMUtils.phredToFastq(baseQualities)); } @@ -175,46 +176,31 @@ public int length() { public int hashCode() { final int prime = 31; int result = 1; - result = prime - * result - + ((qualityHeader == null) ? 0 : qualityHeader.hashCode()); - result = prime * result - + ((baseQualityString == null) ? 0 : baseQualityString.hashCode()); - result = prime * result - + ((readName == null) ? 0 : readName.hashCode()); + result = prime * result + ((qualityHeader == null) ? 0 : qualityHeader.hashCode()); + result = prime * result + ((baseQualityString == null) ? 0 : baseQualityString.hashCode()); + result = prime * result + ((readName == null) ? 0 : readName.hashCode()); result = prime * result + ((readString == null) ? 0 : readString.hashCode()); return result; } @Override public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; + if (this == obj) return true; + if (obj == null) return false; + if (getClass() != obj.getClass()) return false; FastqRecord other = (FastqRecord) obj; if (readString == null) { - if (other.readString != null) - return false; - } else if (!readString.equals(other.readString)) - return false; + if (other.readString != null) return false; + } else if (!readString.equals(other.readString)) return false; if (qualityHeader == null) { - if (other.qualityHeader != null) - return false; - } else if (!qualityHeader.equals(other.qualityHeader)) - return false; + if (other.qualityHeader != null) return false; + } else if (!qualityHeader.equals(other.qualityHeader)) return false; if (baseQualityString == null) { - if (other.baseQualityString != null) - return false; - } else if (!baseQualityString.equals(other.baseQualityString)) - return false; + if (other.baseQualityString != null) return false; + } else if (!baseQualityString.equals(other.baseQualityString)) return false; if (readName == null) { - if (other.readName != null) - return false; - } else if 
(!readName.equals(other.readName)) - return false; + if (other.readName != null) return false; + } else if (!readName.equals(other.readName)) return false; return true; } diff --git a/src/main/java/htsjdk/samtools/fastq/FastqWriter.java b/src/main/java/htsjdk/samtools/fastq/FastqWriter.java index 2b4b0f7d11..87920afbd8 100644 --- a/src/main/java/htsjdk/samtools/fastq/FastqWriter.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqWriter.java @@ -1,7 +1,6 @@ package htsjdk.samtools.fastq; import htsjdk.io.Writer; - import java.io.Closeable; /** @@ -14,4 +13,4 @@ public interface FastqWriter extends Closeable, Writer { @Override void close(); -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/samtools/fastq/FastqWriterFactory.java b/src/main/java/htsjdk/samtools/fastq/FastqWriterFactory.java index 273e3524ee..6f0663c595 100644 --- a/src/main/java/htsjdk/samtools/fastq/FastqWriterFactory.java +++ b/src/main/java/htsjdk/samtools/fastq/FastqWriterFactory.java @@ -1,7 +1,6 @@ package htsjdk.samtools.fastq; import htsjdk.samtools.Defaults; - import java.io.File; /** @@ -11,20 +10,23 @@ */ public class FastqWriterFactory { boolean useAsyncIo = Defaults.USE_ASYNC_IO_WRITE_FOR_SAMTOOLS; - boolean createMd5 = Defaults.CREATE_MD5; + boolean createMd5 = Defaults.CREATE_MD5; /** Sets whether or not to use async io (i.e. a dedicated thread per writer. */ - public void setUseAsyncIo(final boolean useAsyncIo) { this.useAsyncIo = useAsyncIo; } + public void setUseAsyncIo(final boolean useAsyncIo) { + this.useAsyncIo = useAsyncIo; + } /** If true, compute MD5 and write appropriately-named file when file is closed. 
*/ - public void setCreateMd5(final boolean createMd5) { this.createMd5 = createMd5; } + public void setCreateMd5(final boolean createMd5) { + this.createMd5 = createMd5; + } public FastqWriter newWriter(final File out) { final FastqWriter writer = new BasicFastqWriter(out, createMd5); if (useAsyncIo) { return new AsyncFastqWriter(writer, AsyncFastqWriter.DEFAULT_QUEUE_SIZE); - } - else { + } else { return writer; } } diff --git a/src/main/java/htsjdk/samtools/filter/AbstractJavascriptFilter.java b/src/main/java/htsjdk/samtools/filter/AbstractJavascriptFilter.java index 91637359b6..a74c96d1c7 100644 --- a/src/main/java/htsjdk/samtools/filter/AbstractJavascriptFilter.java +++ b/src/main/java/htsjdk/samtools/filter/AbstractJavascriptFilter.java @@ -23,12 +23,13 @@ */ package htsjdk.samtools.filter; +import htsjdk.samtools.util.CloserUtil; +import htsjdk.samtools.util.RuntimeScriptException; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.io.Reader; import java.io.StringReader; - import javax.script.Bindings; import javax.script.Compilable; import javax.script.CompiledScript; @@ -37,16 +38,13 @@ import javax.script.ScriptException; import javax.script.SimpleBindings; -import htsjdk.samtools.util.CloserUtil; -import htsjdk.samtools.util.RuntimeScriptException; - /** * Javascript filter with HEADER type containing TYPE records. contains two * static method to get a SAM Read filter or a VariantFilter. - * + * * warning: tools, like galaxy, using this class are not safe because a script * can access the filesystem. - * + * * @author Pierre Lindenbaum PhD */ public abstract class AbstractJavascriptFilter { @@ -75,7 +73,7 @@ protected AbstractJavascriptFilter(final String scriptExpression, final HEADER h /** * Constructor, compiles script, put header in the bindings - * + * * @param scriptReader * reader containing the script. will be closed. 
* @param header @@ -87,13 +85,12 @@ protected AbstractJavascriptFilter(final Reader scriptReader, final HEADER heade final ScriptEngine engine = manager.getEngineByName("js"); if (engine == null) { CloserUtil.close(scriptReader); - throw new RuntimeScriptException("The embedded 'javascript' engine is not available in java. " - + "Do you use the SUN/Oracle Java Runtime ?"); + throw new RuntimeScriptException(noJsEngineMessage(this.getClass().getSimpleName())); } if (scriptReader == null) { throw new RuntimeScriptException("missing ScriptReader."); } - + try { final Compilable compilingEngine = getCompilable(engine); this.script = compilingEngine.compile(scriptReader); @@ -111,6 +108,30 @@ protected AbstractJavascriptFilter(final Reader scriptReader, final HEADER heade this.bindings.put(DEFAULT_HEADER_KEY, header); } + static String noJsEngineMessage(final String filterClassName) { + return String.join( + "\n", + "No JSR-223 JavaScript engine (lookup name \"js\") was found on the classpath.", + "", + "Starting with htsjdk 5.0.0, htsjdk no longer ships a JavaScript engine as a runtime", + "dependency, so that consumers who do not use the JavaScript filter classes do not pay", + "the cost of carrying ~6 extra jars (nashorn-core plus its ASM transitives, ~2.5 MB).", + "", + "To use " + filterClassName + ", add a JSR-223-compatible JavaScript engine to your", + "runtime classpath. 
The recommended choice is OpenJDK Nashorn:", + "", + " Gradle: runtimeOnly 'org.openjdk.nashorn:nashorn-core:15.7'", + "", + " Maven: ", + " org.openjdk.nashorn", + " nashorn-core", + " 15.7", + " runtime", + " ", + "", + "Any other JSR-223 engine that registers under the name \"js\" will also work."); + } + /** return a javascript engine as a Compilable */ private static Compilable getCompilable(final ScriptEngine engine) { if (!(engine instanceof Compilable)) { @@ -130,7 +151,7 @@ public String getHeaderKey() { /** * Evaluates this predicate on the given argument - * + * * @param record * the record to test. It will be inject in the javascript * context using getRecordKey() diff --git a/src/main/java/htsjdk/samtools/filter/AggregateFilter.java b/src/main/java/htsjdk/samtools/filter/AggregateFilter.java index 62b804b79b..99917794aa 100644 --- a/src/main/java/htsjdk/samtools/filter/AggregateFilter.java +++ b/src/main/java/htsjdk/samtools/filter/AggregateFilter.java @@ -1,81 +1,80 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package htsjdk.samtools.filter; - -import htsjdk.samtools.SAMRecord; - -import java.util.List; - -/** - * Aggregates multiple filters and provides a method for applying them all to a given record with - * one method call. - * - * $Id$ - */ -public class AggregateFilter implements SamRecordFilter { - - private final List filters; - - /** - * Constructor - * @param filters the list of filters that this Aggregator applies - */ - public AggregateFilter(final List filters) { - this.filters = filters; - } - - /** - * Determines whether a SAMRecord matches this filter - * - * @param record the SAMRecord to evaluate - * @return true if the SAMRecord matches at least one filter, otherwise false - */ - @Override - public boolean filterOut(final SAMRecord record) { - for (final SamRecordFilter filter : filters) { - if (filter.filterOut(record)) { - return true; - } - } - return false; - } - - /** - * Determines whether a pair of SAMRecord matches this filter - * - * @param first the first SAMRecord to evaluate - * @param second the second SAMRecord to evaluate - * - * @return true if the SAMRecords matches the filter, otherwise false - */ - @Override - public boolean filterOut(final SAMRecord first, final SAMRecord second) { - for (final SamRecordFilter filter : filters) { - if (filter.filterOut(first, second)) { - return true; - } - } - return false; - } -} +/* + * The MIT License + * + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, 
merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools.filter; + +import htsjdk.samtools.SAMRecord; +import java.util.List; + +/** + * Aggregates multiple filters and provides a method for applying them all to a given record with + * one method call. 
+ * + * $Id$ + */ +public class AggregateFilter implements SamRecordFilter { + + private final List filters; + + /** + * Constructor + * @param filters the list of filters that this Aggregator applies + */ + public AggregateFilter(final List filters) { + this.filters = filters; + } + + /** + * Determines whether a SAMRecord matches this filter + * + * @param record the SAMRecord to evaluate + * @return true if the SAMRecord matches at least one filter, otherwise false + */ + @Override + public boolean filterOut(final SAMRecord record) { + for (final SamRecordFilter filter : filters) { + if (filter.filterOut(record)) { + return true; + } + } + return false; + } + + /** + * Determines whether a pair of SAMRecord matches this filter + * + * @param first the first SAMRecord to evaluate + * @param second the second SAMRecord to evaluate + * + * @return true if the SAMRecords matches the filter, otherwise false + */ + @Override + public boolean filterOut(final SAMRecord first, final SAMRecord second) { + for (final SamRecordFilter filter : filters) { + if (filter.filterOut(first, second)) { + return true; + } + } + return false; + } +} diff --git a/src/main/java/htsjdk/samtools/filter/AlignedFilter.java b/src/main/java/htsjdk/samtools/filter/AlignedFilter.java index cebdc0b95b..f91992f0e0 100644 --- a/src/main/java/htsjdk/samtools/filter/AlignedFilter.java +++ b/src/main/java/htsjdk/samtools/filter/AlignedFilter.java @@ -1,89 +1,89 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The 
above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package htsjdk.samtools.filter; - -import htsjdk.samtools.SAMRecord; - -/** - * Filter to either include or exclude aligned reads - * - * $Id$ - */ -public class AlignedFilter implements SamRecordFilter { - - private boolean includeAligned = false; - - public AlignedFilter(final boolean includeAligned) { - this.includeAligned = includeAligned; - } - - /** - * Determines whether a SAMRecord matches this filter - * - * @param record the SAMRecord to evaluate - * - * @return true if the SAMRecord matches the filter, otherwise false - */ - @Override - public boolean filterOut(final SAMRecord record) { - if (includeAligned) { - if (!record.getReadUnmappedFlag()) { - return false; - } - } else { - // exclude aligned - if (record.getReadUnmappedFlag()) { - return false; - } - } - - return true; - } - - /** - * Determines whether a pair of SAMRecord matches this filter - * - * @param first the first SAMRecord to evaluate - * @param second the second SAMRecord to evaluate - * - * @return true if the SAMRecords matches the filter, otherwise false - */ - @Override - public boolean filterOut(final SAMRecord first, final SAMRecord second) { - - if (includeAligned) { - // both first and second must be mapped for it to not be filtered out - if (!first.getReadUnmappedFlag() && !second.getReadUnmappedFlag()) { - return false; - } - } else { - // exclude aligned - if 
either first or second is unmapped don't filter it out - if (first.getReadUnmappedFlag() || second.getReadUnmappedFlag()) { - return false; - } - } - - return true; - } -} \ No newline at end of file +/* + * The MIT License + * + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +package htsjdk.samtools.filter; + +import htsjdk.samtools.SAMRecord; + +/** + * Filter to either include or exclude aligned reads + * + * $Id$ + */ +public class AlignedFilter implements SamRecordFilter { + + private boolean includeAligned = false; + + public AlignedFilter(final boolean includeAligned) { + this.includeAligned = includeAligned; + } + + /** + * Determines whether a SAMRecord matches this filter + * + * @param record the SAMRecord to evaluate + * + * @return true if the SAMRecord matches the filter, otherwise false + */ + @Override + public boolean filterOut(final SAMRecord record) { + if (includeAligned) { + if (!record.getReadUnmappedFlag()) { + return false; + } + } else { + // exclude aligned + if (record.getReadUnmappedFlag()) { + return false; + } + } + + return true; + } + + /** + * Determines whether a pair of SAMRecord matches this filter + * + * @param first the first SAMRecord to evaluate + * @param second the second SAMRecord to evaluate + * + * @return true if the SAMRecords matches the filter, otherwise false + */ + @Override + public boolean filterOut(final SAMRecord first, final SAMRecord second) { + + if (includeAligned) { + // both first and second must be mapped for it to not be filtered out + if (!first.getReadUnmappedFlag() && !second.getReadUnmappedFlag()) { + return false; + } + } else { + // exclude aligned - if either first or second is unmapped don't filter it out + if (first.getReadUnmappedFlag() || second.getReadUnmappedFlag()) { + return false; + } + } + + return true; + } +} diff --git a/src/main/java/htsjdk/samtools/filter/DuplicateReadFilter.java b/src/main/java/htsjdk/samtools/filter/DuplicateReadFilter.java index 2fe773f11e..cc5d458b94 100644 --- a/src/main/java/htsjdk/samtools/filter/DuplicateReadFilter.java +++ b/src/main/java/htsjdk/samtools/filter/DuplicateReadFilter.java @@ -22,6 +22,7 @@ * THE SOFTWARE. 
*/ package htsjdk.samtools.filter; + import htsjdk.samtools.SAMRecord; /** diff --git a/src/main/java/htsjdk/samtools/filter/FailsVendorReadQualityFilter.java b/src/main/java/htsjdk/samtools/filter/FailsVendorReadQualityFilter.java index 661286df31..aac27be4bf 100644 --- a/src/main/java/htsjdk/samtools/filter/FailsVendorReadQualityFilter.java +++ b/src/main/java/htsjdk/samtools/filter/FailsVendorReadQualityFilter.java @@ -1,59 +1,59 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ -package htsjdk.samtools.filter; - -import htsjdk.samtools.SAMRecord; - -/** - * Filter for filtering out reads that do not pass the quality filter - * - * $Id$ - */ -public class FailsVendorReadQualityFilter implements SamRecordFilter { - - /** - * Determines whether a SAMRecord matches this filter - * - * @param record the SAMRecord to evaluate - * @return true if the SAMRecord matches the filter, otherwise false - */ - @Override - public boolean filterOut(final SAMRecord record) { - return record.getReadFailsVendorQualityCheckFlag(); - } - - /** - * Determines whether a pair of SAMRecord matches this filter - * - * @param first the first SAMRecord to evaluate - * @param second the second SAMRecord to evaluate - * - * @return true if the SAMRecords matches the filter, otherwise false - */ - @Override - public boolean filterOut(final SAMRecord first, final SAMRecord second) { - // if either fails, exclude them both - return (first.getReadFailsVendorQualityCheckFlag() || second.getReadFailsVendorQualityCheckFlag()); - } -} +/* + * The MIT License + * + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools.filter; + +import htsjdk.samtools.SAMRecord; + +/** + * Filter for filtering out reads that do not pass the quality filter + * + * $Id$ + */ +public class FailsVendorReadQualityFilter implements SamRecordFilter { + + /** + * Determines whether a SAMRecord matches this filter + * + * @param record the SAMRecord to evaluate + * @return true if the SAMRecord matches the filter, otherwise false + */ + @Override + public boolean filterOut(final SAMRecord record) { + return record.getReadFailsVendorQualityCheckFlag(); + } + + /** + * Determines whether a pair of SAMRecord matches this filter + * + * @param first the first SAMRecord to evaluate + * @param second the second SAMRecord to evaluate + * + * @return true if the SAMRecords matches the filter, otherwise false + */ + @Override + public boolean filterOut(final SAMRecord first, final SAMRecord second) { + // if either fails, exclude them both + return (first.getReadFailsVendorQualityCheckFlag() || second.getReadFailsVendorQualityCheckFlag()); + } +} diff --git a/src/main/java/htsjdk/samtools/filter/FilteringIterator.java b/src/main/java/htsjdk/samtools/filter/FilteringIterator.java index 4cdaebe89c..1cbac0b5ac 100644 --- a/src/main/java/htsjdk/samtools/filter/FilteringIterator.java +++ b/src/main/java/htsjdk/samtools/filter/FilteringIterator.java @@ -25,7 +25,6 @@ package htsjdk.samtools.filter; import htsjdk.samtools.SAMRecord; - import java.util.Iterator; /** @@ -38,16 +37,16 @@ * * @deprecated use {@link FilteringSamIterator} instead */ +@Deprecated +/** use {@link FilteringSamIterator} instead **/ +public class FilteringIterator extends FilteringSamIterator { -@Deprecated /** use {@link FilteringSamIterator} 
instead **/ -public class FilteringIterator extends FilteringSamIterator{ - - public FilteringIterator(final Iterator iterator, final SamRecordFilter filter, final boolean filterByPair) { + public FilteringIterator( + final Iterator iterator, final SamRecordFilter filter, final boolean filterByPair) { super(iterator, filter, filterByPair); } public FilteringIterator(final Iterator iterator, final SamRecordFilter filter) { super(iterator, filter); } - } diff --git a/src/main/java/htsjdk/samtools/filter/FilteringSamIterator.java b/src/main/java/htsjdk/samtools/filter/FilteringSamIterator.java index a70156ad6e..9d08f7c232 100644 --- a/src/main/java/htsjdk/samtools/filter/FilteringSamIterator.java +++ b/src/main/java/htsjdk/samtools/filter/FilteringSamIterator.java @@ -31,7 +31,6 @@ import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.PeekableIterator; - import java.util.Iterator; import java.util.NoSuchElementException; @@ -57,11 +56,11 @@ public class FilteringSamIterator implements CloseableIterator { * @param filter the filter (which may be a FilterAggregator) * @param filterByPair if true, filter reads in pairs */ - public FilteringSamIterator(final Iterator iterator, final SamRecordFilter filter, - final boolean filterByPair) { + public FilteringSamIterator( + final Iterator iterator, final SamRecordFilter filter, final boolean filterByPair) { if (filterByPair && iterator instanceof SAMRecordIterator) { - ((SAMRecordIterator)iterator).assertSorted(SAMFileHeader.SortOrder.queryname); + ((SAMRecordIterator) iterator).assertSorted(SAMFileHeader.SortOrder.queryname); } this.iterator = new PeekableIterator(iterator); @@ -134,8 +133,7 @@ private SAMRecord getNextRecord() { while (iterator.hasNext()) { final SAMRecord record = iterator.next(); - if (filterReadPairs && record.getReadPairedFlag() && record.getFirstOfPairFlag() && - iterator.hasNext()) { + if (filterReadPairs && record.getReadPairedFlag() && 
record.getFirstOfPairFlag() && iterator.hasNext()) { SamPairUtil.assertMate(record, iterator.peek()); @@ -145,8 +143,7 @@ private SAMRecord getNextRecord() { } else { return record; } - } else if (filterReadPairs && record.getReadPairedFlag() && - record.getSecondOfPairFlag()) { + } else if (filterReadPairs && record.getReadPairedFlag() && record.getSecondOfPairFlag()) { // assume that we did a pass(first, second) and it passed the filter return record; } else if (!filter.filterOut(record)) { @@ -156,4 +153,4 @@ private SAMRecord getNextRecord() { return null; } -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/samtools/filter/InsertSizeFilter.java b/src/main/java/htsjdk/samtools/filter/InsertSizeFilter.java index 1e99fa92b8..2d0dfa2817 100644 --- a/src/main/java/htsjdk/samtools/filter/InsertSizeFilter.java +++ b/src/main/java/htsjdk/samtools/filter/InsertSizeFilter.java @@ -28,4 +28,4 @@ public boolean filterOut(final SAMRecord rec) { public boolean filterOut(final SAMRecord r1, final SAMRecord r2) { return filterOut(r1) || filterOut(r2); } -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/samtools/filter/IntervalFilter.java b/src/main/java/htsjdk/samtools/filter/IntervalFilter.java index ef5c98a3ff..bc54af2ada 100644 --- a/src/main/java/htsjdk/samtools/filter/IntervalFilter.java +++ b/src/main/java/htsjdk/samtools/filter/IntervalFilter.java @@ -27,7 +27,6 @@ import htsjdk.samtools.SAMRecord; import htsjdk.samtools.util.Interval; import htsjdk.samtools.util.IntervalUtil; - import java.util.Iterator; import java.util.List; @@ -45,12 +44,13 @@ public class IntervalFilter implements SamRecordFilter { * Null only if there are no more intervals */ private final SAMFileHeader samHeader; + private Interval currentInterval; private int currentSequenceIndex; /** * Prepare to filter out SAMRecords that do not overlap the given list of intervals - * @param intervals -- must be locus-ordered & non-overlapping + * @param intervals -- must be 
locus-ordered and non-overlapping */ public IntervalFilter(final List intervals, final SAMFileHeader samHeader) { this.samHeader = samHeader; @@ -67,14 +67,16 @@ public IntervalFilter(final List intervals, final SAMFileHeader samHea */ @Override public boolean filterOut(final SAMRecord record) { - while (currentInterval != null && - (currentSequenceIndex < record.getReferenceIndex() || - (currentSequenceIndex == record.getReferenceIndex() && currentInterval.getEnd() < record.getAlignmentStart()))) { + while (currentInterval != null + && (currentSequenceIndex < record.getReferenceIndex() + || (currentSequenceIndex == record.getReferenceIndex() + && currentInterval.getEnd() < record.getAlignmentStart()))) { advanceInterval(); } // Return true if record should be filtered out - return !(currentInterval != null && currentSequenceIndex == record.getReferenceIndex() && - currentInterval.getStart() <= record.getAlignmentEnd()); + return !(currentInterval != null + && currentSequenceIndex == record.getReferenceIndex() + && currentInterval.getStart() <= record.getAlignmentEnd()); } private void advanceInterval() { @@ -99,6 +101,7 @@ public boolean filterOut(final SAMRecord first, final SAMRecord second) { // This can never be implemented because if the bam is coordinate sorted, // which it has to be for this filter, it will never get both the first and second reads together // and the filterOut method goes in order of the intervals in coordinate order so it will miss reads. 
- throw new UnsupportedOperationException("Paired IntervalFilter filter cannot be implemented, use IntervalKeepPairFilter."); + throw new UnsupportedOperationException( + "Paired IntervalFilter filter cannot be implemented, use IntervalKeepPairFilter."); } } diff --git a/src/main/java/htsjdk/samtools/filter/IntervalKeepPairFilter.java b/src/main/java/htsjdk/samtools/filter/IntervalKeepPairFilter.java index 9a6d564020..593192c56d 100644 --- a/src/main/java/htsjdk/samtools/filter/IntervalKeepPairFilter.java +++ b/src/main/java/htsjdk/samtools/filter/IntervalKeepPairFilter.java @@ -27,7 +27,6 @@ import htsjdk.samtools.SAMUtils; import htsjdk.samtools.util.Interval; import htsjdk.samtools.util.OverlapDetector; - import java.util.Collection; import java.util.List; @@ -68,7 +67,7 @@ public IntervalKeepPairFilter(final List intervals) { @Override public boolean filterOut(final SAMRecord record) { if (record.isSecondaryOrSupplementary() || !record.getReadPairedFlag()) { - return true; + return true; } if (!record.getReadUnmappedFlag() @@ -76,8 +75,11 @@ && hasOverlaps(record.getReferenceName(), record.getStart(), record.getEnd())) { return false; } - return record.getMateUnmappedFlag() || !hasOverlaps(record.getMateReferenceName(), - record.getMateAlignmentStart(), SAMUtils.getMateAlignmentEnd(record)); + return record.getMateUnmappedFlag() + || !hasOverlaps( + record.getMateReferenceName(), + record.getMateAlignmentStart(), + SAMUtils.getMateAlignmentEnd(record)); } /** diff --git a/src/main/java/htsjdk/samtools/filter/InvertFilter.java b/src/main/java/htsjdk/samtools/filter/InvertFilter.java index f692ff2f8a..1d972c109b 100644 --- a/src/main/java/htsjdk/samtools/filter/InvertFilter.java +++ b/src/main/java/htsjdk/samtools/filter/InvertFilter.java @@ -25,8 +25,6 @@ import htsjdk.samtools.SAMRecord; -import java.util.List; - /** * Wraps an existing filter and inverts it. If the wrapped * filter would have filtered something out this will keep it and vice versa. 
@@ -64,6 +62,6 @@ public boolean filterOut(final SAMRecord record) { */ @Override public boolean filterOut(final SAMRecord first, final SAMRecord second) { - return !filter.filterOut(first,second); + return !filter.filterOut(first, second); } } diff --git a/src/main/java/htsjdk/samtools/filter/JavascriptSamRecordFilter.java b/src/main/java/htsjdk/samtools/filter/JavascriptSamRecordFilter.java index 20c784b5b0..0a452dc3d9 100644 --- a/src/main/java/htsjdk/samtools/filter/JavascriptSamRecordFilter.java +++ b/src/main/java/htsjdk/samtools/filter/JavascriptSamRecordFilter.java @@ -23,32 +23,41 @@ */ package htsjdk.samtools.filter; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMRecord; import java.io.File; import java.io.IOException; import java.io.Reader; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMRecord; - /** - * javascript based read filter - * - * - * The script puts the following variables in the script context: + * JavaScript-based {@link SamRecordFilter}. + * + *

    The user-supplied script is evaluated against each {@link SAMRecord} with the following + * variables in scope: + * + *

      + *
    • {@code record} - the {@link SAMRecord} being evaluated
    • + *
    • {@code header} - the {@link SAMFileHeader} associated with the reader
    • + *
    + * + *

    Example: keep only records with mapping quality >= 30: + *

    {@code
    + *     new JavascriptSamRecordFilter("record.getMappingQuality() >= 30;", header)
    + * }
    + * + *

    Runtime requirement: as of htsjdk 5.0.0, htsjdk does not ship a JavaScript engine as + * a runtime dependency. To use this class, add a JSR-223-compatible JavaScript engine + * (e.g. {@code org.openjdk.nashorn:nashorn-core}) to your runtime classpath. If no engine is + * available, the constructor throws a {@link htsjdk.samtools.util.RuntimeScriptException} whose + * message lists the dependency coordinates. * - * - 'record' a SamRecord ( - * https://github.com/samtools/htsjdk/blob/master/src/java/htsjdk/samtools/ - * SAMRecord.java ) - 'header' ( - * https://github.com/samtools/htsjdk/blob/master/src/java/htsjdk/samtools/ - * SAMFileHeader.java ) - * * @author Pierre Lindenbaum PhD Institut du Thorax - INSERM - Nantes - France */ public class JavascriptSamRecordFilter extends AbstractJavascriptFilter implements SamRecordFilter { /** * constructor using a javascript File - * + * * @param scriptFile * the javascript file to be compiled * @param header @@ -60,7 +69,7 @@ public JavascriptSamRecordFilter(final File scriptFile, final SAMFileHeader head /** * constructor using a javascript expression - * + * * @param scriptExpression * the javascript expression to be compiled * @param header @@ -72,7 +81,7 @@ public JavascriptSamRecordFilter(final String scriptExpression, final SAMFileHea /** * constructor using a java.io.Reader - * + * * @param scriptReader * the javascript reader to be compiled. will be closed * @param header diff --git a/src/main/java/htsjdk/samtools/filter/NotPrimaryAlignmentFilter.java b/src/main/java/htsjdk/samtools/filter/NotPrimaryAlignmentFilter.java index 5a809613c5..d5c7c4f90b 100644 --- a/src/main/java/htsjdk/samtools/filter/NotPrimaryAlignmentFilter.java +++ b/src/main/java/htsjdk/samtools/filter/NotPrimaryAlignmentFilter.java @@ -30,6 +30,4 @@ * @deprecated use {@link SecondaryAlignmentFilter} instead. 
*/ @Deprecated -public class NotPrimaryAlignmentFilter extends SecondaryAlignmentFilter { - -} +public class NotPrimaryAlignmentFilter extends SecondaryAlignmentFilter {} diff --git a/src/main/java/htsjdk/samtools/filter/OverclippedReadFilter.java b/src/main/java/htsjdk/samtools/filter/OverclippedReadFilter.java index 2e8f43f9e8..afde83aec9 100644 --- a/src/main/java/htsjdk/samtools/filter/OverclippedReadFilter.java +++ b/src/main/java/htsjdk/samtools/filter/OverclippedReadFilter.java @@ -37,7 +37,8 @@ public class OverclippedReadFilter implements SamRecordFilter { // if the number of unclipped bases is below this threshold, the read is considered overclipped private final int unclippedBasesThreshold; - // if set to true, then reads with at least one clipped end will be filtered; if false, we require both ends to be clipped + // if set to true, then reads with at least one clipped end will be filtered; if false, we require both ends to be + // clipped private final boolean filterSingleEndClips; public OverclippedReadFilter(final int unclippedBasesThreshold, final boolean filterSingleEndClips) { @@ -53,20 +54,21 @@ public boolean filterOut(final SAMRecord record) { int minSoftClipBlocks = filterSingleEndClips ? 
1 : 2; CigarOperator lastOperator = null; - for ( final CigarElement element : record.getCigar().getCigarElements() ) { - if ( element.getOperator() == CigarOperator.S ) { - //Treat consecutive S blocks as a single one - if(lastOperator != CigarOperator.S){ + for (final CigarElement element : record.getCigar().getCigarElements()) { + if (element.getOperator() == CigarOperator.S) { + // Treat consecutive S blocks as a single one + if (lastOperator != CigarOperator.S) { softClipBlocks += 1; } - } else if ( element.getOperator().consumesReadBases() ) { // M, I, X, and EQ (S was already accounted for above) + } else if (element.getOperator() + .consumesReadBases()) { // M, I, X, and EQ (S was already accounted for above) alignedLength += element.getLength(); } lastOperator = element.getOperator(); } - return(alignedLength < unclippedBasesThreshold && softClipBlocks >= minSoftClipBlocks); + return (alignedLength < unclippedBasesThreshold && softClipBlocks >= minSoftClipBlocks); } @Override diff --git a/src/main/java/htsjdk/samtools/filter/ReadNameFilter.java b/src/main/java/htsjdk/samtools/filter/ReadNameFilter.java index dfda870d06..a499c0f261 100644 --- a/src/main/java/htsjdk/samtools/filter/ReadNameFilter.java +++ b/src/main/java/htsjdk/samtools/filter/ReadNameFilter.java @@ -1,112 +1,108 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package htsjdk.samtools.filter; - -import htsjdk.samtools.SAMException; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.util.IOUtil; - -import java.io.BufferedReader; -import java.io.File; -import java.io.IOException; -import java.util.HashSet; -import java.util.Set; - -/** - * Filter by a set of specified readnames - *

    - * $Id$ - */ -public class ReadNameFilter implements SamRecordFilter { - - private boolean includeReads = false; - private Set readNameFilterSet = new HashSet<>(); - - public ReadNameFilter(final File readNameFilterFile, final boolean includeReads) { - - IOUtil.assertFileIsReadable(readNameFilterFile); - IOUtil.assertFileSizeNonZero(readNameFilterFile); - - try { - final BufferedReader in = IOUtil.openFileForBufferedReading(readNameFilterFile); - - String line = null; - - while ((line = in.readLine()) != null) { - if (!line.trim().isEmpty()) { - readNameFilterSet.add(line.split("\\s+")[0]); - } - } - - in.close(); - } catch (IOException e) { - throw new SAMException(e.getMessage(), e); - } - - this.includeReads = includeReads; - } - - public ReadNameFilter(final Set readNameFilterSet, final boolean includeReads) { - this.readNameFilterSet = readNameFilterSet; - this.includeReads = includeReads; - } - - /** - * Determines whether a SAMRecord matches this filter - * - * @param record the SAMRecord to evaluate - * - * @return true if the SAMRecord matches the filter, otherwise false - */ - @Override - public boolean filterOut(final SAMRecord record) { - return readNameFilterSet.contains(record.getReadName()) != includeReads; - } - - /** - * Determines whether a pair of SAMRecords matches this filter - * - * @param first the first SAMRecord to evaluate - * @param second the second SAMRecord to evaluate - * - * @return true if the pair of records matches filter, otherwise false - */ - @Override - public boolean filterOut(final SAMRecord first, final SAMRecord second) { - if (includeReads) { - if (readNameFilterSet.contains(first.getReadName()) && - readNameFilterSet.contains(second.getReadName())) { - return false; - } - } else { - if (!readNameFilterSet.contains(first.getReadName()) && - !readNameFilterSet.contains(second.getReadName())) { - return false; - } - } - - return true; - } - -} +/* + * The MIT License + * + * Copyright (c) 2009 The Broad Institute + * + 
* Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools.filter; + +import htsjdk.samtools.SAMException; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.util.IOUtil; +import java.io.BufferedReader; +import java.io.File; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +/** + * Filter by a set of specified readnames + *

    + * $Id$ + */ +public class ReadNameFilter implements SamRecordFilter { + + private boolean includeReads = false; + private Set readNameFilterSet = new HashSet<>(); + + public ReadNameFilter(final File readNameFilterFile, final boolean includeReads) { + + IOUtil.assertFileIsReadable(readNameFilterFile); + IOUtil.assertFileSizeNonZero(readNameFilterFile); + + try { + final BufferedReader in = IOUtil.openFileForBufferedReading(readNameFilterFile); + + String line = null; + + while ((line = in.readLine()) != null) { + if (!line.trim().isEmpty()) { + readNameFilterSet.add(line.split("\\s+")[0]); + } + } + + in.close(); + } catch (IOException e) { + throw new SAMException(e.getMessage(), e); + } + + this.includeReads = includeReads; + } + + public ReadNameFilter(final Set readNameFilterSet, final boolean includeReads) { + this.readNameFilterSet = readNameFilterSet; + this.includeReads = includeReads; + } + + /** + * Determines whether a SAMRecord matches this filter + * + * @param record the SAMRecord to evaluate + * + * @return true if the SAMRecord matches the filter, otherwise false + */ + @Override + public boolean filterOut(final SAMRecord record) { + return readNameFilterSet.contains(record.getReadName()) != includeReads; + } + + /** + * Determines whether a pair of SAMRecords matches this filter + * + * @param first the first SAMRecord to evaluate + * @param second the second SAMRecord to evaluate + * + * @return true if the pair of records matches filter, otherwise false + */ + @Override + public boolean filterOut(final SAMRecord first, final SAMRecord second) { + if (includeReads) { + if (readNameFilterSet.contains(first.getReadName()) && readNameFilterSet.contains(second.getReadName())) { + return false; + } + } else { + if (!readNameFilterSet.contains(first.getReadName()) && !readNameFilterSet.contains(second.getReadName())) { + return false; + } + } + + return true; + } +} diff --git a/src/main/java/htsjdk/samtools/filter/SamRecordFilter.java 
b/src/main/java/htsjdk/samtools/filter/SamRecordFilter.java index 5ca280e126..8c4800a869 100644 --- a/src/main/java/htsjdk/samtools/filter/SamRecordFilter.java +++ b/src/main/java/htsjdk/samtools/filter/SamRecordFilter.java @@ -1,53 +1,53 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ -package htsjdk.samtools.filter; - -import htsjdk.samtools.SAMRecord; - -/** - * API for filtering SAMRecords - * - * $Id$ - */ -public interface SamRecordFilter { - - /** - * Determines whether a SAMRecord matches this filter - * - * @param record the SAMRecord to evaluate - * - * @return true if the SAMRecord matches the filter, otherwise false - */ - public boolean filterOut(SAMRecord record); - - /** - * Determines whether a pair of SAMRecords matches this filter - * - * @param first the first SAMRecord to evaluate - * @param second the second SAMRecord to evaluate - * - * @return true if the pair of records matches filter, otherwise false - */ - public boolean filterOut(SAMRecord first, SAMRecord second); -} +/* + * The MIT License + * + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +package htsjdk.samtools.filter; + +import htsjdk.samtools.SAMRecord; + +/** + * API for filtering SAMRecords + * + * $Id$ + */ +public interface SamRecordFilter { + + /** + * Determines whether a SAMRecord matches this filter + * + * @param record the SAMRecord to evaluate + * + * @return true if the SAMRecord matches the filter, otherwise false + */ + public boolean filterOut(SAMRecord record); + + /** + * Determines whether a pair of SAMRecords matches this filter + * + * @param first the first SAMRecord to evaluate + * @param second the second SAMRecord to evaluate + * + * @return true if the pair of records matches filter, otherwise false + */ + public boolean filterOut(SAMRecord first, SAMRecord second); +} diff --git a/src/main/java/htsjdk/samtools/filter/SecondaryAlignmentFilter.java b/src/main/java/htsjdk/samtools/filter/SecondaryAlignmentFilter.java index ee694edb87..2a2453c7a7 100644 --- a/src/main/java/htsjdk/samtools/filter/SecondaryAlignmentFilter.java +++ b/src/main/java/htsjdk/samtools/filter/SecondaryAlignmentFilter.java @@ -10,7 +10,9 @@ public class SecondaryAlignmentFilter implements SamRecordFilter { * Returns true if the read is marked as secondary. */ @Override - public boolean filterOut(final SAMRecord record) { return record.isSecondaryAlignment(); } + public boolean filterOut(final SAMRecord record) { + return record.isSecondaryAlignment(); + } /** * Returns true if either read is marked as secondary. 
diff --git a/src/main/java/htsjdk/samtools/filter/SecondaryOrSupplementaryFilter.java b/src/main/java/htsjdk/samtools/filter/SecondaryOrSupplementaryFilter.java index 9c56335a85..3649bf71e2 100644 --- a/src/main/java/htsjdk/samtools/filter/SecondaryOrSupplementaryFilter.java +++ b/src/main/java/htsjdk/samtools/filter/SecondaryOrSupplementaryFilter.java @@ -8,7 +8,7 @@ * in that we did not want to change the functionality of NPSI to no longer match its name * $Id$ */ -public class SecondaryOrSupplementaryFilter implements SamRecordFilter { +public class SecondaryOrSupplementaryFilter implements SamRecordFilter { /** * @param record the SAMRecord to evaluate * @return true if the SAMRecord matches the filter, otherwise false diff --git a/src/main/java/htsjdk/samtools/filter/SolexaNoiseFilter.java b/src/main/java/htsjdk/samtools/filter/SolexaNoiseFilter.java index ce169ef834..fb698bce59 100644 --- a/src/main/java/htsjdk/samtools/filter/SolexaNoiseFilter.java +++ b/src/main/java/htsjdk/samtools/filter/SolexaNoiseFilter.java @@ -1,68 +1,67 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package htsjdk.samtools.filter; - -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.util.SequenceUtil; - -/** - * Filter to determine whether a read is "noisy" due to a poly-A run that is a sequencing artifact. - * Currently we filter out only reads that are composed entirely of As. - * - * $Id$ - */ -public class SolexaNoiseFilter implements SamRecordFilter { - - /** - * Determines whether a SAMRecord matches this filter - * - * @param record the SAMRecord to evaluate - * @return true if the SAMRecord matches the filter, otherwise false - */ - @Override - public boolean filterOut(final SAMRecord record) { - final byte[] sequence = record.getReadBases(); - for (final byte base : sequence) { - if (base != 'A' && base != 'a' && - !SequenceUtil.isNoCall(base)) { - return false; - } - } - return true; - } - - /** - * Determines whether a pair of SAMRecord matches this filter - * - * @param first the first SAMRecord to evaluate - * @param second the second SAMRecord to evaluate - * - * @return true if the SAMRecords matches the filter, otherwise false - */ - @Override - public boolean filterOut(final SAMRecord first, final SAMRecord second) { - // only filter out the pair if both first and second reads have all As - return (filterOut(first) && filterOut(second)); - } -} +/* + * The MIT License + * + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the 
Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools.filter; + +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.util.SequenceUtil; + +/** + * Filter to determine whether a read is "noisy" due to a poly-A run that is a sequencing artifact. + * Currently we filter out only reads that are composed entirely of As. 
+ * + * $Id$ + */ +public class SolexaNoiseFilter implements SamRecordFilter { + + /** + * Determines whether a SAMRecord matches this filter + * + * @param record the SAMRecord to evaluate + * @return true if the SAMRecord matches the filter, otherwise false + */ + @Override + public boolean filterOut(final SAMRecord record) { + final byte[] sequence = record.getReadBases(); + for (final byte base : sequence) { + if (base != 'A' && base != 'a' && !SequenceUtil.isNoCall(base)) { + return false; + } + } + return true; + } + + /** + * Determines whether a pair of SAMRecord matches this filter + * + * @param first the first SAMRecord to evaluate + * @param second the second SAMRecord to evaluate + * + * @return true if the SAMRecords matches the filter, otherwise false + */ + @Override + public boolean filterOut(final SAMRecord first, final SAMRecord second) { + // only filter out the pair if both first and second reads have all As + return (filterOut(first) && filterOut(second)); + } +} diff --git a/src/main/java/htsjdk/samtools/filter/TagFilter.java b/src/main/java/htsjdk/samtools/filter/TagFilter.java index 08c9eb0184..b2c21659f2 100644 --- a/src/main/java/htsjdk/samtools/filter/TagFilter.java +++ b/src/main/java/htsjdk/samtools/filter/TagFilter.java @@ -1,117 +1,116 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package htsjdk.samtools.filter; - -import htsjdk.samtools.SAMRecord; - -import java.util.Collections; -import java.util.List; - -/** - * Filter class for matching tag attributes in SAMRecords - * - * $Id$ - */ -public class TagFilter implements SamRecordFilter { - - private final String tag; // The key of the tag to match - private final List values; // The list of matching values - private Boolean includeReads; - - /** - * Constructor for a single value - * - * @param tag the key of the tag to match - * @param value the value to match - */ - public TagFilter(String tag, Object value) { - this(tag, Collections.singletonList(value), null); - } - - /** - * Constructor for multiple values - * - * @param tag the key of the tag to match - * @param values the matching values - */ - public TagFilter(String tag, List values) { - this(tag, values, null); - } - - /** - * Constructor for a single value - * - * @param tag the key of the tag to match - * @param value the value to match - * @param includeReads whether to include or not include reads that match filter - */ - public TagFilter(String tag, Object value, final Boolean includeReads) { - this(tag, Collections.singletonList(value), includeReads); - } - - /** - * Constructor for multiple values - * - * @param tag the key of the tag to match - * @param values the matching values - * @param includeReads whether to include or not include reads that match filter - */ - public TagFilter(String tag, List values, final Boolean 
includeReads) { - this.tag = tag; - this.values = values; - this.includeReads = includeReads == null ? false : includeReads; - } - - /** - * Determines whether a SAMRecord matches this filter - * - * @param record the SAMRecord to evaluate - * @return the XOR of SAMRecord matches the filter and includeReads. - */ - @Override - public boolean filterOut(SAMRecord record) { - return values.contains(record.getAttribute(tag)) != includeReads; - } - - /** - * Determines whether a paired of SAMRecord matches this filter - * - * @param first the first SAMRecord to evaluate - * @param second the second SAMRecord to evaluate - * - * @return true if includeReads is true and neither SAMRecord matches filter - * true if includeReads is false and both SAMRecords match filter - * otherwise false - */ - @Override - public boolean filterOut(final SAMRecord first, final SAMRecord second) { - // With includeReads==true, allow any pairs through that contain the tag value - // With includeReads==false, exclude pairs where both reads contain the tag value - if (includeReads) { - return !(values.contains(first.getAttribute(tag)) || values.contains(second.getAttribute(tag))); - } else { - return values.contains(first.getAttribute(tag)) && values.contains(second.getAttribute(tag)); - } - } -} +/* + * The MIT License + * + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools.filter; + +import htsjdk.samtools.SAMRecord; +import java.util.Collections; +import java.util.List; + +/** + * Filter class for matching tag attributes in SAMRecords + * + * $Id$ + */ +public class TagFilter implements SamRecordFilter { + + private final String tag; // The key of the tag to match + private final List values; // The list of matching values + private Boolean includeReads; + + /** + * Constructor for a single value + * + * @param tag the key of the tag to match + * @param value the value to match + */ + public TagFilter(String tag, Object value) { + this(tag, Collections.singletonList(value), null); + } + + /** + * Constructor for multiple values + * + * @param tag the key of the tag to match + * @param values the matching values + */ + public TagFilter(String tag, List values) { + this(tag, values, null); + } + + /** + * Constructor for a single value + * + * @param tag the key of the tag to match + * @param value the value to match + * @param includeReads whether to include or not include reads that match filter + */ + public TagFilter(String tag, Object value, final Boolean includeReads) { + this(tag, Collections.singletonList(value), includeReads); + } + + /** + * Constructor for multiple values + * + * @param tag the key of the tag to match + * @param values the matching values + * @param includeReads whether to include or not include reads that match filter + */ + public TagFilter(String tag, List values, final Boolean includeReads) 
{ + this.tag = tag; + this.values = values; + this.includeReads = includeReads == null ? false : includeReads; + } + + /** + * Determines whether a SAMRecord matches this filter + * + * @param record the SAMRecord to evaluate + * @return the XOR of SAMRecord matches the filter and includeReads. + */ + @Override + public boolean filterOut(SAMRecord record) { + return values.contains(record.getAttribute(tag)) != includeReads; + } + + /** + * Determines whether a paired of SAMRecord matches this filter + * + * @param first the first SAMRecord to evaluate + * @param second the second SAMRecord to evaluate + * + * @return true if includeReads is true and neither SAMRecord matches filter + * true if includeReads is false and both SAMRecords match filter + * otherwise false + */ + @Override + public boolean filterOut(final SAMRecord first, final SAMRecord second) { + // With includeReads==true, allow any pairs through that contain the tag value + // With includeReads==false, exclude pairs where both reads contain the tag value + if (includeReads) { + return !(values.contains(first.getAttribute(tag)) || values.contains(second.getAttribute(tag))); + } else { + return values.contains(first.getAttribute(tag)) && values.contains(second.getAttribute(tag)); + } + } +} diff --git a/src/main/java/htsjdk/samtools/filter/WholeReadClippedFilter.java b/src/main/java/htsjdk/samtools/filter/WholeReadClippedFilter.java index 6df3c44548..8dde3bf701 100644 --- a/src/main/java/htsjdk/samtools/filter/WholeReadClippedFilter.java +++ b/src/main/java/htsjdk/samtools/filter/WholeReadClippedFilter.java @@ -46,10 +46,10 @@ public class WholeReadClippedFilter implements SamRecordFilter { @Override public boolean filterOut(final SAMRecord record) { return record.getAttribute(ReservedTagConstants.XT) != null - && (Integer)record.getAttribute(ReservedTagConstants.XT) == 1; + && (Integer) record.getAttribute(ReservedTagConstants.XT) == 1; } - /** + /** * Determines whether a paired of SAMRecord matches 
this filter * * @param first the first SAMRecord to evaluate diff --git a/src/main/java/htsjdk/samtools/liftover/Chain.java b/src/main/java/htsjdk/samtools/liftover/Chain.java index 45c51ba6bd..67fd9d1c49 100644 --- a/src/main/java/htsjdk/samtools/liftover/Chain.java +++ b/src/main/java/htsjdk/samtools/liftover/Chain.java @@ -28,7 +28,6 @@ import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Interval; import htsjdk.samtools.util.OverlapDetector; - import java.io.File; import java.io.PrintWriter; import java.util.ArrayList; @@ -36,7 +35,6 @@ import java.util.List; import java.util.regex.Pattern; - /** * Holds a single chain from a UCSC chain file. Chain file format is described here: http://genome.ucsc.edu/goldenPath/help/chain.html * @@ -77,6 +75,7 @@ class Chain { final int fromChainStart; /** End of range covered in "from" sequence. */ final int fromChainEnd; + final String toSequenceName; /** Overall size of the "to" sequence. */ final int toSequenceSize; @@ -88,14 +87,24 @@ class Chain { final int toChainEnd; /** ID of chain in file. */ final int id; + private final List blockList = new ArrayList(); /** * Construct a Chain from the parsed header fields. */ - private Chain(final double score, final String fromSequenceName, final int fromSequenceSize, final int fromChainStart, final int fromChainEnd, - final String toSequenceName, final int toSequenceSize, final boolean toOppositeStrand, - final int toChainStart, final int toChainEnd, final int id) { + private Chain( + final double score, + final String fromSequenceName, + final int fromSequenceSize, + final int fromChainStart, + final int fromChainEnd, + final String toSequenceName, + final int toSequenceSize, + final boolean toOppositeStrand, + final int toChainStart, + final int toChainEnd, + final int id) { // Convert to one-based, inclusive for Interval. 
interval = new Interval(fromSequenceName, fromChainStart + 1, fromChainEnd); this.score = score; @@ -105,7 +114,7 @@ private Chain(final double score, final String fromSequenceName, final int fromS this.toSequenceSize = toSequenceSize; this.toChainStart = toChainStart; // not used - //this.score = score; + // this.score = score; this.fromChainEnd = fromChainEnd; this.fromSequenceName = fromSequenceName; this.fromSequenceSize = fromSequenceSize; @@ -113,16 +122,15 @@ private Chain(final double score, final String fromSequenceName, final int fromS this.id = id; } - /** * Holds a range that continuously lines up between target and query genome builds. * Indices are 0-based, half-open. */ static class ContinuousBlock { - final int fromStart; /* Start of range covered in "from". */ - final int toStart; /* Range covered in "to". */ - final int blockLength; /* length of continuous block of that maps btw from and to */ - //int score; /* Score of block. */ + final int fromStart; /* Start of range covered in "from". */ + final int toStart; /* Range covered in "to". */ + final int blockLength; /* length of continuous block of that maps btw from and to */ + // int score; /* Score of block. */ private ContinuousBlock(final int fromStart, final int toStart, final int blockLength) { this.fromStart = fromStart; @@ -186,12 +194,22 @@ List getBlocks() { } void write(final PrintWriter writer) { - writer.printf("chain\t%f\t%s\t%d\t+\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%d\n", - score, fromSequenceName, fromSequenceSize, fromChainStart, fromChainEnd, - toSequenceName, toSequenceSize, (toOppositeStrand ? "-": "+"), toChainStart, toChainEnd, id); + writer.printf( + "chain\t%f\t%s\t%d\t+\t%d\t%d\t%s\t%d\t%s\t%d\t%d\t%d\n", + score, + fromSequenceName, + fromSequenceSize, + fromChainStart, + fromChainEnd, + toSequenceName, + toSequenceSize, + (toOppositeStrand ? 
"-" : "+"), + toChainStart, + toChainEnd, + id); for (int i = 0; i < blockList.size() - 1; ++i) { final ContinuousBlock thisBlock = blockList.get(i); - final ContinuousBlock nextBlock = blockList.get(i+1); + final ContinuousBlock nextBlock = blockList.get(i + 1); final int fromGap = nextBlock.fromStart - thisBlock.getFromEnd(); final int toGap = nextBlock.toStart - thisBlock.getToEnd(); @@ -215,10 +233,12 @@ void validate() { validatePositive("from length", fromLength); int toLength = toChainEnd - toChainStart; validatePositive("to length", toLength); - if (fromLength > fromSequenceSize) throw new SAMException("From chain length (" + fromLength + - ") < from sequence length (" + fromSequenceSize + ") for chain " + id); - if (toLength > toSequenceSize) throw new SAMException("To chain length (" + toLength + - ") < to sequence length (" + toSequenceSize + ") for chain " + id); + if (fromLength > fromSequenceSize) + throw new SAMException("From chain length (" + fromLength + ") < from sequence length (" + fromSequenceSize + + ") for chain " + id); + if (toLength > toSequenceSize) + throw new SAMException( + "To chain length (" + toLength + ") < to sequence length (" + toSequenceSize + ") for chain " + id); if (fromSequenceName.isEmpty()) throw new SAMException("Chain " + id + "has empty from sequence name."); if (toSequenceName.isEmpty()) throw new SAMException("Chain " + id + "has empty to sequence name."); if (blockList.isEmpty()) throw new SAMException("Chain " + id + " has empty block list."); @@ -238,12 +258,14 @@ void validate() { } for (int i = 1; i < blockList.size(); ++i) { final ContinuousBlock thisBlock = blockList.get(i); - final ContinuousBlock prevBlock = blockList.get(i-1); + final ContinuousBlock prevBlock = blockList.get(i - 1); if (thisBlock.fromStart < prevBlock.getFromEnd()) { - throw new SAMException("Continuous block " + i + " from starts before previous block ends for chain " + id); + throw new SAMException( + "Continuous block " + i + " from 
starts before previous block ends for chain " + id); } if (thisBlock.toStart < prevBlock.getToEnd()) { - throw new SAMException("Continuous block " + i + " to starts before previous block ends for chain " + id); + throw new SAMException( + "Continuous block " + i + " to starts before previous block ends for chain " + id); } } } @@ -277,8 +299,9 @@ public boolean equals(final Object o) { if (toOppositeStrand != chain.toOppositeStrand) return false; if (toSequenceSize != chain.toSequenceSize) return false; if (blockList != null ? !blockList.equals(chain.blockList) : chain.blockList != null) return false; - if (fromSequenceName != null ? !fromSequenceName.equals(chain.fromSequenceName) : chain.fromSequenceName != null) - return false; + if (fromSequenceName != null + ? !fromSequenceName.equals(chain.fromSequenceName) + : chain.fromSequenceName != null) return false; if (interval != null ? !interval.equals(chain.interval) : chain.interval != null) return false; if (toSequenceName != null ? !toSequenceName.equals(chain.toSequenceName) : chain.toSequenceName != null) return false; @@ -314,7 +337,7 @@ public int hashCode() { */ static OverlapDetector loadChains(final File chainFile) { IOUtil.assertFileIsReadable(chainFile); - try(final BufferedLineReader reader = new BufferedLineReader(IOUtil.openFileForReading(chainFile))){ + try (final BufferedLineReader reader = new BufferedLineReader(IOUtil.openFileForReading(chainFile))) { return loadChains(reader, chainFile.toString()); } } @@ -385,8 +408,18 @@ private static Chain loadChain(final BufferedLineReader reader, final String sou } catch (NumberFormatException e) { throwChainFileParseException("Invalid field", sourceName, reader.getLineNumber()); } - final Chain chain = new Chain(score, fromSequenceName, fromSequenceSize, fromChainStart, fromChainEnd, toSequenceName, toSequenceSize, toNegativeStrand, toChainStart, - toChainEnd, id); + final Chain chain = new Chain( + score, + fromSequenceName, + fromSequenceSize, + 
fromChainStart, + fromChainEnd, + toSequenceName, + toSequenceSize, + toNegativeStrand, + toChainStart, + toChainEnd, + id); int toBlockStart = chain.toChainStart; int fromBlockStart = chain.fromChainStart; boolean sawLastLine = false; @@ -394,18 +427,21 @@ private static Chain loadChain(final BufferedLineReader reader, final String sou line = reader.readLine(); if (line == null || line.equals("")) { if (!sawLastLine) { - throwChainFileParseException("Reached end of chain without seeing terminal block", sourceName, reader.getLineNumber()); + throwChainFileParseException( + "Reached end of chain without seeing terminal block", sourceName, reader.getLineNumber()); } break; } if (sawLastLine) { - throwChainFileParseException("Terminal block seen before end of chain", sourceName, reader.getLineNumber()); + throwChainFileParseException( + "Terminal block seen before end of chain", sourceName, reader.getLineNumber()); } String[] blockFields = SPLITTER.split(line); if (blockFields.length == 1) { sawLastLine = true; } else if (blockFields.length != 3) { - throwChainFileParseException("Block line has unexpected number of fields", sourceName, reader.getLineNumber()); + throwChainFileParseException( + "Block line has unexpected number of fields", sourceName, reader.getLineNumber()); } int size = Integer.parseInt(blockFields[0]); chain.addBlock(fromBlockStart, toBlockStart, size); @@ -413,13 +449,13 @@ private static Chain loadChain(final BufferedLineReader reader, final String sou fromBlockStart += Integer.parseInt(blockFields[1]) + size; toBlockStart += Integer.parseInt(blockFields[2]) + size; } - } chain.validate(); return chain; } - private static void throwChainFileParseException(final String message, final String sourceName, final int lineNumber) { + private static void throwChainFileParseException( + final String message, final String sourceName, final int lineNumber) { throw new SAMException(message + " in chain file " + sourceName + " at line " + lineNumber); } } diff 
--git a/src/main/java/htsjdk/samtools/liftover/LiftOver.java b/src/main/java/htsjdk/samtools/liftover/LiftOver.java index 9afebd63ca..118d5b5251 100644 --- a/src/main/java/htsjdk/samtools/liftover/LiftOver.java +++ b/src/main/java/htsjdk/samtools/liftover/LiftOver.java @@ -26,7 +26,6 @@ import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.util.*; - import java.io.File; import java.io.InputStream; import java.util.ArrayList; @@ -37,7 +36,6 @@ import java.util.Map; import java.util.Set; - /** * Java port of UCSC liftOver. Only the most basic liftOver functionality is implemented. * Internally coordinates are 0-based, half-open. The API is standard Picard 1-based, inclusive. @@ -46,7 +44,7 @@ */ public class LiftOver { private static final Log LOG = Log.getInstance(LiftOver.class); - + public static final double DEFAULT_LIFTOVER_MINMATCH = 0.95; private double liftOverMinMatch = DEFAULT_LIFTOVER_MINMATCH; @@ -83,7 +81,7 @@ public long getFailedIntervalsBelowThreshold() { /** * Load UCSC chain file in order to lift over Intervals. 
*/ - public LiftOver(File chainFile){ + public LiftOver(File chainFile) { this(Chain.loadChains(chainFile)); } @@ -98,12 +96,11 @@ private LiftOver(OverlapDetector chains) { this.chains = chains; for (final Chain chain : this.chains.getAll()) { final String from = chain.fromSequenceName; - final String to = chain.toSequenceName; + final String to = chain.toSequenceName; final Set names; if (contigMap.containsKey(from)) { names = contigMap.get(from); - } - else { + } else { names = new HashSet<>(); contigMap.put(from, names); } @@ -117,10 +114,10 @@ private LiftOver(OverlapDetector chains) { public void validateToSequences(final SAMSequenceDictionary sequenceDictionary) { for (final Chain chain : chains.getAll()) { if (sequenceDictionary.getSequence(chain.toSequenceName) == null) { - throw new SAMException("Sequence " + chain.toSequenceName + " from chain file is not found in sequence dictionary."); + throw new SAMException( + "Sequence " + chain.toSequenceName + " from chain file is not found in sequence dictionary."); } } - } /** @@ -141,8 +138,8 @@ public Interval liftOver(final Interval interval) { */ public Interval liftOver(final Interval interval, final double liftOverMinMatch) { if (interval.length() == 0) { - throw new IllegalArgumentException("Zero-length interval cannot be lifted over. Interval: " + - interval.getName()); + throw new IllegalArgumentException( + "Zero-length interval cannot be lifted over. 
Interval: " + interval.getName()); } Chain chainHit = null; TargetIntersection targetIntersection = null; @@ -163,10 +160,12 @@ public Interval liftOver(final Interval interval, final double liftOverMinMatch) } else if (candidateIntersection != null) { hasOverlapBelowThreshold = true; if (logFailedIntervals) { - LOG.info("Interval " + interval.getName() + " failed to match chain " + chain.id + - " because intersection length " + candidateIntersection.intersectionLength + " < minMatchSize " - + minMatchSize + - " (" + (candidateIntersection.intersectionLength/(float)interval.length()) + " < " + liftOverMinMatch + ")"); + LOG.info("Interval " + interval.getName() + " failed to match chain " + chain.id + + " because intersection length " + + candidateIntersection.intersectionLength + " < minMatchSize " + + minMatchSize + " (" + + (candidateIntersection.intersectionLength / (float) interval.length()) + " < " + + liftOverMinMatch + ")"); } } } @@ -185,8 +184,8 @@ public Interval liftOver(final Interval interval, final double liftOverMinMatch) public List diagnosticLiftover(final Interval interval) { final List ret = new ArrayList(); if (interval.length() == 0) { - throw new IllegalArgumentException("Zero-length interval cannot be lifted over. Interval: " + - interval.getName()); + throw new IllegalArgumentException( + "Zero-length interval cannot be lifted over. 
Interval: " + interval.getName()); } for (final Chain chain : chains.getOverlaps(interval)) { Interval intersectingChain = interval.intersect(chain.interval); @@ -194,9 +193,11 @@ public List diagnosticLiftover(final Interval interval) { if (targetIntersection == null) { ret.add(new PartialLiftover(intersectingChain, chain.id)); } else { - Interval toInterval = createToInterval(interval.getName(), interval.isNegativeStrand(), targetIntersection); - float percentLiftedOver = targetIntersection.intersectionLength/(float)interval.length(); - ret.add(new PartialLiftover(intersectingChain, toInterval, targetIntersection.chain.id, percentLiftedOver)); + Interval toInterval = + createToInterval(interval.getName(), interval.isNegativeStrand(), targetIntersection); + float percentLiftedOver = targetIntersection.intersectionLength / (float) interval.length(); + ret.add(new PartialLiftover( + intersectingChain, toInterval, targetIntersection.chain.id, percentLiftedOver)); } } return ret; @@ -209,11 +210,19 @@ public Map> getContigMap() { return Collections.unmodifiableMap(contigMap); } - private static Interval createToInterval(final String intervalName, final boolean sourceNegativeStrand, final TargetIntersection targetIntersection) { + private static Interval createToInterval( + final String intervalName, + final boolean sourceNegativeStrand, + final TargetIntersection targetIntersection) { // Compute the query interval given the offsets of the target interval start and end into the first and // last ContinuousBlocks. 
- int toStart = targetIntersection.chain.getBlock(targetIntersection.firstBlockIndex).toStart + targetIntersection.startOffset; - int toEnd = targetIntersection.chain.getBlock(targetIntersection.lastBlockIndex).getToEnd() - targetIntersection.offsetFromEnd; + int toStart = targetIntersection.chain.getBlock(targetIntersection.firstBlockIndex).toStart + + targetIntersection.startOffset; + int toEnd = targetIntersection + .chain + .getBlock(targetIntersection.lastBlockIndex) + .getToEnd() + - targetIntersection.offsetFromEnd; if (toEnd <= toStart || toStart < 0) { throw new SAMException("Something strange lifting over interval " + intervalName); } @@ -226,8 +235,9 @@ private static Interval createToInterval(final String intervalName, final boolea toEnd = negativeEnd; } // Convert to 1-based, inclusive. - final boolean negativeStrand = targetIntersection.chain.toOppositeStrand ? !sourceNegativeStrand : sourceNegativeStrand; - return new Interval(targetIntersection.chain.toSequenceName, toStart+1, toEnd, negativeStrand, intervalName); + final boolean negativeStrand = + targetIntersection.chain.toOppositeStrand ? 
!sourceNegativeStrand : sourceNegativeStrand; + return new Interval(targetIntersection.chain.toSequenceName, toStart + 1, toEnd, negativeStrand, intervalName); } /** @@ -275,7 +285,8 @@ private static TargetIntersection targetIntersection(final Chain chain, final In if (intersectionLength == 0) { return null; } - return new TargetIntersection(chain, intersectionLength, startOffset, offsetFromEnd, firstBlockIndex, lastBlockIndex); + return new TargetIntersection( + chain, intersectionLength, startOffset, offsetFromEnd, firstBlockIndex, lastBlockIndex); } /** @@ -293,8 +304,8 @@ public void setLiftOverMinMatch(final double liftOverMinMatch) { } /** - * Value class returned by targetIntersection() - */ + * Value class returned by targetIntersection() + */ private static class TargetIntersection { /** Chain used for this intersection */ final Chain chain; @@ -309,8 +320,13 @@ private static class TargetIntersection { /** Index of last ContinuousBlock matching interval. */ final int lastBlockIndex; - TargetIntersection(final Chain chain,final int intersectionLength, final int startOffset, - final int offsetFromEnd, final int firstBlockIndex, final int lastBlockIndex) { + TargetIntersection( + final Chain chain, + final int intersectionLength, + final int startOffset, + final int offsetFromEnd, + final int firstBlockIndex, + final int lastBlockIndex) { this.chain = chain; this.intersectionLength = intersectionLength; this.startOffset = startOffset; @@ -335,7 +351,11 @@ public static class PartialLiftover { /** Percentage of bases in fromInterval that lifted over. 0 if fromInterval is not covered by any chain. 
*/ final float percentLiftedOver; - PartialLiftover(final Interval fromInterval, final Interval toInterval, final int chainId, final float percentLiftedOver) { + PartialLiftover( + final Interval fromInterval, + final Interval toInterval, + final int chainId, + final float percentLiftedOver) { this.fromInterval = fromInterval; this.toInterval = toInterval; this.chainId = chainId; diff --git a/src/main/java/htsjdk/samtools/metrics/Header.java b/src/main/java/htsjdk/samtools/metrics/Header.java index 3672d8bf32..b2bc14732b 100644 --- a/src/main/java/htsjdk/samtools/metrics/Header.java +++ b/src/main/java/htsjdk/samtools/metrics/Header.java @@ -39,5 +39,4 @@ public interface Header extends Serializable { /** Parses the data contained in the String version of the header. */ public void parse(String in); - } diff --git a/src/main/java/htsjdk/samtools/metrics/MetricBase.java b/src/main/java/htsjdk/samtools/metrics/MetricBase.java index 119a479c6e..60e125fa52 100644 --- a/src/main/java/htsjdk/samtools/metrics/MetricBase.java +++ b/src/main/java/htsjdk/samtools/metrics/MetricBase.java @@ -26,7 +26,6 @@ import htsjdk.samtools.SAMException; import htsjdk.samtools.util.FormatUtil; - import java.lang.reflect.Field; /** @@ -46,7 +45,7 @@ public class MetricBase { public boolean equals(final Object o) { if (o == null) return false; if (o.getClass() != getClass()) return false; - + final FormatUtil formatter = new FormatUtil(); // Loop through all the fields and check that they are either // null in both objects or equal in both objects @@ -58,23 +57,22 @@ public boolean equals(final Object o) { if (lhs == null) { if (rhs == null) { // keep going - } - else { + } else { return false; } - } - else { - if (formatter.format(lhs).equals(formatter.format(rhs))) //compare based on the serialized representation + } else { + if (formatter + .format(lhs) + .equals(formatter.format(rhs))) // compare based on the serialized representation { // keep going - } - else { + } else { return 
false; } } - } - catch (IllegalAccessException iae) { - throw new SAMException("Could not read field " + f.getName() + " from a " + getClass().getSimpleName()); + } catch (IllegalAccessException iae) { + throw new SAMException("Could not read field " + f.getName() + " from a " + + getClass().getSimpleName()); } } @@ -83,7 +81,7 @@ public boolean equals(final Object o) { } /** - * Computes a hashcode by formatting each field into its on disk representation + * Computes a hashcode by formatting each field into its on disk representation * and summing the hashcodes of all the fields. */ public int hashCode() { @@ -92,11 +90,12 @@ public int hashCode() { for (final Field f : getClass().getFields()) { try { Object value = f.get(this); - value = formatter.format(value); //format the value the way it will be written to disk + value = formatter.format(value); // format the value the way it will be written to disk final int fieldHash = value != null ? value.hashCode() : 0; result = 31 * result + fieldHash; } catch (IllegalAccessException e) { - throw new SAMException("Could not read field " + f.getName() + " from a " + getClass().getSimpleName()); + throw new SAMException("Could not read field " + f.getName() + " from a " + + getClass().getSimpleName()); } } return result; @@ -113,13 +112,12 @@ public String toString() { buffer.append('\t'); buffer.append(formatter.format(f.get(this))); buffer.append('\n'); - } - catch (IllegalAccessException iae) { - throw new SAMException("Could not read field " + f.getName() + " from a " + getClass().getSimpleName()); + } catch (IllegalAccessException iae) { + throw new SAMException("Could not read field " + f.getName() + " from a " + + getClass().getSimpleName()); } } return buffer.toString(); } - } diff --git a/src/main/java/htsjdk/samtools/metrics/MetricsFile.java b/src/main/java/htsjdk/samtools/metrics/MetricsFile.java index 5c979600be..261a64d4f4 100644 --- a/src/main/java/htsjdk/samtools/metrics/MetricsFile.java +++ 
b/src/main/java/htsjdk/samtools/metrics/MetricsFile.java @@ -30,7 +30,6 @@ import htsjdk.samtools.util.Histogram; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.StringUtil; - import java.io.*; import java.lang.reflect.Field; import java.util.ArrayList; @@ -63,23 +62,35 @@ public class MetricsFile imple private final List> histograms = new ArrayList<>(); /** Adds a header to the collection of metrics. */ - public void addHeader(Header h) { this.headers.add(h); } + public void addHeader(Header h) { + this.headers.add(h); + } /** Returns the list of headers. */ - public List
    getHeaders() { return Collections.unmodifiableList(this.headers); } + public List
    getHeaders() { + return Collections.unmodifiableList(this.headers); + } /** Adds a bean to the collection of metrics. */ - public void addMetric(final BEAN bean) { this.metrics.add(bean); } + public void addMetric(final BEAN bean) { + this.metrics.add(bean); + } /** Add multiple metric beans at once. */ public void addAllMetrics(final Iterable beanz) { - for (final BEAN bean : beanz) { this.addMetric(bean); } + for (final BEAN bean : beanz) { + this.addMetric(bean); + } } /** Returns the list of headers. */ - public List getMetrics() { return Collections.unmodifiableList(this.metrics); } + public List getMetrics() { + return Collections.unmodifiableList(this.metrics); + } - public Set getMetricsColumnLabels() { return Collections.unmodifiableSet(this.columnLabels); } + public Set getMetricsColumnLabels() { + return Collections.unmodifiableSet(this.columnLabels); + } /** Returns the histogram contained in the metrics file if any. */ public Histogram getHistogram() { @@ -91,8 +102,7 @@ public Histogram getHistogram() { public void setHistogram(final Histogram histogram) { if (this.histograms.isEmpty()) { if (histogram != null) this.histograms.add(histogram); - } - else { + } else { this.histograms.set(0, histogram); } } @@ -102,17 +112,16 @@ public void addHistogram(final Histogram histogram) { this.histograms.add(histogram); } - //** Returns an unmodifiable version of the histogram list */ + // ** Returns an unmodifiable version of the histogram list */ public List> getAllHistograms() { return Collections.unmodifiableList(histograms); } /** Returns the number of histograms added to the metrics file. */ - public int getNumHistograms() - { - return this.histograms.size(); + public int getNumHistograms() { + return this.histograms.size(); } - + /** Returns the list of headers with the specified type. */ public List
    getHeaders(final Class type) { List
    tmp = new ArrayList
    (); @@ -158,8 +167,7 @@ public void write(final Writer w) { printHistogram(out, formatter); out.newLine(); out.flush(); - } - catch (IOException ioe) { + } catch (IOException ioe) { throw new SAMException("Could not write metrics file.", ioe); } } @@ -192,33 +200,30 @@ private void printBeanMetrics(final BufferedWriter out, final FormatUtil formatt final int fieldCount = fields.length; // Write out the column headers - for (int i=0; i keys = new TreeSet(nonEmptyHistograms.get(0).comparator()); + final java.util.Set keys = + new TreeSet(nonEmptyHistograms.get(0).comparator()); for (final Histogram histo : nonEmptyHistograms) { if (histo != null) keys.addAll(histo.keySet()); } // Add a header for the histogram key type - out.append(HISTO_HEADER + nonEmptyHistograms.get(0).keySet().iterator().next().getClass().getName()); + out.append(HISTO_HEADER + + nonEmptyHistograms + .get(0) + .keySet() + .iterator() + .next() + .getClass() + .getName()); out.newLine(); // Output a header row - out.append(StringUtil.assertCharactersNotInString(nonEmptyHistograms.get(0).getBinLabel(), '\t', '\n')); + out.append( + StringUtil.assertCharactersNotInString(nonEmptyHistograms.get(0).getBinLabel(), '\t', '\n')); for (final Histogram histo : nonEmptyHistograms) { out.append(SEPARATOR); out.append(StringUtil.assertCharactersNotInString(histo.getValueLabel(), '\t', '\n')); @@ -292,48 +306,43 @@ public void read(final Reader r) { line = line.trim(); if ("".equals(line)) { // Do nothing! Nothing to be done! - } - else if (line.startsWith(METRIC_HEADER) || line.startsWith(HISTO_HEADER)) { + } else if (line.startsWith(METRIC_HEADER) || line.startsWith(HISTO_HEADER)) { // A line that starts with "## METRICS CLASS" heralds the start of the actual // data. Bounce our butts out of header parsing without reading the next line. // This isn't in the while loop's conditional because we want to trim() first. 
break; - } - else if (line.startsWith(MAJOR_HEADER_PREFIX)) { + } else if (line.startsWith(MAJOR_HEADER_PREFIX)) { if (header != null) { throw new IllegalStateException("Consecutive header class lines encountered."); } - - final String className = line.substring(MAJOR_HEADER_PREFIX.length()).trim(); + + final String className = + line.substring(MAJOR_HEADER_PREFIX.length()).trim(); try { header = (Header) loadClass(className, true).newInstance(); - } - catch (final Exception e) { + } catch (final Exception e) { throw new SAMException("Error load and/or instantiating an instance of " + className, e); } - } - else if (line.startsWith(MINOR_HEADER_PREFIX)) { + } else if (line.startsWith(MINOR_HEADER_PREFIX)) { if (header == null) { throw new IllegalStateException("Header class must precede header value:" + line); } header.parse(line.substring(MINOR_HEADER_PREFIX.length())); this.headers.add(header); header = null; - } - else { + } else { throw new SAMException("Illegal state. Found following string in metrics file header: " + line); } } // Read space between headers and metrics, if any - while (line != null && ! 
line.trim().startsWith(MAJOR_HEADER_PREFIX)) { + while (line != null && !line.trim().startsWith(MAJOR_HEADER_PREFIX)) { line = in.readLine(); } - if (line != null) { line = line.trim(); - + // Then read the metrics if there are any if (line.startsWith(METRIC_HEADER)) { // Get the metric class from the header @@ -341,8 +350,7 @@ else if (line.startsWith(MINOR_HEADER_PREFIX)) { Class type = null; try { type = loadClass(className, true); - } - catch (final ClassNotFoundException cnfe) { + } catch (final ClassNotFoundException cnfe) { throw new SAMException("Could not locate class with name " + className, cnfe); } @@ -350,13 +358,12 @@ else if (line.startsWith(MINOR_HEADER_PREFIX)) { final String[] fieldNames = in.readLine().split(SEPARATOR); Collections.addAll(columnLabels, fieldNames); final Field[] fields = new Field[fieldNames.length]; - for (int i=0; i keyClass = null; - try { keyClass = loadClass(keyClassName, true); } - catch (final ClassNotFoundException cnfe) { throw new SAMException("Could not load class with name " + keyClassName); } + try { + keyClass = loadClass(keyClassName, true); + } catch (final ClassNotFoundException cnfe) { + throw new SAMException("Could not load class with name " + keyClassName); + } // Read the next line with the bin and value labels final String[] labels = in.readLine().split(SEPARATOR); - for (int i=1; i(labels[0], labels[i])); } @@ -419,18 +434,16 @@ else if (line.startsWith(MINOR_HEADER_PREFIX)) { final String[] fields = line.trim().split(SEPARATOR); final HKEY key = (HKEY) formatter.parseObject(fields[0], keyClass); - for (int i=1; i loadClass(final String className, final boolean tryOtherPackages) throws ClassNotFoundException { // List of alternative packages to check in case classes moved around final String[] packages = new String[] { - "edu.mit.broad.picard.genotype.concordance", - "edu.mit.broad.picard.genotype.fingerprint", - "edu.mit.broad.picard.ic", - "edu.mit.broad.picard.illumina", - 
"edu.mit.broad.picard.jumping", - "edu.mit.broad.picard.quality", - "edu.mit.broad.picard.samplevalidation", - "htsjdk.samtools.analysis", - "htsjdk.samtools.analysis.directed", - "htsjdk.samtools.sam", - "htsjdk.samtools.metrics", - "picard.sam", - "picard.metrics", - "picard.illumina", - "picard.analysis", - "picard.analysis.directed", - "picard.vcf" + "edu.mit.broad.picard.genotype.concordance", + "edu.mit.broad.picard.genotype.fingerprint", + "edu.mit.broad.picard.ic", + "edu.mit.broad.picard.illumina", + "edu.mit.broad.picard.jumping", + "edu.mit.broad.picard.quality", + "edu.mit.broad.picard.samplevalidation", + "htsjdk.samtools.analysis", + "htsjdk.samtools.analysis.directed", + "htsjdk.samtools.sam", + "htsjdk.samtools.metrics", + "picard.sam", + "picard.metrics", + "picard.illumina", + "picard.analysis", + "picard.analysis.directed", + "picard.vcf" }; - try { return Class.forName(className); } - catch (ClassNotFoundException cnfe) { + try { + return Class.forName(className); + } catch (ClassNotFoundException cnfe) { if (tryOtherPackages) { for (final String p : packages) { try { return loadClass(p + className.substring(className.lastIndexOf('.')), false); + } catch (ClassNotFoundException cnf2) { + /* do nothing */ } - catch (ClassNotFoundException cnf2) {/* do nothing */} // If it ws an inner class, try and see if it's a stand-alone class now if (className.indexOf('$') > -1) { try { return loadClass(p + "." 
+ className.substring(className.lastIndexOf('$') + 1), false); + } catch (ClassNotFoundException cnf2) { + /* do nothing */ } - catch (ClassNotFoundException cnf2) {/* do nothing */} } } } @@ -562,7 +578,6 @@ public static boolean areMetricsEqual(final File file1, final File file2) { } catch (FileNotFoundException e) { throw new SAMException(e.getMessage(), e); } - } /** diff --git a/src/main/java/htsjdk/samtools/metrics/StringHeader.java b/src/main/java/htsjdk/samtools/metrics/StringHeader.java index 949dd41535..0231afec90 100644 --- a/src/main/java/htsjdk/samtools/metrics/StringHeader.java +++ b/src/main/java/htsjdk/samtools/metrics/StringHeader.java @@ -44,11 +44,21 @@ public StringHeader(String value) { } @Override - public void parse(String in) { value = in.trim(); } - public String toString() { return value; } + public void parse(String in) { + value = in.trim(); + } + + public String toString() { + return value; + } + + public String getValue() { + return value; + } - public String getValue() { return value; } - public void setValue(String value) { this.value = StringUtil.assertCharactersNotInString(value, '\n'); } + public void setValue(String value) { + this.value = StringUtil.assertCharactersNotInString(value, '\n'); + } /** Checks equality on the value of the header. 
*/ public boolean equals(Object o) { @@ -56,12 +66,10 @@ public boolean equals(Object o) { StringHeader that = (StringHeader) o; if (this.value == null) { return that.value == null; - } - else { + } else { return this.value.equals(that.value); } - } - else { + } else { return false; } } diff --git a/src/main/java/htsjdk/samtools/metrics/VersionHeader.java b/src/main/java/htsjdk/samtools/metrics/VersionHeader.java index 82093aaa56..e179ce5939 100644 --- a/src/main/java/htsjdk/samtools/metrics/VersionHeader.java +++ b/src/main/java/htsjdk/samtools/metrics/VersionHeader.java @@ -48,12 +48,18 @@ public String toString() { return this.versionedItem + "\t" + this.versionString; } - public String getVersionedItem() { return versionedItem; } + public String getVersionedItem() { + return versionedItem; + } + public void setVersionedItem(String versionedItem) { this.versionedItem = StringUtil.assertCharactersNotInString(versionedItem, '\t', '\n'); } - public String getVersionString() { return versionString; } + public String getVersionString() { + return versionString; + } + public void setVersionString(String versionString) { this.versionString = StringUtil.assertCharactersNotInString(versionString, '\t', '\n'); } diff --git a/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java b/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java index f0d2585741..98a59a53fb 100644 --- a/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java +++ b/src/main/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java @@ -31,7 +31,6 @@ import htsjdk.samtools.util.FileExtensions; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Lazy; - import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -65,7 +64,8 @@ abstract class AbstractFastaSequenceFile implements ReferenceSequenceFile { this.path = path; this.source = path == null ? 
"unknown" : path.toAbsolutePath().toString(); // ensure lambda is serializable (by Kryo, when used with Spark) - this.dictionary = new Lazy<>((Supplier & Serializable) (() -> findAndLoadSequenceDictionary(path))); + this.dictionary = new Lazy<>( + (Supplier & Serializable) (() -> findAndLoadSequenceDictionary(path))); } /** @@ -108,7 +108,8 @@ protected static SAMSequenceDictionary loadSequenceDictionary(final IOPath dictP } /** @deprecated use findSequenceDictionary(Path) instead. */ - @Deprecated protected static File findSequenceDictionary(final File file) { + @Deprecated + protected static File findSequenceDictionary(final File file) { final Path dict = findSequenceDictionary(file.toPath()); return dict == null ? null : dict.toFile(); } @@ -124,11 +125,11 @@ protected static Path findSequenceDictionary(final Path fastaPath) { return dictionary; } // try without removing the file extension - final Path dictionaryExt = fastaPath.resolveSibling(fastaPath.getFileName().toString() + FileExtensions.DICT); + final Path dictionaryExt = + fastaPath.resolveSibling(fastaPath.getFileName().toString() + FileExtensions.DICT); if (Files.exists(dictionaryExt)) { return dictionaryExt; - } - else { + } else { return null; } } @@ -167,18 +168,21 @@ public String toString() { /** default implementation -- override if index is supported */ @Override - public boolean isIndexed() {return false;} + public boolean isIndexed() { + return false; + } /** default implementation -- override if index is supported */ @Override - public ReferenceSequence getSequence( String contig ) { - throw new UnsupportedOperationException("Index does not appear to exist for " + getSource() + ". samtools faidx can be used to create an index"); + public ReferenceSequence getSequence(String contig) { + throw new UnsupportedOperationException("Index does not appear to exist for " + getSource() + + ". 
samtools faidx can be used to create an index"); } /** default implementation -- override if index is supported */ @Override - public ReferenceSequence getSubsequenceAt( String contig, long start, long stop ) { - throw new UnsupportedOperationException("Index does not appear to exist for " + getSource() + ". samtools faidx can be used to create an index"); + public ReferenceSequence getSubsequenceAt(String contig, long start, long stop) { + throw new UnsupportedOperationException("Index does not appear to exist for " + getSource() + + ". samtools faidx can be used to create an index"); } - } diff --git a/src/main/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFile.java b/src/main/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFile.java index c08c87ff18..6e130d0804 100644 --- a/src/main/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFile.java +++ b/src/main/java/htsjdk/samtools/reference/AbstractIndexedFastaSequenceFile.java @@ -30,7 +30,6 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.util.IOUtil; - import java.io.FileNotFoundException; import java.io.IOException; import java.nio.ByteBuffer; @@ -78,7 +77,8 @@ protected AbstractIndexedFastaSequenceFile(final Path path, final FastaSequenceI * @param dictPath the path to the sequence dictionary. may be null. * @param index the associated index object; may not be null. 
*/ - protected AbstractIndexedFastaSequenceFile(final IOPath fastaPath, final IOPath dictPath, final FastaSequenceIndex index) { + protected AbstractIndexedFastaSequenceFile( + final IOPath fastaPath, final IOPath dictPath, final FastaSequenceIndex index) { super(fastaPath.toPath(), fastaPath.getURIString(), loadSequenceDictionary(dictPath)); if (index == null) { throw new IllegalArgumentException("Null index for fasta " + index); @@ -97,7 +97,8 @@ protected AbstractIndexedFastaSequenceFile(final IOPath fastaPath, final IOPath * @param index The fasta index. * @param dictionary The sequence dictionary, or null if there isn't one. */ - protected AbstractIndexedFastaSequenceFile(String source, final FastaSequenceIndex index, SAMSequenceDictionary dictionary) { + protected AbstractIndexedFastaSequenceFile( + String source, final FastaSequenceIndex index, SAMSequenceDictionary dictionary) { super(null, source, dictionary); this.index = index; reset(); @@ -105,7 +106,9 @@ protected AbstractIndexedFastaSequenceFile(String source, final FastaSequenceInd protected static Path findRequiredFastaIndexFile(Path fastaFile) throws FileNotFoundException { Path ret = findFastaIndex(fastaFile); - if (ret == null) throw new FileNotFoundException(ReferenceSequenceFileFactory.getFastaIndexFileName(fastaFile) + " not found."); + if (ret == null) + throw new FileNotFoundException( + ReferenceSequenceFileFactory.getFastaIndexFileName(fastaFile) + " not found."); return ret; } @@ -121,28 +124,30 @@ protected static Path findFastaIndex(Path fastaFile) { * @param sequenceDictionary sequence dictionary to check against the index. * @param index index file to check against the dictionary. 
*/ - protected static void sanityCheckDictionaryAgainstIndex(final String fastaFile, - final SAMSequenceDictionary sequenceDictionary, - final FastaSequenceIndex index) { + protected static void sanityCheckDictionaryAgainstIndex( + final String fastaFile, final SAMSequenceDictionary sequenceDictionary, final FastaSequenceIndex index) { // Make sure dictionary and index are the same size. - if( sequenceDictionary.getSequences().size() != index.size() ) + if (sequenceDictionary.getSequences().size() != index.size()) throw new SAMException("Sequence dictionary and index contain different numbers of contigs"); - Iterator sequenceIterator = sequenceDictionary.getSequences().iterator(); + Iterator sequenceIterator = + sequenceDictionary.getSequences().iterator(); Iterator indexIterator = index.iterator(); - while(sequenceIterator.hasNext() && indexIterator.hasNext()) { + while (sequenceIterator.hasNext() && indexIterator.hasNext()) { SAMSequenceRecord sequenceEntry = sequenceIterator.next(); FastaSequenceIndexEntry indexEntry = indexIterator.next(); - if(!sequenceEntry.getSequenceName().equals(indexEntry.getContig())) { - throw new SAMException(String.format("Mismatch between sequence dictionary fasta index for %s, sequence '%s' != '%s'.", - fastaFile, sequenceEntry.getSequenceName(),indexEntry.getContig())); + if (!sequenceEntry.getSequenceName().equals(indexEntry.getContig())) { + throw new SAMException(String.format( + "Mismatch between sequence dictionary fasta index for %s, sequence '%s' != '%s'.", + fastaFile, sequenceEntry.getSequenceName(), indexEntry.getContig())); } // Make sure sequence length matches index length. 
- if( sequenceEntry.getSequenceLength() != indexEntry.getSize()) - throw new SAMException("Index length does not match dictionary length for contig: " + sequenceEntry.getSequenceName() ); + if (sequenceEntry.getSequenceLength() != indexEntry.getSize()) + throw new SAMException( + "Index length does not match dictionary length for contig: " + sequenceEntry.getSequenceName()); } } @@ -155,10 +160,9 @@ public FastaSequenceIndex getIndex() { * @return next sequence if available, or null if not present. */ @Override - public ReferenceSequence nextSequence() { - if( !indexIterator.hasNext() ) - return null; - return getSequence( indexIterator.next().getContig() ); + public ReferenceSequence nextSequence() { + if (!indexIterator.hasNext()) return null; + return getSequence(indexIterator.next().getContig()); } /** @@ -180,8 +184,8 @@ public final boolean isIndexed() { * @return The full sequence associated with this contig. */ @Override - public ReferenceSequence getSequence( String contig ) { - return getSubsequenceAt( contig, 1, (int)index.getIndexEntry(contig).getSize() ); + public ReferenceSequence getSequence(String contig) { + return getSubsequenceAt(contig, 1, (int) index.getIndexEntry(contig).getSize()); } /** @@ -192,16 +196,16 @@ public ReferenceSequence getSequence( String contig ) { * @return The partial reference sequence associated with this range. 
*/ @Override - public ReferenceSequence getSubsequenceAt( String contig, long start, long stop ) { - if(start > stop + 1) - throw new SAMException(String.format("Malformed query; start point %d lies after end point %d",start,stop)); + public ReferenceSequence getSubsequenceAt(String contig, long start, long stop) { + if (start > stop + 1) + throw new SAMException( + String.format("Malformed query; start point %d lies after end point %d", start, stop)); FastaSequenceIndexEntry indexEntry = getIndex().getIndexEntry(contig); - if(stop > indexEntry.getSize()) - throw new SAMException("Query asks for data past end of contig"); + if (stop > indexEntry.getSize()) throw new SAMException("Query asks for data past end of contig"); - int length = (int)(stop - start + 1); + int length = (int) (stop - start + 1); byte[] target = new byte[length]; ByteBuffer targetBuffer = ByteBuffer.wrap(target); @@ -210,49 +214,53 @@ public ReferenceSequence getSubsequenceAt( String contig, long start, long stop final int bytesPerLine = indexEntry.getBytesPerLine(); final int terminatorLength = bytesPerLine - basesPerLine; - long startOffset = ((start-1)/basesPerLine)*bytesPerLine + (start-1)%basesPerLine; + long startOffset = ((start - 1) / basesPerLine) * bytesPerLine + (start - 1) % basesPerLine; // Cast to long so the second argument cannot overflow a signed integer. - final long minBufferSize = Math.min((long) Defaults.NON_ZERO_BUFFER_SIZE, (long)(length / basesPerLine + 2) * (long)bytesPerLine); - if (minBufferSize > Integer.MAX_VALUE) throw new SAMException("Buffer is too large: " + minBufferSize); + final long minBufferSize = Math.min( + (long) Defaults.NON_ZERO_BUFFER_SIZE, (long) (length / basesPerLine + 2) * (long) bytesPerLine); + if (minBufferSize > Integer.MAX_VALUE) throw new SAMException("Buffer is too large: " + minBufferSize); // Allocate a buffer for reading in sequence data. 
- final ByteBuffer channelBuffer = ByteBuffer.allocate((int)minBufferSize); + final ByteBuffer channelBuffer = ByteBuffer.allocate((int) minBufferSize); - while(targetBuffer.position() < length) { - // If the bufferOffset is currently within the eol characters in the string, push the bufferOffset forward to the next printable character. - startOffset += Math.max((int)(startOffset%bytesPerLine - basesPerLine + 1),0); + while (targetBuffer.position() < length) { + // If the bufferOffset is currently within the eol characters in the string, push the bufferOffset forward + // to the next printable character. + startOffset += Math.max((int) (startOffset % bytesPerLine - basesPerLine + 1), 0); try { - startOffset += readFromPosition(channelBuffer, indexEntry.getLocation()+startOffset); - } - catch(IOException ex) { - throw new SAMException("Unable to load " + contig + "(" + start + ", " + stop + ") from " + getSource(), ex); + startOffset += readFromPosition(channelBuffer, indexEntry.getLocation() + startOffset); + } catch (IOException ex) { + throw new SAMException( + "Unable to load " + contig + "(" + start + ", " + stop + ") from " + getSource(), ex); } // Reset the buffer for outbound transfers. channelBuffer.flip(); // Calculate the size of the next run of bases based on the contents we've already retrieved. - final int positionInContig = (int)start-1+targetBuffer.position(); - final int nextBaseSpan = Math.min(basesPerLine-positionInContig%basesPerLine,length-targetBuffer.position()); + final int positionInContig = (int) start - 1 + targetBuffer.position(); + final int nextBaseSpan = + Math.min(basesPerLine - positionInContig % basesPerLine, length - targetBuffer.position()); // Cap the bytes to transfer by limiting the nextBaseSpan to the size of the channel buffer. 
- int bytesToTransfer = Math.min(nextBaseSpan,channelBuffer.capacity()); + int bytesToTransfer = Math.min(nextBaseSpan, channelBuffer.capacity()); - channelBuffer.limit(channelBuffer.position()+bytesToTransfer); + channelBuffer.limit(channelBuffer.position() + bytesToTransfer); - while(channelBuffer.hasRemaining()) { + while (channelBuffer.hasRemaining()) { targetBuffer.put(channelBuffer); - bytesToTransfer = Math.min(basesPerLine,length-targetBuffer.position()); - channelBuffer.limit(Math.min(channelBuffer.position()+bytesToTransfer+terminatorLength,channelBuffer.capacity())); - channelBuffer.position(Math.min(channelBuffer.position()+terminatorLength,channelBuffer.capacity())); + bytesToTransfer = Math.min(basesPerLine, length - targetBuffer.position()); + channelBuffer.limit(Math.min( + channelBuffer.position() + bytesToTransfer + terminatorLength, channelBuffer.capacity())); + channelBuffer.position(Math.min(channelBuffer.position() + terminatorLength, channelBuffer.capacity())); } // Reset the buffer for inbound transfers. 
channelBuffer.flip(); } - return new ReferenceSequence( contig, indexEntry.getSequenceIndex(), target ); + return new ReferenceSequence(contig, indexEntry.getSequenceIndex(), target); } /** diff --git a/src/main/java/htsjdk/samtools/reference/BlockCompressedIndexedFastaSequenceFile.java b/src/main/java/htsjdk/samtools/reference/BlockCompressedIndexedFastaSequenceFile.java index 96d655d288..40b1e2564a 100644 --- a/src/main/java/htsjdk/samtools/reference/BlockCompressedIndexedFastaSequenceFile.java +++ b/src/main/java/htsjdk/samtools/reference/BlockCompressedIndexedFastaSequenceFile.java @@ -32,7 +32,6 @@ import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.GZIIndex; import htsjdk.samtools.util.IOUtil; - import java.io.FileNotFoundException; import java.io.IOException; import java.nio.ByteBuffer; @@ -50,8 +49,7 @@ public class BlockCompressedIndexedFastaSequenceFile extends AbstractIndexedFast private final BlockCompressedInputStream stream; private final GZIIndex gzindex; - public BlockCompressedIndexedFastaSequenceFile(final Path path) - throws FileNotFoundException { + public BlockCompressedIndexedFastaSequenceFile(final Path path) throws FileNotFoundException { this(path, new FastaSequenceIndex((findRequiredFastaIndexFile(path)))); } @@ -64,10 +62,7 @@ public BlockCompressedIndexedFastaSequenceFile(final Path path) * @param gziIndex the associated gziIndex */ public BlockCompressedIndexedFastaSequenceFile( - final IOPath fastaPath, - final IOPath dictPath, - final FastaSequenceIndex index, - final GZIIndex gziIndex) { + final IOPath fastaPath, final IOPath dictPath, final FastaSequenceIndex index, final GZIIndex gziIndex) { super(fastaPath, dictPath, index); if (gziIndex == null) { throw new IllegalArgumentException("null gzi index"); @@ -85,7 +80,8 @@ public BlockCompressedIndexedFastaSequenceFile(final Path path, final FastaSeque this(path, index, loadFastaGziIndex(path)); } - public BlockCompressedIndexedFastaSequenceFile(final 
Path path, final FastaSequenceIndex index, final GZIIndex gziIndex) { + public BlockCompressedIndexedFastaSequenceFile( + final Path path, final FastaSequenceIndex index, final GZIIndex gziIndex) { super(path, index); if (gziIndex == null) { throw new IllegalArgumentException("null gzi index"); @@ -107,7 +103,12 @@ public BlockCompressedIndexedFastaSequenceFile(final Path path, final FastaSeque * @param dictionary The sequence dictionary, or null if there isn't one. * @param gziIndex The GZI index; may not be null. */ - public BlockCompressedIndexedFastaSequenceFile(final String source, final SeekableStream in, final FastaSequenceIndex index, final SAMSequenceDictionary dictionary, final GZIIndex gziIndex) { + public BlockCompressedIndexedFastaSequenceFile( + final String source, + final SeekableStream in, + final FastaSequenceIndex index, + final SAMSequenceDictionary dictionary, + final GZIIndex gziIndex) { super(source, index, dictionary); if (gziIndex == null) { throw new IllegalArgumentException("null gzi index"); diff --git a/src/main/java/htsjdk/samtools/reference/FastaReferenceWriter.java b/src/main/java/htsjdk/samtools/reference/FastaReferenceWriter.java index ca422f4b90..450f2d4e18 100644 --- a/src/main/java/htsjdk/samtools/reference/FastaReferenceWriter.java +++ b/src/main/java/htsjdk/samtools/reference/FastaReferenceWriter.java @@ -28,8 +28,6 @@ import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.util.SequenceUtil; import htsjdk.utils.ValidationUtils; -import org.apache.commons.compress.utils.CountingOutputStream; - import java.io.IOException; import java.io.OutputStream; import java.io.OutputStreamWriter; @@ -41,6 +39,7 @@ import java.security.NoSuchAlgorithmException; import java.util.HashSet; import java.util.Set; +import org.apache.commons.compress.utils.CountingOutputStream; /** * Writes a FASTA formatted reference file. @@ -53,7 +52,7 @@ * byte[][] seqBases = ...; * ... 
* try (final FastaReferenceWriter writer = new FastaReferenceFileWriter(outputFile)) { - * for (int i = 0; i < seqNames.length; i++) { + * for (int i = 0; i < seqNames.length; i++) { * writer.startSequence(seqNames[i]).appendBases(seqBases[i]); * } * } @@ -110,7 +109,8 @@ public final class FastaReferenceWriter implements AutoCloseable { /** * Convenient cached {@code byte[]} representation of the line separator. */ - private static final byte[] LINE_SEPARATOR = String.valueOf(LINE_SEPARATOR_CHR).getBytes(CHARSET); + private static final byte[] LINE_SEPARATOR = + String.valueOf(LINE_SEPARATOR_CHR).getBytes(CHARSET); /** * Output stream to the main FASTA output. @@ -133,7 +133,6 @@ public final class FastaReferenceWriter implements AutoCloseable { */ private final Writer dictWriter; - /** * the md5 digester (or null if not adding md5) */ @@ -198,10 +197,12 @@ public final class FastaReferenceWriter implements AutoCloseable { * @param dictOutput the (uncompressed) output stream to the dictFile, if requested, {@code null} if none should be generated. * @throws IllegalArgumentException if {@code fastaFile} is {@code null} or {@code basesPerLine} is 0 or negative. */ - FastaReferenceWriter(final int basesPerLine, final boolean addMd5, - final OutputStream fastaOutput, - final OutputStream indexOutput, - final OutputStream dictOutput) { + FastaReferenceWriter( + final int basesPerLine, + final boolean addMd5, + final OutputStream fastaOutput, + final OutputStream indexOutput, + final OutputStream dictOutput) { try { this.md5Digester = addMd5 ? MessageDigest.getInstance("MD5") : null; @@ -211,7 +212,8 @@ public final class FastaReferenceWriter implements AutoCloseable { this.defaultBasePerLine = basesPerLine; this.fastaStream = new CountingOutputStream(fastaOutput); - this.faiIndexWriter = indexOutput == null ? NullWriter.NULL_WRITER : new OutputStreamWriter(indexOutput, CHARSET); + this.faiIndexWriter = + indexOutput == null ? 
NullWriter.NULL_WRITER : new OutputStreamWriter(indexOutput, CHARSET); this.dictWriter = dictOutput == null ? NullWriter.NULL_WRITER : new OutputStreamWriter(dictOutput, CHARSET); this.dictCodec = new SAMSequenceDictionaryCodec(dictWriter); this.dictCodec.encodeHeaderLine(false); @@ -250,8 +252,8 @@ private static String checkDescription(final String description) { for (int i = 0; i < description.length(); i++) { final char c = description.charAt(i); if (Character.isISOControl(c) && c != '\t') { // tab is the only valid control char in the description. - throw new IllegalArgumentException("the input name contains non-tab control characters: '" + - description + "'"); + throw new IllegalArgumentException( + "the input name contains non-tab control characters: '" + description + "'"); } } return description; @@ -288,8 +290,7 @@ private static String checkDescription(final String description) { * @throws IllegalStateException if no base was added to the previous sequence or the writer is already closed. * @throws IOException if such exception is thrown when writing into the output resources. */ - public FastaReferenceWriter startSequence(final String sequenceName) - throws IOException { + public FastaReferenceWriter startSequence(final String sequenceName) throws IOException { return startSequence(sequenceName, "", defaultBasePerLine); } @@ -321,8 +322,7 @@ public FastaReferenceWriter startSequence(final String sequenceName) * @throws IllegalStateException if no base was added to the previous sequence or the writer is already closed. * @throws IOException if such exception is thrown when writing into the output resources. 
*/ - public FastaReferenceWriter startSequence(final String sequenceName, final int basesPerLine) - throws IOException { + public FastaReferenceWriter startSequence(final String sequenceName, final int basesPerLine) throws IOException { return startSequence(sequenceName, "", FastaReferenceWriterBuilder.checkBasesPerLine(basesPerLine)); } @@ -359,8 +359,7 @@ public FastaReferenceWriter startSequence(final String sequenceName, final int b * @throws IllegalStateException if no base was added to the previous sequence or the writer is already closed. * @throws IOException if such exception is thrown when writing into the output resources. */ - public FastaReferenceWriter startSequence(final String sequenceName, final String description) - throws IOException { + public FastaReferenceWriter startSequence(final String sequenceName, final String description) throws IOException { return startSequence(sequenceName, description, defaultBasePerLine); } @@ -397,8 +396,8 @@ public FastaReferenceWriter startSequence(final String sequenceName, final Strin * the sequence has been already added. * @throws IOException if such exception is thrown when writing into the output resources. 
*/ - public FastaReferenceWriter startSequence(final String sequenceName, final String description, final int basesPerLine) - throws IOException { + public FastaReferenceWriter startSequence( + final String sequenceName, final String description, final int basesPerLine) throws IOException { assertIsNotClosed(); checkSequenceName(sequenceName); final String nonNullDescription = checkDescription(description); @@ -424,8 +423,7 @@ public FastaReferenceWriter startSequence(final String sequenceName, final Strin return this; } - private void closeSequence() - throws IOException { + private void closeSequence() throws IOException { if (currentSequenceName != null) { if (currentBasesCount == 0) { throw new IllegalStateException("no base was added"); @@ -440,13 +438,18 @@ private void closeSequence() } } - private void writeIndexEntry() - throws IOException { - faiIndexWriter.append(currentSequenceName).append(INDEX_FIELD_SEPARATOR_CHR) - .append(String.valueOf(currentBasesCount)).append(INDEX_FIELD_SEPARATOR_CHR) - .append(String.valueOf(currentSequenceOffset)).append(INDEX_FIELD_SEPARATOR_CHR) - .append(String.valueOf(currentBasesPerLine)).append(INDEX_FIELD_SEPARATOR_CHR) - .append(String.valueOf(currentBasesPerLine + LINE_SEPARATOR.length)).append(LINE_SEPARATOR_CHR); + private void writeIndexEntry() throws IOException { + faiIndexWriter + .append(currentSequenceName) + .append(INDEX_FIELD_SEPARATOR_CHR) + .append(String.valueOf(currentBasesCount)) + .append(INDEX_FIELD_SEPARATOR_CHR) + .append(String.valueOf(currentSequenceOffset)) + .append(INDEX_FIELD_SEPARATOR_CHR) + .append(String.valueOf(currentBasesPerLine)) + .append(INDEX_FIELD_SEPARATOR_CHR) + .append(String.valueOf(currentBasesPerLine + LINE_SEPARATOR.length)) + .append(LINE_SEPARATOR_CHR); } private void writeDictEntry() { @@ -468,8 +471,7 @@ private void writeDictEntry() { * @throws IllegalStateException if no sequence was started or the writer is already closed. 
* @throws IOException if such exception is throw when writing in any of the outputs. */ - public FastaReferenceWriter appendBases(final String basesBases) - throws IOException { + public FastaReferenceWriter appendBases(final String basesBases) throws IOException { return appendBases(basesBases.getBytes(StandardCharsets.US_ASCII)); } @@ -484,8 +486,7 @@ public FastaReferenceWriter appendBases(final String basesBases) * @throws IllegalStateException if no sequence was started or the writer is already closed. * @throws IOException if such exception is throw when writing in any of the outputs. */ - public FastaReferenceWriter appendBases(final byte[] bases) - throws IOException { + public FastaReferenceWriter appendBases(final byte[] bases) throws IOException { return appendBases(bases, 0, bases.length); } @@ -503,16 +504,17 @@ public FastaReferenceWriter appendBases(final byte[] bases) * @throws IllegalStateException if no sequence was started or the writer is already closed. * @throws IOException if such exception is throw when writing in any of the outputs. 
*/ - public FastaReferenceWriter appendBases(final byte[] bases, final int offset, final int length) - throws IOException { + public FastaReferenceWriter appendBases(final byte[] bases, final int offset, final int length) throws IOException { assertIsNotClosed(); assertSequenceOpen(); checkSequenceBases(bases, offset, length); ValidationUtils.validateArg(offset >= 0, "the input offset cannot be negative"); ValidationUtils.validateArg(length >= 0, "the input length must not be negative"); final int to = offset + length; - ValidationUtils.validateArg(to <= bases.length, "the length + offset goes beyond the end of " + - "the input base array: '" + to + "' > '" + bases.length + "'"); + ValidationUtils.validateArg( + to <= bases.length, + "the length + offset goes beyond the end of " + "the input base array: '" + to + "' > '" + bases.length + + "'"); int next = offset; while (next < to) { @@ -523,7 +525,8 @@ public FastaReferenceWriter appendBases(final byte[] bases, final int offset, fi final int nextLength = Math.min(to - next, currentBasesPerLine - currentLineBasesCount); fastaStream.write(bases, next, nextLength); if (md5Digester != null) { - md5Digester.update(new String(bases, next, nextLength).toUpperCase().getBytes()); + md5Digester.update( + new String(bases, next, nextLength).toUpperCase().getBytes()); } currentLineBasesCount += nextLength; next += nextLength; @@ -574,7 +577,8 @@ public FastaReferenceWriter addSequence(ReferenceSequence sequence) throws IOExc * @throws IllegalStateException if the writer is already closed, a previous sequence (if any was opened) has no base appended to it or a sequence * with such name was already appended to this writer. 
*/ - public FastaReferenceWriter appendSequence(final String name, final String description, final byte[] bases) throws IOException { + public FastaReferenceWriter appendSequence(final String name, final String description, final byte[] bases) + throws IOException { return startSequence(name, description).appendBases(bases); } @@ -601,7 +605,9 @@ public FastaReferenceWriter appendSequence(final String name, final String descr * @throws IllegalStateException if the writer is already closed, a previous sequence (if any was opened) has no base appended to it or a sequence * with such name was already appended to this writer. */ - public FastaReferenceWriter appendSequence(final String name, final String description, final int basesPerLine, final byte[] bases) throws IOException { + public FastaReferenceWriter appendSequence( + final String name, final String description, final int basesPerLine, final byte[] bases) + throws IOException { return startSequence(name, description, basesPerLine).appendBases(bases); } @@ -654,11 +660,19 @@ public void close() throws IOException { * @param bases the sequence bases, cannot be {@code null}. * @throws IOException if such exception is thrown when writing in the output resources. 
*/ - public static void writeSingleSequenceReference(final Path whereTo, final boolean makeIndex, - final boolean makeDict, final String name, - final String description, final byte[] bases) + public static void writeSingleSequenceReference( + final Path whereTo, + final boolean makeIndex, + final boolean makeDict, + final String name, + final String description, + final byte[] bases) throws IOException { - try (final FastaReferenceWriter writer = new FastaReferenceWriterBuilder().setFastaFile(whereTo).setMakeFaiOutput(makeIndex).setMakeDictOutput(makeDict).build()) { + try (final FastaReferenceWriter writer = new FastaReferenceWriterBuilder() + .setFastaFile(whereTo) + .setMakeFaiOutput(makeIndex) + .setMakeDictOutput(makeDict) + .build()) { writer.startSequence(name, description); writer.appendBases(bases); } @@ -676,11 +690,21 @@ public static void writeSingleSequenceReference(final Path whereTo, final boolea * @param bases the sequence bases, cannot be {@code null}. * @throws IOException if such exception is thrown when writing in the output resources. 
*/ - public static void writeSingleSequenceReference(final Path whereTo, final int basesPerLine, final boolean makeIndex, - final boolean makeDict, final String name, - final String description, final byte[] bases) + public static void writeSingleSequenceReference( + final Path whereTo, + final int basesPerLine, + final boolean makeIndex, + final boolean makeDict, + final String name, + final String description, + final byte[] bases) throws IOException { - try (final FastaReferenceWriter writer = new FastaReferenceWriterBuilder().setBasesPerLine(basesPerLine).setFastaFile(whereTo).setMakeFaiOutput(makeIndex).setMakeDictOutput(makeDict).build()) { + try (final FastaReferenceWriter writer = new FastaReferenceWriterBuilder() + .setBasesPerLine(basesPerLine) + .setFastaFile(whereTo) + .setMakeFaiOutput(makeIndex) + .setMakeDictOutput(makeDict) + .build()) { writer.startSequence(name, description); writer.appendBases(bases); } @@ -703,12 +727,11 @@ public void close() throws IOException { // no op } - private NullWriter() { - } + private NullWriter() {} /** * The only singleton instance of this class (no need for more!) 
*/ - public final static NullWriter NULL_WRITER = new NullWriter(); + public static final NullWriter NULL_WRITER = new NullWriter(); } } diff --git a/src/main/java/htsjdk/samtools/reference/FastaReferenceWriterBuilder.java b/src/main/java/htsjdk/samtools/reference/FastaReferenceWriterBuilder.java index 656a6af743..db7bb92f82 100644 --- a/src/main/java/htsjdk/samtools/reference/FastaReferenceWriterBuilder.java +++ b/src/main/java/htsjdk/samtools/reference/FastaReferenceWriterBuilder.java @@ -28,7 +28,6 @@ import htsjdk.samtools.util.GZIIndex; import htsjdk.samtools.util.IOUtil; import htsjdk.utils.ValidationUtils; - import java.io.BufferedOutputStream; import java.io.IOException; import java.io.OutputStream; @@ -189,7 +188,6 @@ public FastaReferenceWriterBuilder setDictFile(final Path dictFile) { * @param fastaOutput a {@link OutputStream} for the output fasta file. * @return this builder */ - public FastaReferenceWriterBuilder setFastaOutput(final OutputStream fastaOutput) { this.fastaOutput = fastaOutput; this.fastaFile = null; @@ -250,42 +248,47 @@ public FastaReferenceWriterBuilder setDictOutput(final OutputStream dictOutput) */ public FastaReferenceWriter build() throws IOException { if (fastaFile == null && fastaOutput == null) { - throw new IllegalArgumentException("Both fastaFile and fastaOutput were null. Please set one of them to be non-null."); + throw new IllegalArgumentException( + "Both fastaFile and fastaOutput were null. Please set one of them to be non-null."); } - if(fastaFile != null) { + if (fastaFile != null) { if (faiIndexFile == null && faiIndexOutput == null) { faiIndexFile = defaultFaiFile(makeFaiOutput, fastaFile); // Asserting that we were not provided two paths for fai output files } else if (faiIndexFile != null && faiIndexOutput != null) { - throw new IllegalArgumentException("Both faiIndexFile and faiIndexOutput were non-null. 
Please set one of them to be null."); + throw new IllegalArgumentException( + "Both faiIndexFile and faiIndexOutput were non-null. Please set one of them to be null."); } if (dictFile == null && dictOutput == null) { dictFile = defaultDictFile(makeDictOutput, fastaFile); // Asserting that we were not provided two paths for dict output files } else if (dictFile != null && dictOutput != null) { - throw new IllegalArgumentException("Both dictFile and dictOutput were non-null. Please set one of them to be null."); + throw new IllegalArgumentException( + "Both dictFile and dictOutput were non-null. Please set one of them to be null."); } if (gzippedFastaFile && gziIndexFile == null && gziIndexOutput == null) { gziIndexFile = defaultGziFile(makeGziOutput, fastaFile); // Asserting that we were not given a path for a gzi output for a non-block compressed output } else if (!gzippedFastaFile && (gziIndexFile != null || gziIndexOutput != null)) { - throw new IllegalArgumentException("Requested a gzi index but the output format fasta file was not a block compressed gzip file"); + throw new IllegalArgumentException( + "Requested a gzi index but the output format fasta file was not a block compressed gzip file"); } // Asserting that we were not provided two paths for gzi output files if (gziIndexFile != null && gziIndexOutput != null) { - throw new IllegalArgumentException("Both dictFile and dictOutput were non-null. Please set one of them to be null."); + throw new IllegalArgumentException( + "Both dictFile and dictOutput were non-null. Please set one of them to be null."); } // Asserting that that a Gzi output must accompany the fai output for block compressed output - if ((faiIndexFile != null || faiIndexOutput != null) && - gzippedFastaFile && - (gziIndexFile == null || gziIndexOutput != null)) { - throw new IllegalArgumentException("Requested a fai index file for a block compressed output file. 
This index is unusable without a gzi index file as well"); + if ((faiIndexFile != null || faiIndexOutput != null) + && gzippedFastaFile + && (gziIndexFile == null || gziIndexOutput != null)) { + throw new IllegalArgumentException( + "Requested a fai index file for a block compressed output file. This index is unusable without a gzi index file as well"); } - } // checkout bases-perline first, so that files are not created if failure; checkBasesPerLine(basesPerLine); diff --git a/src/main/java/htsjdk/samtools/reference/FastaSequenceFile.java b/src/main/java/htsjdk/samtools/reference/FastaSequenceFile.java index b6bef4811a..6da325eb7a 100644 --- a/src/main/java/htsjdk/samtools/reference/FastaSequenceFile.java +++ b/src/main/java/htsjdk/samtools/reference/FastaSequenceFile.java @@ -33,7 +33,6 @@ import htsjdk.samtools.util.FastLineReader; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.StringUtil; - import java.io.File; import java.io.IOException; import java.nio.file.Path; @@ -51,7 +50,6 @@ public class FastaSequenceFile extends AbstractFastaSequenceFile { private int sequenceIndex = -1; private final byte[] basesBuffer = new byte[Defaults.NON_ZERO_BUFFER_SIZE]; - /** Constructs a FastaSequenceFile that reads from the specified file. */ public FastaSequenceFile(final File file, final boolean truncateNamesAtWhitespace) { this(IOUtil.toPath(file), truncateNamesAtWhitespace); @@ -84,7 +82,11 @@ public FastaSequenceFile(final IOPath fastaPath, final IOPath dictPath, final bo * Constructs a FastaSequenceFile that reads from the specified stream (which must not be compressed, i.e. * the caller is responsible for decompressing the stream). 
*/ - public FastaSequenceFile(String source, final SeekableStream seekableStream, SAMSequenceDictionary dictionary, final boolean truncateNamesAtWhitespace) { + public FastaSequenceFile( + String source, + final SeekableStream seekableStream, + SAMSequenceDictionary dictionary, + final boolean truncateNamesAtWhitespace) { super(null, source, dictionary); this.truncateNamesAtWhitespace = truncateNamesAtWhitespace; this.seekableStream = seekableStream; @@ -111,7 +113,9 @@ public ReferenceSequence nextSequence() { } // Read the sequence - final int knownLength = (getSequenceDictionary() == null) ? -1 : getSequenceDictionary().getSequence(this.sequenceIndex).getSequenceLength(); + final int knownLength = (getSequenceDictionary() == null) + ? -1 + : getSequenceDictionary().getSequence(this.sequenceIndex).getSequenceLength(); final byte[] bases = readSequence(knownLength); return new ReferenceSequence(name, this.sequenceIndex, bases); } @@ -139,8 +143,8 @@ private String readSequenceName() { } final byte b = in.getByte(); if (b != '>') { - throw new SAMException("Format exception reading FASTA " + getSource() + ". Expected > but saw chr(" + - b + ") at start of sequence with index " + this.sequenceIndex); + throw new SAMException("Format exception reading FASTA " + getSource() + ". Expected > but saw chr(" + b + + ") at start of sequence with index " + this.sequenceIndex); } final byte[] nameBuffer = new byte[4096]; int nameLength = 0; @@ -169,7 +173,7 @@ private String readSequenceName() { * @return ASCII bases for sequence */ private byte[] readSequence(final int knownLength) { - byte[] bases = (knownLength == -1) ? basesBuffer : new byte[knownLength] ; + byte[] bases = (knownLength == -1) ? 
basesBuffer : new byte[knownLength]; int sequenceLength = 0; while (!in.eof()) { @@ -190,9 +194,9 @@ private byte[] readSequence(final int knownLength) { break; } if (sequenceLength == bases.length) { - final byte[] tmp = new byte[bases.length * 2]; - System.arraycopy(bases, 0, tmp, 0, sequenceLength); - bases = tmp; + final byte[] tmp = new byte[bases.length * 2]; + System.arraycopy(bases, 0, tmp, 0, sequenceLength); + bases = tmp; } } diff --git a/src/main/java/htsjdk/samtools/reference/FastaSequenceIndex.java b/src/main/java/htsjdk/samtools/reference/FastaSequenceIndex.java index 735ab6347f..32e58bd0c5 100644 --- a/src/main/java/htsjdk/samtools/reference/FastaSequenceIndex.java +++ b/src/main/java/htsjdk/samtools/reference/FastaSequenceIndex.java @@ -27,12 +27,10 @@ import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMSequenceRecord; import htsjdk.samtools.util.IOUtil; - import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; -import java.io.OutputStream; import java.io.PrintStream; import java.nio.file.Files; import java.nio.file.Path; @@ -50,14 +48,15 @@ public class FastaSequenceIndex implements Iterable { /** * Store the entries. Use a LinkedHashMap for consistent iteration in insertion order. */ - private final Map sequenceEntries = new LinkedHashMap(); + private final Map sequenceEntries = + new LinkedHashMap(); /** * Build a sequence index from the specified file. * @param indexFile File to open. * @throws FileNotFoundException if the index file cannot be found. */ - public FastaSequenceIndex( File indexFile ) { + public FastaSequenceIndex(File indexFile) { this(IOUtil.toPath(indexFile)); } @@ -66,7 +65,7 @@ public FastaSequenceIndex( File indexFile ) { * @param indexFile File to open. * @throws FileNotFoundException if the index file cannot be found. 
*/ - public FastaSequenceIndex( Path indexFile ) { + public FastaSequenceIndex(Path indexFile) { IOUtil.assertFileIsReadable(indexFile); try (InputStream in = Files.newInputStream(indexFile)) { parseIndexFile(in); @@ -93,7 +92,7 @@ protected FastaSequenceIndex() {} * @param indexEntry New index entry to add. */ protected void add(FastaSequenceIndexEntry indexEntry) { - final FastaSequenceIndexEntry ret = sequenceEntries.put(indexEntry.getContig(),indexEntry); + final FastaSequenceIndexEntry ret = sequenceEntries.put(indexEntry.getContig(), indexEntry); if (ret != null) { throw new SAMException("Contig '" + indexEntry.getContig() + "' already exists in fasta index."); } @@ -104,7 +103,7 @@ protected void add(FastaSequenceIndexEntry indexEntry) { * @param entry entry to update. * @param newName New name for the index entry. */ - protected void rename(FastaSequenceIndexEntry entry,String newName) { + protected void rename(FastaSequenceIndexEntry entry, String newName) { sequenceEntries.remove(entry.getContig()); entry.setContig(newName); add(entry); @@ -116,22 +115,18 @@ protected void rename(FastaSequenceIndexEntry entry,String newName) { * @return True if index has the same entries as other instance, in the same order */ public boolean equals(Object other) { - if(!(other instanceof FastaSequenceIndex)) - return false; + if (!(other instanceof FastaSequenceIndex)) return false; if (this == other) return true; - FastaSequenceIndex otherIndex = (FastaSequenceIndex)other; - if(this.size() != otherIndex.size()) - return false; + FastaSequenceIndex otherIndex = (FastaSequenceIndex) other; + if (this.size() != otherIndex.size()) return false; Iterator iter = this.iterator(); Iterator otherIter = otherIndex.iterator(); while (iter.hasNext()) { - if (!otherIter.hasNext()) - return false; - if (!iter.next().equals(otherIter.next())) - return false; + if (!otherIter.hasNext()) return false; + if (!iter.next().equals(otherIter.next())) return false; } return true; } @@ -148,13 
+143,12 @@ public int hashCode() { private void parseIndexFile(InputStream in) { try (Scanner scanner = new Scanner(in)) { int sequenceIndex = 0; - while( scanner.hasNext() ) { + while (scanner.hasNext()) { // Tokenize and validate the index line. String result = scanner.findInLine("(.+)\\t+(\\d+)\\s+(\\d+)\\s+(\\d+)\\s+(\\d+)"); - if( result == null ) - throw new SAMException("Found invalid line in index file:" + scanner.nextLine()); + if (result == null) throw new SAMException("Found invalid line in index file:" + scanner.nextLine()); MatchResult tokens = scanner.match(); - if( tokens.groupCount() != 5 ) + if (tokens.groupCount() != 5) throw new SAMException("Found invalid line in index file:" + scanner.nextLine()); // Skip past the line separator @@ -169,7 +163,7 @@ private void parseIndexFile(InputStream in) { contig = SAMSequenceRecord.truncateSequenceName(contig); // Build sequence structure - add(new FastaSequenceIndexEntry(contig,location,size,basesPerLine,bytesPerLine, sequenceIndex++) ); + add(new FastaSequenceIndexEntry(contig, location, size, basesPerLine, bytesPerLine, sequenceIndex++)); } } } @@ -183,15 +177,15 @@ private void parseIndexFile(InputStream in) { */ public void write(final Path indexFile) throws IOException { try (final PrintStream writer = new PrintStream(Files.newOutputStream(indexFile))) { - sequenceEntries.values().forEach(se -> - writer.println(String.join("\t", + sequenceEntries + .values() + .forEach(se -> writer.println(String.join( + "\t", se.getContig(), String.valueOf(se.getSize()), String.valueOf(se.getLocation()), String.valueOf(se.getBasesPerLine()), - String.valueOf(se.getBytesPerLine())) - ) - ); + String.valueOf(se.getBytesPerLine())))); } } @@ -200,7 +194,7 @@ public void write(final Path indexFile) throws IOException { * @param contigName The contig name for which to search. * @return True if contig name is present; false otherwise. 
*/ - public boolean hasIndexEntry( String contigName ) { + public boolean hasIndexEntry(String contigName) { return sequenceEntries.containsKey(contigName); } @@ -210,9 +204,8 @@ public boolean hasIndexEntry( String contigName ) { * @return Index entry associated with the given contig. * @throws SAMException if the associated index entry can't be found. */ - public FastaSequenceIndexEntry getIndexEntry( String contigName ) { - if( !hasIndexEntry(contigName) ) - throw new SAMException("Unable to find entry for contig: " + contigName); + public FastaSequenceIndexEntry getIndexEntry(String contigName) { + if (!hasIndexEntry(contigName)) throw new SAMException("Unable to find entry for contig: " + contigName); return sequenceEntries.get(contigName); } diff --git a/src/main/java/htsjdk/samtools/reference/FastaSequenceIndexCreator.java b/src/main/java/htsjdk/samtools/reference/FastaSequenceIndexCreator.java index 889f4ae699..5c256d0f00 100644 --- a/src/main/java/htsjdk/samtools/reference/FastaSequenceIndexCreator.java +++ b/src/main/java/htsjdk/samtools/reference/FastaSequenceIndexCreator.java @@ -30,7 +30,6 @@ import htsjdk.samtools.util.IOUtil; import htsjdk.tribble.readers.AsciiLineReader; import htsjdk.tribble.readers.PositionalBufferedStream; - import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; @@ -98,7 +97,8 @@ private static InputStream optionallyWrapAsPositional(final InputStream input) { * @throws IOException if an IO error occurs. */ public static FastaSequenceIndex buildFromFasta(final Path fastaFile) throws IOException, SAMException { - try(final AsciiLineReader in = AsciiLineReader.from(optionallyWrapAsPositional(IOUtil.openFileForReading(fastaFile)))) { + try (final AsciiLineReader in = + AsciiLineReader.from(optionallyWrapAsPositional(IOUtil.openFileForReading(fastaFile)))) { // sanity check reference format: // 1. 
Non-empty file @@ -126,7 +126,8 @@ public static FastaSequenceIndex buildFromFasta(final Path fastaFile) throws IOE // first entry should be skipped; otherwise it should be added to the index if (entry != null) index.add(entry.build()); // creates a new entry (and update sequence index) - entry = new FaiEntryBuilder(sequenceIndex++, previous, line, in.getLineTerminatorLength(), location); + entry = new FaiEntryBuilder( + sequenceIndex++, previous, line, in.getLineTerminatorLength(), location); } else if (line != null && line.charAt(0) == '>') { // update the location, next iteration the sequence will be handled location = in.getPosition(); @@ -159,7 +160,12 @@ private static class FaiEntryBuilder { // flag to check if the supposedly last line was already reached private boolean lessBasesFound; - private FaiEntryBuilder(final int index, final String header, final String firstSequenceLine, final int endOfLineLength, final long location) { + private FaiEntryBuilder( + final int index, + final String header, + final String firstSequenceLine, + final int endOfLineLength, + final long location) { if (header == null || header.charAt(0) != '>') { throw new SAMException("Wrong sequence header: " + header); } else if (firstSequenceLine == null) { @@ -167,7 +173,8 @@ private FaiEntryBuilder(final int index, final String header, final String first } this.index = index; // parse the contig name (without the starting '>' and truncating white-spaces) - this.contig = SAMSequenceRecord.truncateSequenceName(header.substring(1).trim()); + this.contig = + SAMSequenceRecord.truncateSequenceName(header.substring(1).trim()); this.location = location; this.basesPerLine = firstSequenceLine.length(); this.endOfLineLength = endOfLineLength; @@ -180,12 +187,18 @@ private void updateWithSequence(final String sequence, final int endOfLineLength throw new SAMException(String.format("Different end of line for the same sequence was found.")); } if (sequence.length() > basesPerLine) { - throw new 
SAMException(String.format("Sequence line for {} was longer than the expected length ({}): {}", - contig, basesPerLine, sequence)); + throw new SAMException(String.format( + "Sequence line for {} was longer than the expected length ({}): {}", + contig, + basesPerLine, + sequence)); } else if (sequence.length() < basesPerLine) { if (lessBasesFound) { - throw new SAMException(String.format("Only last line could have less than {} bases for '{}' sequence, but at least two are different. Last sequence line: {}", - basesPerLine, contig, sequence)); + throw new SAMException(String.format( + "Only last line could have less than {} bases for '{}' sequence, but at least two are different. Last sequence line: {}", + basesPerLine, + contig, + sequence)); } lessBasesFound = true; } @@ -194,7 +207,8 @@ private void updateWithSequence(final String sequence, final int endOfLineLength } private FastaSequenceIndexEntry build() { - return new FastaSequenceIndexEntry(contig, location, size, basesPerLine, basesPerLine + endOfLineLength, index); + return new FastaSequenceIndexEntry( + contig, location, size, basesPerLine, basesPerLine + endOfLineLength, index); } } } diff --git a/src/main/java/htsjdk/samtools/reference/FastaSequenceIndexEntry.java b/src/main/java/htsjdk/samtools/reference/FastaSequenceIndexEntry.java index f8bbf4e157..a37f1be896 100644 --- a/src/main/java/htsjdk/samtools/reference/FastaSequenceIndexEntry.java +++ b/src/main/java/htsjdk/samtools/reference/FastaSequenceIndexEntry.java @@ -43,12 +43,8 @@ public class FastaSequenceIndexEntry { * @param basesPerLine How many bases are on each line. * @param bytesPerLine How many bytes are on each line (includes newline characters). 
*/ - public FastaSequenceIndexEntry( String contig, - long location, - long size, - int basesPerLine, - int bytesPerLine, - int sequenceIndex) { + public FastaSequenceIndexEntry( + String contig, long location, long size, int basesPerLine, int bytesPerLine, int sequenceIndex) { this.contig = contig; this.location = location; this.size = size; @@ -68,7 +64,7 @@ public String getContig() { /** * Sometimes contigs need to be adjusted on-the-fly to * match sequence dictionary entries. Provide that capability - * to other classes w/i the package. + * to other classes w/i the package. * @param contig New value for the contig. */ protected void setContig(String contig) { @@ -117,11 +113,9 @@ public int getSequenceIndex() { * @return A string representation of the contig line. */ public String toString() { - return String.format("contig %s; location %d; size %d; basesPerLine %d; bytesPerLine %d", contig, - location, - size, - basesPerLine, - bytesPerLine ); + return String.format( + "contig %s; location %d; size %d; basesPerLine %d; bytesPerLine %d", + contig, location, size, basesPerLine, bytesPerLine); } /** @@ -130,14 +124,16 @@ public String toString() { * @return True if each has the same name, location, size, basesPerLine and bytesPerLine */ public boolean equals(Object other) { - if(!(other instanceof FastaSequenceIndexEntry)) - return false; + if (!(other instanceof FastaSequenceIndexEntry)) return false; if (this == other) return true; - FastaSequenceIndexEntry otherEntry = (FastaSequenceIndexEntry)other; - return (contig.equals(otherEntry.contig) && size == otherEntry.size && location == otherEntry.location - && basesPerLine == otherEntry.basesPerLine && bytesPerLine == otherEntry.bytesPerLine); + FastaSequenceIndexEntry otherEntry = (FastaSequenceIndexEntry) other; + return (contig.equals(otherEntry.contig) + && size == otherEntry.size + && location == otherEntry.location + && basesPerLine == otherEntry.basesPerLine + && bytesPerLine == otherEntry.bytesPerLine); 
} /** diff --git a/src/main/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java b/src/main/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java index 26a1f352db..602acf7e0d 100644 --- a/src/main/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java +++ b/src/main/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java @@ -30,7 +30,6 @@ import htsjdk.samtools.seekablestream.ReadableSeekableStreamByteChannel; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.IOUtil; - import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; @@ -122,7 +121,8 @@ public IndexedFastaSequenceFile(final Path path) throws FileNotFoundException { * @param index The fasta index. * @param dictionary The sequence dictionary, or null if there isn't one. */ - public IndexedFastaSequenceFile(String source, final SeekableStream in, final FastaSequenceIndex index, SAMSequenceDictionary dictionary) { + public IndexedFastaSequenceFile( + String source, final SeekableStream in, final FastaSequenceIndex index, SAMSequenceDictionary dictionary) { super(source, index, dictionary); this.channel = new ReadableSeekableStreamByteChannel(in); } @@ -144,8 +144,7 @@ public static boolean canCreateIndexedFastaReader(final Path fastaFile) { if (IOUtil.isBlockCompressed(fastaFile, true)) { return false; } - return (Files.exists(fastaFile) && - findFastaIndex(fastaFile) != null); + return (Files.exists(fastaFile) && findFastaIndex(fastaFile) != null); } catch (IOException e) { return false; } @@ -162,7 +161,7 @@ public static boolean canCreateIndexedFastaReader(final Path fastaFile) { @Override protected int readFromPosition(final ByteBuffer buffer, long position) throws IOException { if (channel instanceof FileChannel) { // special case to take advantage of native code path - return ((FileChannel) channel).read(buffer,position); + return ((FileChannel) channel).read(buffer, position); } else { long oldPos = 
channel.position(); try { diff --git a/src/main/java/htsjdk/samtools/reference/ReferenceSequence.java b/src/main/java/htsjdk/samtools/reference/ReferenceSequence.java index 33ea1c602e..5d4c78da8a 100644 --- a/src/main/java/htsjdk/samtools/reference/ReferenceSequence.java +++ b/src/main/java/htsjdk/samtools/reference/ReferenceSequence.java @@ -53,7 +53,9 @@ public ReferenceSequence(String name, int index, byte[] bases) { } /** Gets the set of names given to this sequence in the source file. */ - public String getName() { return name; } + public String getName() { + return name; + } /** * Gets the array of bases that define this sequence. The bases can include any @@ -61,7 +63,9 @@ public ReferenceSequence(String name, int index, byte[] bases) { * letters. This array is mutable (obviously!) and it NOT a clone of the array * held interally. Do not modify it!!! */ - public byte[] getBases() { return bases; } + public byte[] getBases() { + return bases; + } /** * Returns the bases represented by this ReferenceSequence as a String. Since this will copy the bases @@ -71,14 +75,20 @@ public ReferenceSequence(String name, int index, byte[] bases) { * * @return The set of bases represented by this ReferenceSequence, as a String */ - public String getBaseString() { return StringUtil.bytesToString(bases); } + public String getBaseString() { + return StringUtil.bytesToString(bases); + } /** Gets the 0-based index of this contig in the source file from which it came. */ - public int getContigIndex() { return contigIndex; } + public int getContigIndex() { + return contigIndex; + } /** Gets the length of this reference sequence in bases. 
*/ - public int length() { return length; } - + public int length() { + return length; + } + public String toString() { return "ReferenceSequence " + getName(); } diff --git a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFile.java b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFile.java index 89afa12753..5938aa0975 100644 --- a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFile.java +++ b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFile.java @@ -26,7 +26,6 @@ import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.util.Locatable; - import java.io.Closeable; import java.io.IOException; @@ -58,7 +57,6 @@ public interface ReferenceSequenceFile extends Closeable { */ public void reset(); - /** * @return true if getSequence and getSubsequenceAt methods are allowed. */ @@ -70,7 +68,7 @@ public interface ReferenceSequenceFile extends Closeable { * @return The full sequence associated with this contig. * @throws UnsupportedOperationException if !sIndexed. */ - public ReferenceSequence getSequence( String contig ); + public ReferenceSequence getSequence(String contig); /** * Gets the subsequence of the contig in the range [start,stop] @@ -80,7 +78,7 @@ public interface ReferenceSequenceFile extends Closeable { * @return The partial reference sequence associated with this range. * @throws UnsupportedOperationException if !sIndexed. */ - public ReferenceSequence getSubsequenceAt( String contig, long start, long stop ); + public ReferenceSequence getSubsequenceAt(String contig, long start, long stop); /** * Gets the subsequence of the contig in the locatable. Shortcut to getSubsequenceAt(locatable.getContig(), locatable.getStart(), locatable.getEnd()); @@ -88,7 +86,7 @@ public interface ReferenceSequenceFile extends Closeable { * @return The partial reference sequence associated with this location. * @throws UnsupportedOperationException if !sIndexed. 
*/ - public default ReferenceSequence getSubsequenceAt(final Locatable locatable ) { + public default ReferenceSequence getSubsequenceAt(final Locatable locatable) { return getSubsequenceAt(locatable.getContig(), locatable.getStart(), locatable.getEnd()); } diff --git a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java index 3247ad32c1..c6f4e30490 100644 --- a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java +++ b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java @@ -32,16 +32,15 @@ import htsjdk.io.HtsPath; import htsjdk.io.IOPath; import htsjdk.samtools.SAMException; -import htsjdk.samtools.util.GZIIndex; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMTextHeaderCodec; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.BufferedLineReader; import htsjdk.samtools.util.FileExtensions; +import htsjdk.samtools.util.GZIIndex; import htsjdk.samtools.util.IOUtil; import htsjdk.utils.ValidationUtils; - import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -89,7 +88,8 @@ public static ReferenceSequenceFile getReferenceSequenceFile(final File file) { * @param file the reference sequence file on disk * @param truncateNamesAtWhitespace if true, only include the first word of the sequence name */ - public static ReferenceSequenceFile getReferenceSequenceFile(final File file, final boolean truncateNamesAtWhitespace) { + public static ReferenceSequenceFile getReferenceSequenceFile( + final File file, final boolean truncateNamesAtWhitespace) { return getReferenceSequenceFile(file, truncateNamesAtWhitespace, true); } @@ -101,7 +101,8 @@ public static ReferenceSequenceFile getReferenceSequenceFile(final File file, fi * @param truncateNamesAtWhitespace if true, only include the first word of the sequence name * @param 
preferIndexed if true attempt to return an indexed reader that supports non-linear traversal, else return the non-indexed reader */ - public static ReferenceSequenceFile getReferenceSequenceFile(final File file, final boolean truncateNamesAtWhitespace, final boolean preferIndexed) { + public static ReferenceSequenceFile getReferenceSequenceFile( + final File file, final boolean truncateNamesAtWhitespace, final boolean preferIndexed) { return getReferenceSequenceFile(IOUtil.toPath(file), HtsPath::new, truncateNamesAtWhitespace, preferIndexed); } @@ -123,7 +124,8 @@ public static ReferenceSequenceFile getReferenceSequenceFile(final Path path) { * @param path the reference sequence file on disk * @param truncateNamesAtWhitespace if true, only include the first word of the sequence name */ - public static ReferenceSequenceFile getReferenceSequenceFile(final Path path, final boolean truncateNamesAtWhitespace) { + public static ReferenceSequenceFile getReferenceSequenceFile( + final Path path, final boolean truncateNamesAtWhitespace) { return getReferenceSequenceFile(path, HtsPath::new, truncateNamesAtWhitespace, true); } @@ -135,13 +137,16 @@ public static ReferenceSequenceFile getReferenceSequenceFile(final Path path, fi * @param truncateNamesAtWhitespace if true, only include the first word of the sequence name * @param preferIndexed if true attempt to return an indexed reader that supports non-linear traversal, else return the non-indexed reader */ - public static ReferenceSequenceFile getReferenceSequenceFile(final Path path, final boolean truncateNamesAtWhitespace, final boolean preferIndexed) { + public static ReferenceSequenceFile getReferenceSequenceFile( + final Path path, final boolean truncateNamesAtWhitespace, final boolean preferIndexed) { // this should thrown an exception if the fasta file is not supported getFastaExtension(path); // Using faidx requires truncateNamesAtWhitespace if (truncateNamesAtWhitespace && preferIndexed && 
canCreateIndexedFastaReader(path)) { try { - return IOUtil.isBlockCompressed(path, true) ? new BlockCompressedIndexedFastaSequenceFile(path) : new IndexedFastaSequenceFile(path); + return IOUtil.isBlockCompressed(path, true) + ? new BlockCompressedIndexedFastaSequenceFile(path) + : new IndexedFastaSequenceFile(path); } catch (final IOException e) { throw new SAMException("Error opening FASTA: " + path, e); } @@ -185,21 +190,22 @@ public static ReferenceSequenceFile getReferenceSequenceFile( * @param preferIndexed if true attempt to return an indexed reader that supports non-linear traversal, else return the non-indexed reader */ public static ReferenceSequenceFile getReferenceSequenceFileFromBundle( - final Bundle referenceBundle, - final boolean truncateNamesAtWhitespace, - final boolean preferIndexed) { + final Bundle referenceBundle, final boolean truncateNamesAtWhitespace, final boolean preferIndexed) { ValidationUtils.nonNull(referenceBundle, "reference bundle"); // required fasta path final BundleResource fastaResource = referenceBundle.getOrThrow(BundleResourceType.CT_HAPLOID_REFERENCE); - final IOPath fastaPath = fastaResource.getIOPath().orElseThrow( - () -> new RuntimeException("The fasta bundle resource must contain a fasta resource that is backed by an IOPath.")); + final IOPath fastaPath = fastaResource + .getIOPath() + .orElseThrow(() -> new RuntimeException( + "The fasta bundle resource must contain a fasta resource that is backed by an IOPath.")); if (!Files.exists(fastaPath.toPath())) { throw new RuntimeException(String.format("FASTA file %s does not exist", fastaPath)); } // optional dictionary path - IOPath dictPath = getSecondaryBundleResource(referenceBundle, BundleResourceType.CT_REFERENCE_DICTIONARY, "Sequence dictionary"); + IOPath dictPath = getSecondaryBundleResource( + referenceBundle, BundleResourceType.CT_REFERENCE_DICTIONARY, "Sequence dictionary"); // optional index. 
Using faidx requires truncateNamesAtWhitespace IOPath indexPath = null; @@ -208,12 +214,17 @@ public static ReferenceSequenceFile getReferenceSequenceFileFromBundle( if (!truncateNamesAtWhitespace) { throw new RuntimeException("preferIndexed option requires truncateNamesAtWhitespace"); } - indexPath = getSecondaryBundleResource(referenceBundle, BundleResourceType.CT_REFERENCE_INDEX, "FASTA index"); - gziIndexPath = getSecondaryBundleResource(referenceBundle, BundleResourceType.CT_REFERENCE_INDEX_GZI, "GZI index"); + indexPath = + getSecondaryBundleResource(referenceBundle, BundleResourceType.CT_REFERENCE_INDEX, "FASTA index"); + gziIndexPath = + getSecondaryBundleResource(referenceBundle, BundleResourceType.CT_REFERENCE_INDEX_GZI, "GZI index"); } try { - if (IOUtil.isBlockCompressed(fastaPath.toPath(), true) && preferIndexed && indexPath != null && gziIndexPath != null) { + if (IOUtil.isBlockCompressed(fastaPath.toPath(), true) + && preferIndexed + && indexPath != null + && gziIndexPath != null) { return new BlockCompressedIndexedFastaSequenceFile( fastaPath, dictPath, @@ -238,7 +249,8 @@ private static IOPath getSecondaryBundleResource(Bundle bundle, String secondary } else { throw new RuntimeException(String.format("%s file %s does not exist", description, path)); } - }).orElse(null); + }) + .orElse(null); } /** @@ -262,8 +274,8 @@ public static boolean canCreateIndexedFastaReader(final Path fastaFile) { // open the file for checking for block-compressed input try { // if it is bgzip, it requires the .gzi index - return !IOUtil.isBlockCompressed(fastaFile, true) || - Files.exists(GZIIndex.resolveIndexNameForBgzipFile(fastaFile)); + return !IOUtil.isBlockCompressed(fastaFile, true) + || Files.exists(GZIIndex.resolveIndexNameForBgzipFile(fastaFile)); } catch (IOException e) { return false; } @@ -279,7 +291,8 @@ public static boolean canCreateIndexedFastaReader(final Path fastaFile) { * @param in The input stream to read the fasta file from. 
* @param index The index, or null to return a non-indexed reader. */ - public static ReferenceSequenceFile getReferenceSequenceFile(final String source, final SeekableStream in, final FastaSequenceIndex index) { + public static ReferenceSequenceFile getReferenceSequenceFile( + final String source, final SeekableStream in, final FastaSequenceIndex index) { return getReferenceSequenceFile(source, in, index, null, true); } @@ -293,7 +306,12 @@ public static ReferenceSequenceFile getReferenceSequenceFile(final String source * @param dictionary The sequence dictionary, or null if there isn't one. * @param truncateNamesAtWhitespace if true, only include the first word of the sequence name */ - public static ReferenceSequenceFile getReferenceSequenceFile(final String source, final SeekableStream in, final FastaSequenceIndex index, final SAMSequenceDictionary dictionary, final boolean truncateNamesAtWhitespace) { + public static ReferenceSequenceFile getReferenceSequenceFile( + final String source, + final SeekableStream in, + final FastaSequenceIndex index, + final SAMSequenceDictionary dictionary, + final boolean truncateNamesAtWhitespace) { if (truncateNamesAtWhitespace && index != null) { return new IndexedFastaSequenceFile(source, in, index, dictionary); } @@ -345,8 +363,11 @@ public static SAMSequenceDictionary loadDictionary(final InputStream in) { */ public static String getFastaExtension(final Path path) { final String name = path.getFileName().toString(); - return FileExtensions.FASTA.stream().filter(name::endsWith).findFirst() - .orElseThrow(() -> new IllegalArgumentException("File is not a supported reference file type: " + path.toAbsolutePath())); + return FileExtensions.FASTA.stream() + .filter(name::endsWith) + .findFirst() + .orElseThrow(() -> new IllegalArgumentException( + "File is not a supported reference file type: " + path.toAbsolutePath())); } /** diff --git a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileWalker.java 
b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileWalker.java index e0cf33a314..08e9ad69d8 100644 --- a/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileWalker.java +++ b/src/main/java/htsjdk/samtools/reference/ReferenceSequenceFileWalker.java @@ -27,7 +27,6 @@ import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceRecord; - import java.io.Closeable; import java.io.File; import java.io.IOException; @@ -65,12 +64,12 @@ public ReferenceSequence get(final int sequenceIndex, final String sequenceName, get(sequenceIndex); if (!referenceSequence.getName().equals(sequenceName)) { // Sanity check the sequence names against the sequence dictionary while scanning through. - throw new SAMException("Sequence name mismatch at sequence index (" + referenceSequence.getContigIndex() + - ", " + referenceSequence.getName() + ") != " + sequenceName); + throw new SAMException("Sequence name mismatch at sequence index (" + referenceSequence.getContigIndex() + + ", " + referenceSequence.getName() + ") != " + sequenceName); } if (referenceSequence.getBases().length != length) { - throw new SAMException("Sequence length mismatch for (" + sequenceIndex + ", " + sequenceName + - "). expected " + length + " but found " + referenceSequence.getBases().length); + throw new SAMException("Sequence length mismatch for (" + sequenceIndex + ", " + sequenceName + + "). 
expected " + length + " but found " + referenceSequence.getBases().length); } return referenceSequence; } @@ -84,25 +83,25 @@ public ReferenceSequence get(final int sequenceIndex) { return referenceSequence; } if (referenceSequence != null && referenceSequence.getContigIndex() > sequenceIndex) { - throw new SAMException("Requesting earlier reference sequence: " + sequenceIndex + " < " + - referenceSequence.getContigIndex()); + throw new SAMException("Requesting earlier reference sequence: " + sequenceIndex + " < " + + referenceSequence.getContigIndex()); } referenceSequence = null; - if(referenceSequenceFile.isIndexed() && referenceSequenceFile.getSequenceDictionary() != null) { - final SAMSequenceRecord samSequenceRecord = referenceSequenceFile.getSequenceDictionary().getSequence(sequenceIndex); - if(samSequenceRecord != null) { - referenceSequence = referenceSequenceFile.getSequence(samSequenceRecord.getSequenceName()) ; + if (referenceSequenceFile.isIndexed() && referenceSequenceFile.getSequenceDictionary() != null) { + final SAMSequenceRecord samSequenceRecord = + referenceSequenceFile.getSequenceDictionary().getSequence(sequenceIndex); + if (samSequenceRecord != null) { + referenceSequence = referenceSequenceFile.getSequence(samSequenceRecord.getSequenceName()); } // else referenceSequence will remain null } else { do { referenceSequence = referenceSequenceFile.nextSequence(); - } - while (referenceSequence != null && referenceSequence.getContigIndex() < sequenceIndex); + } while (referenceSequence != null && referenceSequence.getContigIndex() < sequenceIndex); } if (referenceSequence == null || referenceSequence.getContigIndex() != sequenceIndex) { - throw new SAMException("Reference sequence (" + sequenceIndex + - ") not found in " + referenceSequenceFile.toString()); + throw new SAMException( + "Reference sequence (" + sequenceIndex + ") not found in " + referenceSequenceFile.toString()); } return referenceSequence; } diff --git 
a/src/main/java/htsjdk/samtools/reference/SamLocusAndReferenceIterator.java b/src/main/java/htsjdk/samtools/reference/SamLocusAndReferenceIterator.java index bae8641571..0f4b244189 100644 --- a/src/main/java/htsjdk/samtools/reference/SamLocusAndReferenceIterator.java +++ b/src/main/java/htsjdk/samtools/reference/SamLocusAndReferenceIterator.java @@ -23,14 +23,13 @@ */ package htsjdk.samtools.reference; +import static htsjdk.samtools.util.SamLocusIterator.*; + import htsjdk.samtools.util.IterableOnceIterator; import htsjdk.samtools.util.SamLocusIterator; import htsjdk.samtools.util.SequenceUtil; - import java.util.List; -import static htsjdk.samtools.util.SamLocusIterator.*; - /** * Iterator that traverses a SAM File and a ReferenceFile, accumulating information on a per-locus basis. * Only loci that are covered by the input reads are returned. @@ -39,10 +38,11 @@ * * @author Yossi Farjoun */ -public class SamLocusAndReferenceIterator extends IterableOnceIterator { +public class SamLocusAndReferenceIterator + extends IterableOnceIterator { /** The base to use to indicate the locus is prior to the reference start (i.e. position zero). 
*/ - final static byte BASE_BEFORE_REFERENCE_START = (byte) '-'; + static final byte BASE_BEFORE_REFERENCE_START = (byte) '-'; private final ReferenceSequenceFileWalker referenceSequenceFileWalker; private final SamLocusIterator locusIterator; @@ -57,14 +57,14 @@ public class SamLocusAndReferenceIterator extends IterableOnceIterator wrapper) throws IOException { - if(wrapper != null) { + default SeekableStream getStreamFor(String path, Function wrapper) + throws IOException { + if (wrapper != null) { throw new UnsupportedOperationException("This factory doesn't support adding wrappers"); } else { return this.getStreamFor(path); diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekableBufferedStream.java b/src/main/java/htsjdk/samtools/seekablestream/SeekableBufferedStream.java index 8a1f1d5521..682bb4cbe7 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekableBufferedStream.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekableBufferedStream.java @@ -62,10 +62,9 @@ void changePos(int delta) { } } - public static final int DEFAULT_BUFFER_SIZE = 512000; - final private int bufferSize; + private final int bufferSize; final SeekableStream wrappedStream; ExtBufferedInputStream bufferedStream; long position; @@ -132,10 +131,10 @@ public int read() throws IOException { public int read(final byte[] buffer, final int offset, final int length) throws IOException { int nBytesRead = bufferedStream.read(buffer, offset, length); if (nBytesRead > 0) { - //if we can't read as many bytes as we are asking for then attempt another read to reset the buffer. + // if we can't read as many bytes as we are asking for then attempt another read to reset the buffer. 
if (nBytesRead < length) { final int additionalBytesRead = bufferedStream.read(buffer, nBytesRead + offset, length - nBytesRead); - //if there were additional bytes read then update nBytesRead + // if there were additional bytes read then update nBytesRead if (additionalBytesRead > 0) { nBytesRead += additionalBytesRead; } diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekableFTPStream.java b/src/main/java/htsjdk/samtools/seekablestream/SeekableFTPStream.java index 1723747d5a..7e7c9b33f9 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekableFTPStream.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekableFTPStream.java @@ -31,6 +31,7 @@ public class SeekableFTPStream extends SeekableStream { SeekableFTPStreamHelper helper; + public SeekableFTPStream(URL url) throws IOException { this(url, null); } @@ -56,7 +57,7 @@ public boolean eof() throws IOException { @Override public String getSource() { - return null; //TODO + return null; // TODO } @Override @@ -64,19 +65,16 @@ public long length() { return helper.length(); } - @Override public long skip(long n) throws IOException { return helper.skip(n); } - @Override public int read(byte[] buffer, int offset, int len) throws IOException { return helper.read(buffer, offset, len); } - @Override public void close() throws IOException { helper.close(); @@ -87,12 +85,15 @@ public int read() throws IOException { return helper.read(); } -// private static final String EXPECTED = "Apache Software Foundation"; + // private static final String EXPECTED = "Apache Software Foundation"; private static final String EXPECTED1 = "\u00cf\u00ac\u00c9\u0075\u0043\u00d4\u00d5\u0079"; private static final String EXPECTED2 = "\u00e4\u006c\u0077\u000c\u0016\u00f1\u0030\u008f"; + public static void main(String[] args) throws IOException { -// String testURL = (args.length < 1) ? "ftp://apache.cs.utah.edu/apache.org/HEADER.html" : args[0]; - String testURL = (args.length < 1) ? 
"ftp://hgdownload.cse.ucsc.edu/goldenPath/panTro3/vsHg19/panTro3.hg19.all.chain.gz" : args[0]; + // String testURL = (args.length < 1) ? "ftp://apache.cs.utah.edu/apache.org/HEADER.html" : args[0]; + String testURL = (args.length < 1) + ? "ftp://hgdownload.cse.ucsc.edu/goldenPath/panTro3/vsHg19/panTro3.hg19.all.chain.gz" + : args[0]; long startPosition = (args.length < 2) ? 0x0b66c78l : Long.parseLong(args[1]); int len = (args.length < 3) ? 8 : Integer.parseInt(args[2]); int skipLen = (args.length < 4) ? 0x18 : Integer.parseInt(args[3]); @@ -101,7 +102,7 @@ public static void main(String[] args) throws IOException { s.seek(startPosition); s.read(buffer, 0, len); if (s.position() != startPosition + len && s.position() != s.length()) { - System.out.println("1) updated position is incorrect"); + System.out.println("1) updated position is incorrect"); } String data = new String(buffer); System.out.println("1) read:" + data); @@ -112,7 +113,7 @@ public static void main(String[] args) throws IOException { s.skip(skipLen); s.read(buffer, 0, len); if (s.position() != startPosition + 2 * len + skipLen && s.position() != s.length()) { - System.out.println("2) updated position is incorrect"); + System.out.println("2) updated position is incorrect"); } String data2 = new String(buffer); System.out.println("2) read:" + data2); diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekableFTPStreamHelper.java b/src/main/java/htsjdk/samtools/seekablestream/SeekableFTPStreamHelper.java index 10b1391b24..d0edc6f982 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekableFTPStreamHelper.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekableFTPStreamHelper.java @@ -21,7 +21,6 @@ import htsjdk.samtools.util.ftp.FTPClient; import htsjdk.samtools.util.ftp.FTPReply; import htsjdk.samtools.util.ftp.FTPUtils; - import java.io.EOFException; import java.io.IOException; import java.io.InputStream; @@ -50,10 +49,9 @@ public class SeekableFTPStreamHelper { ftp.binary(); 
FTPReply reply = ftp.size(path); - if(reply.isSuccess()) { + if (reply.isSuccess()) { contentLength = Long.parseLong(reply.getReplyString()); } - } public void seek(long position) { @@ -72,7 +70,6 @@ public long length() { return contentLength; } - public long skip(long n) throws IOException { long bytesToSkip = n; position += bytesToSkip; @@ -102,7 +99,7 @@ public int read(byte[] buffer, int offset, int len) throws IOException { FTPReply reply = ftp.pasv(); // If we are positioned at or beyond the EOF return -1 - if(contentLength >= 0 && position >= contentLength) { + if (contentLength >= 0 && position >= contentLength) { return -1; } @@ -142,7 +139,6 @@ public int read(byte[] buffer, int offset, int len) throws IOException { } } - private void reconnect() throws IOException { if (ftp != null) { ftp.disconnect(); @@ -150,7 +146,6 @@ private void reconnect() throws IOException { ftp = FTPUtils.connect(host, userInfo, userPasswordInput); } - public void close() throws IOException { if (ftp != null) { ftp.disconnect(); @@ -158,9 +153,7 @@ public void close() throws IOException { } } - public int read() throws IOException { throw new UnsupportedOperationException("read() is not supported on SeekableHTTPStream. 
Must read in blocks."); } - } diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekableFileStream.java b/src/main/java/htsjdk/samtools/seekablestream/SeekableFileStream.java index b790732a9b..81f4f65f15 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekableFileStream.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekableFileStream.java @@ -38,7 +38,6 @@ public class SeekableFileStream extends SeekableStream { static Collection allInstances = Collections.synchronizedCollection(new HashSet()); - File file; RandomAccessFile fis; @@ -74,7 +73,7 @@ public long skip(long n) throws IOException { fis.getChannel().position(initPos + n); return position() - initPos; } - + @Override public int read(final byte[] buffer, final int offset, final int length) throws IOException { if (length < 0) { @@ -84,21 +83,20 @@ public int read(final byte[] buffer, final int offset, final int length) throws while (n < length) { final int count = fis.read(buffer, offset + n, length - n); if (count < 0) { - if (n > 0) { - return n; - } else { - return count; - } + if (n > 0) { + return n; + } else { + return count; + } } n += count; } return n; - } @Override public int read() throws IOException { - return fis.read(); + return fis.read(); } @Override @@ -111,12 +109,10 @@ public String getSource() { return file.getAbsolutePath(); } - @Override public void close() throws IOException { allInstances.remove(this); fis.close(); - } public static synchronized void closeAllInstances() { @@ -126,8 +122,8 @@ public static synchronized void closeAllInstances() { try { sfs.close(); } catch (IOException e) { - //TODO - //log.error("Error closing SeekableFileStream", e); + // TODO + // log.error("Error closing SeekableFileStream", e); } } allInstances.clear(); diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekableHTTPStream.java b/src/main/java/htsjdk/samtools/seekablestream/SeekableHTTPStream.java index a846fe4512..03dda69fd8 100644 --- 
a/src/main/java/htsjdk/samtools/seekablestream/SeekableHTTPStream.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekableHTTPStream.java @@ -24,7 +24,6 @@ package htsjdk.samtools.seekablestream; import htsjdk.samtools.util.HttpUtils; - import java.io.EOFException; import java.io.IOException; import java.io.InputStream; @@ -44,7 +43,6 @@ public class SeekableHTTPStream extends SeekableStream { public SeekableHTTPStream(final URL url) { this(url, null); - } public SeekableHTTPStream(final URL url, Proxy proxy) { @@ -58,13 +56,11 @@ public SeekableHTTPStream(final URL url, Proxy proxy) { if (contentLengthString != null) { try { contentLength = Long.parseLong(contentLengthString); - } - catch (NumberFormatException ignored) { + } catch (NumberFormatException ignored) { System.err.println("WARNING: Invalid content length (" + contentLengthString + " for: " + url); contentLength = -1; } } - } @Override @@ -98,9 +94,9 @@ public void seek(final long position) { public int read(byte[] buffer, int offset, int len) throws IOException { if (offset < 0 || len < 0 || (offset + len) > buffer.length) { - throw new IndexOutOfBoundsException("Offset="+offset+",len="+len+",buflen="+buffer.length); + throw new IndexOutOfBoundsException("Offset=" + offset + ",len=" + len + ",buflen=" + buffer.length); } - if (len == 0 ) { + if (len == 0) { return 0; } if (position == contentLength) { @@ -112,9 +108,9 @@ public int read(byte[] buffer, int offset, int len) throws IOException { String byteRange = ""; int n = 0; try { - connection = proxy == null ? - (HttpURLConnection) url.openConnection() : - (HttpURLConnection) url.openConnection(proxy); + connection = proxy == null + ? (HttpURLConnection) url.openConnection() + : (HttpURLConnection) url.openConnection(proxy); long endRange = position + len - 1; // IF we know the total content length, limit the end range to that. 
@@ -142,9 +138,7 @@ public int read(byte[] buffer, int offset, int len) throws IOException { return n; - } - - catch (IOException e) { + } catch (IOException e) { // THis is a bit of a hack, but its not clear how else to handle this. If a byte range is specified // that goes past the end of the file the response code will be 416. The MAC os translates this to // an IOException with the 416 code in the message. Windows translates the error to an EOFException. @@ -163,9 +157,7 @@ public int read(byte[] buffer, int offset, int len) throws IOException { throw e; } - } - - finally { + } finally { if (is != null) { is.close(); } @@ -175,18 +167,16 @@ public int read(byte[] buffer, int offset, int len) throws IOException { } } - @Override public void close() throws IOException { // Nothing to do } - @Override public int read() throws IOException { - byte []tmp=new byte[1]; - read(tmp,0,1); - return (int) tmp[0] & 0xFF; + byte[] tmp = new byte[1]; + read(tmp, 0, 1); + return (int) tmp[0] & 0xFF; } @Override diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekableMemoryStream.java b/src/main/java/htsjdk/samtools/seekablestream/SeekableMemoryStream.java index 37a5ab49ba..3b8d44cc16 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekableMemoryStream.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekableMemoryStream.java @@ -60,5 +60,4 @@ public int read() throws IOException { public long position() throws IOException { return buf.position(); } - } diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekablePathStream.java b/src/main/java/htsjdk/samtools/seekablestream/SeekablePathStream.java index cb88512df4..2817c1e891 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekablePathStream.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekablePathStream.java @@ -16,7 +16,7 @@ */ public class SeekablePathStream extends SeekableStream { - private final static Log LOG = Log.getInstance(SeekablePathStream.class); + private 
static final Log LOG = Log.getInstance(SeekablePathStream.class); /** * Collection of all open instances. SeekablePathStream objects are usually open and kept open for the @@ -33,9 +33,10 @@ public SeekablePathStream(final Path path) throws IOException { this(path, null); } - public SeekablePathStream(final Path path, Function wrapper) throws IOException { + public SeekablePathStream(final Path path, Function wrapper) + throws IOException { this.path = path; - if (null==wrapper) { + if (null == wrapper) { this.sbc = Files.newByteChannel(path); } else { this.sbc = wrapper.apply(Files.newByteChannel(path)); @@ -85,11 +86,11 @@ public int read(final byte[] buffer, final int offset, final int length) throws while (n < length) { final int count = sbc.read(buf); if (count < 0) { - if (n > 0) { - return n; - } else { - return count; - } + if (n > 0) { + return n; + } else { + return count; + } } n += count; } @@ -108,7 +109,6 @@ public String getSource() { return path.toAbsolutePath().toString(); } - @Override public void close() throws IOException { ALL_INSTANCES.remove(this); diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekableStream.java b/src/main/java/htsjdk/samtools/seekablestream/SeekableStream.java index 3ec3f81331..4aafa8b671 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekableStream.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekableStream.java @@ -24,7 +24,6 @@ package htsjdk.samtools.seekablestream; import htsjdk.samtools.util.RuntimeIOException; - import java.io.EOFException; import java.io.IOException; import java.io.InputStream; @@ -76,13 +75,13 @@ public abstract class SeekableStream extends InputStream { */ public void readFully(byte b[]) throws IOException { int len = b.length; - if (len < 0){ + if (len < 0) { throw new IndexOutOfBoundsException(); } int n = 0; while (n < len) { int count = read(b, n, len - n); - if (count < 0){ + if (count < 0) { throw new EOFException(); } n += count; @@ -109,7 +108,8 @@ public 
int available() throws IOException { final long remaining = length() - position(); if (remaining < 0) { // remaining might be negative if the length is not available (0) return 0; - } else if (remaining > Integer.MAX_VALUE) { // remaining might be bigger than Integer.MAX_VALUE for very large files + } else if (remaining + > Integer.MAX_VALUE) { // remaining might be bigger than Integer.MAX_VALUE for very large files return Integer.MAX_VALUE; } else { return (int) remaining; @@ -153,5 +153,4 @@ public synchronized void reset() throws IOException { public final boolean markSupported() { return true; } - } diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java b/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java index e0716d97e4..6ec8dde0a5 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java @@ -26,7 +26,6 @@ import htsjdk.io.HtsPath; import htsjdk.io.IOPath; import htsjdk.tribble.TribbleException; - import java.io.IOException; import java.net.URL; import java.nio.channels.SeekableByteChannel; @@ -40,7 +39,7 @@ * @author jrobinso * @date Nov 30, 2009 */ -public class SeekableStreamFactory{ +public class SeekableStreamFactory { private static final ISeekableStreamFactory DEFAULT_FACTORY; private static final String HTTP = "http"; @@ -50,21 +49,22 @@ public class SeekableStreamFactory{ * the set of url schemes that have special support in htsjdk that isn't through a FileSystemProvider */ private static final Set URL_SCHEMES_WITH_LEGACY_SUPPORT = Set.of(HTTP, FTP, HTTPS); + public static final String FILE_SCHEME = "file"; private static ISeekableStreamFactory currentFactory; - static{ + static { DEFAULT_FACTORY = new DefaultSeekableStreamFactory(); currentFactory = DEFAULT_FACTORY; } - private SeekableStreamFactory(){} + private SeekableStreamFactory() {} - public static void setInstance(final 
ISeekableStreamFactory factory){ + public static void setInstance(final ISeekableStreamFactory factory) { currentFactory = factory; } - public static ISeekableStreamFactory getInstance(){ + public static ISeekableStreamFactory getInstance() { return currentFactory; } @@ -86,14 +86,14 @@ public static boolean isFilePath(final String path) { * @param path a path to check * @return if the path is not being handled by a FileSystemProvider and it can be read by legacy streams */ - public static boolean isBeingHandledByLegacyUrlSupport(final String path){ - return !new HtsPath(path).hasFileSystemProvider() //if we have a provider for it that's what we'll use + public static boolean isBeingHandledByLegacyUrlSupport(final String path) { + return !new HtsPath(path).hasFileSystemProvider() // if we have a provider for it that's what we'll use && canBeHandledByLegacyUrlSupport(path); // otherwise we fall back to the special handlers } - //is this one of the url types that has legacy htsjdk support built in? + // is this one of the url types that has legacy htsjdk support built in? 
public static boolean canBeHandledByLegacyUrlSupport(final String path) { - return URL_SCHEMES_WITH_LEGACY_SUPPORT.stream().anyMatch(scheme-> path.startsWith(scheme +"://")); + return URL_SCHEMES_WITH_LEGACY_SUPPORT.stream().anyMatch(scheme -> path.startsWith(scheme + "://")); } private static class DefaultSeekableStreamFactory implements ISeekableStreamFactory { @@ -117,12 +117,11 @@ public SeekableStream getStreamFor(final String path) throws IOException { * @param wrapper a wrapper to apply to the stream allowing direct transformations on the byte stream to be applied */ @Override - public SeekableStream getStreamFor(final String path, - Function wrapper) throws IOException { + public SeekableStream getStreamFor( + final String path, Function wrapper) throws IOException { return getStreamFor(new HtsPath(path), wrapper); } - /** * The wrapper will only be applied to the stream if the stream is treated as a non file:// {@link Path} * @@ -132,31 +131,32 @@ public SeekableStream getStreamFor(final String path, * @param wrapper a wrapper to apply to the stream allowing direct transformations on the byte stream to be applied * @throws IOException */ - public static SeekableStream getStreamFor(final IOPath path, Function wrapper) throws IOException { - if(path.hasFileSystemProvider()) { + public static SeekableStream getStreamFor( + final IOPath path, Function wrapper) throws IOException { + if (path.hasFileSystemProvider()) { return path.getScheme().equals(FILE_SCHEME) - ? new SeekableFileStream(path.toPath().toFile()) //don't apply the wrapper to local files + ? new SeekableFileStream(path.toPath().toFile()) // don't apply the wrapper to local files : new SeekablePathStream(path.toPath(), wrapper); } else { - return switch(path.getScheme()){ - case HTTP, HTTPS -> new SeekableHTTPStream(new URL(path.getRawInputString())); - case FTP -> new SeekableFTPStream((new URL(path.getRawInputString()))); - default -> throw new TribbleException("Unknown path type. 
No FileSystemProvider available for " + path.getRawInputString()); - }; + return switch (path.getScheme()) { + case HTTP, HTTPS -> new SeekableHTTPStream(new URL(path.getRawInputString())); + case FTP -> new SeekableFTPStream((new URL(path.getRawInputString()))); + default -> + throw new TribbleException( + "Unknown path type. No FileSystemProvider available for " + path.getRawInputString()); + }; } } @Override - public SeekableStream getBufferedStream(SeekableStream stream){ + public SeekableStream getBufferedStream(SeekableStream stream) { return getBufferedStream(stream, SeekableBufferedStream.DEFAULT_BUFFER_SIZE); } @Override - public SeekableStream getBufferedStream(SeekableStream stream, int bufferSize){ + public SeekableStream getBufferedStream(SeekableStream stream, int bufferSize) { if (bufferSize == 0) return stream; else return new SeekableBufferedStream(stream, bufferSize); } - } - } diff --git a/src/main/java/htsjdk/samtools/seekablestream/UserPasswordInput.java b/src/main/java/htsjdk/samtools/seekablestream/UserPasswordInput.java index eda1ef2b54..8a1b4217e7 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/UserPasswordInput.java +++ b/src/main/java/htsjdk/samtools/seekablestream/UserPasswordInput.java @@ -1,31 +1,34 @@ -/* - * The MIT License - * - * Copyright (c) 2013 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package htsjdk.samtools.seekablestream; - -public interface UserPasswordInput { - public void setHost(String host); - public boolean showDialog(); - public String getUser(); - public String getPassword(); -} +/* + * The MIT License + * + * Copyright (c) 2013 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +package htsjdk.samtools.seekablestream; + +public interface UserPasswordInput { + public void setHost(String host); + + public boolean showDialog(); + + public String getUser(); + + public String getPassword(); +} diff --git a/src/main/java/htsjdk/samtools/sra/ReferenceCache.java b/src/main/java/htsjdk/samtools/sra/ReferenceCache.java deleted file mode 100644 index 9eb389a611..0000000000 --- a/src/main/java/htsjdk/samtools/sra/ReferenceCache.java +++ /dev/null @@ -1,46 +0,0 @@ -package htsjdk.samtools.sra; - -import htsjdk.samtools.SAMFileHeader; -import ngs.ErrorMsg; -import ngs.ReadCollection; -import ngs.Reference; - - -/** - * That is a thread-safe wrapper for a list of cache Reference objects. - * Those objects can be used from different threads without issues, however to load and save a Reference object, we - * need to acquire a lock. - * - * Created by andrii.nikitiuk on 10/28/15. - */ -public class ReferenceCache { - private ReadCollection run; - private SAMFileHeader virtualHeader; - private Reference cachedReference; - - public ReferenceCache(ReadCollection run, SAMFileHeader virtualHeader) { - this.run = run; - this.virtualHeader = virtualHeader; - } - - /** - * This method returns Reference objects by reference indexes in SAM header - * Those objects do not maintain thread safety - * - * @param referenceIndex reference index in - * @return a Reference object - */ - public Reference get(int referenceIndex) { - String contig = virtualHeader.getSequence(referenceIndex).getSequenceName(); - - try { - if (cachedReference == null || !cachedReference.getCanonicalName().equals(contig)) { - cachedReference = run.getReference(contig); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - - return cachedReference; - } -} diff --git a/src/main/java/htsjdk/samtools/sra/SRAAccession.java b/src/main/java/htsjdk/samtools/sra/SRAAccession.java deleted file mode 100644 index 9aeb10fe8a..0000000000 --- 
a/src/main/java/htsjdk/samtools/sra/SRAAccession.java +++ /dev/null @@ -1,177 +0,0 @@ -/*=========================================================================== -* -* PUBLIC DOMAIN NOTICE -* National Center for Biotechnology Information -* -* This software/database is a "United States Government Work" under the -* terms of the United States Copyright Act. It was written as part of -* the author's official duties as a United States Government employee and -* thus cannot be copyrighted. This software/database is freely available -* to the public for use. The National Library of Medicine and the U.S. -* Government have not placed any restriction on its use or reproduction. -* -* Although all reasonable efforts have been taken to ensure the accuracy -* and reliability of the software and data, the NLM and the U.S. -* Government do not and cannot warrant the performance or results that -* may be obtained by using this software or data. The NLM and the U.S. -* Government disclaim all warranties, express or implied, including -* warranties of performance, merchantability or fitness for any particular -* purpose. -* -* Please cite the author in any work or product based on this material. 
-* -* =========================================================================== -* -*/ - -package htsjdk.samtools.sra; - -import gov.nih.nlm.ncbi.ngs.error.LibraryLoadError; -import htsjdk.samtools.Defaults; -import htsjdk.samtools.util.Log; -import gov.nih.nlm.ncbi.ngs.NGS; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.Serializable; -import java.util.Arrays; - -/** - * Describes a single SRA accession for SRA read collection - * Also provides app string functionality and allows to check if working SRA is supported on the running platform - * - * Important: due to checks performed in SRAAccession.isValid(), we won't recognise any accessions other - * than ones that follow the pattern "^[SED]RR[0-9]{6,9}$", e.g. SRR000123 - */ -public class SRAAccession implements Serializable { - private static final Log log = Log.getInstance(SRAAccession.class); - - private static boolean noLibraryDownload; - private static boolean initTried = false; - private static String appVersionString = null; - private final static String defaultAppVersionString = "[unknown software]"; - private final static String htsJdkVersionString = "HTSJDK-NGS"; - - static final String REMOTE_ACCESSION_PATTERN = "^[SED]RR[0-9]{6,9}$"; - - private String acc; - - static { - noLibraryDownload = !Defaults.SRA_LIBRARIES_DOWNLOAD; - if (noLibraryDownload) { - System.setProperty("vdb.System.noLibraryDownload", "1"); - } - } - - /** - * Sets an app version string which will let SRA know which software uses it. 
- * @param appVersionString a string that describes running application - */ - public static void setAppVersionString(String appVersionString) { - SRAAccession.appVersionString = appVersionString; - } - - /** - * @return true if SRA successfully loaded native libraries and fully initialized, - * false otherwise - * @deprecated use {@link #checkIfInitialized} instead - */ - @Deprecated - public static boolean isSupported() { - return checkIfInitialized() == null; - } - - /** - * Tries to initialize SRA. Initialization error is saved during first call, - * all subsequent calls will return the same saved error or null. - * - * @return ExceptionInInitializerError if initialization failed, null if initialization was successful - */ - public static ExceptionInInitializerError checkIfInitialized() { - final ExceptionInInitializerError ngsInitError; - if (!initTried) { - log.debug("Initializing SRA module"); - ngsInitError = NGS.getInitializationError(); - if (ngsInitError != null) { - log.info("SRA initialization failed. 
Will not be able to read from SRA"); - } else { - NGS.setAppVersionString(getFullVersionString()); - } - initTried = true; - } else { - ngsInitError = NGS.getInitializationError(); - } - return ngsInitError; - } - - /** - * @param acc accession - * @return true if a string is a valid SRA accession - */ - public static boolean isValid(String acc) { - boolean looksLikeSRA = false; - File f = new File(acc); - if (f.isFile()) { - byte[] buffer = new byte[8]; - byte[] signature1 = "NCBI.sra".getBytes(); - byte[] signature2 = "NCBInenc".getBytes(); - - try (InputStream is = new FileInputStream(f)) { - int numRead = is.read(buffer); - - looksLikeSRA = numRead == buffer.length && - (Arrays.equals(buffer, signature1) || Arrays.equals(buffer, signature2)); - } catch (IOException e) { - looksLikeSRA = false; - } - } else if (f.exists()) { - // anything else local other than a file is not an SRA archive - looksLikeSRA = false; - } else { - looksLikeSRA = acc.toUpperCase().matches ( REMOTE_ACCESSION_PATTERN ); - } - - if (!looksLikeSRA) return false; - - final ExceptionInInitializerError initError = checkIfInitialized(); - if (initError != null) { - if (noLibraryDownload && initError instanceof LibraryLoadError) { - throw new LinkageError( - "Failed to load SRA native libraries and auto-download is disabled. " + - "Please re-run with JVM argument -Dsamjdk.sra_libraries_download=true to enable auto-download of native libraries", - initError - ); - } else { - throw initError; - } - } - - return NGS.isValid(acc); - } - - /** - * @param acc accession - */ - public SRAAccession(String acc) { - this.acc = acc; - } - - public String toString() { - return acc; - } - - /** - * @return true if contained string is an SRA accession - */ - public boolean isValid() { - return SRAAccession.isValid(acc); - } - - private static String getFullVersionString() { - String versionString = appVersionString == null ? 
defaultAppVersionString : appVersionString; - versionString += " through " + htsJdkVersionString; - return versionString; - } -} diff --git a/src/main/java/htsjdk/samtools/sra/SRAAlignmentIterator.java b/src/main/java/htsjdk/samtools/sra/SRAAlignmentIterator.java deleted file mode 100644 index 650ddce747..0000000000 --- a/src/main/java/htsjdk/samtools/sra/SRAAlignmentIterator.java +++ /dev/null @@ -1,209 +0,0 @@ -/*=========================================================================== -* -* PUBLIC DOMAIN NOTICE -* National Center for Biotechnology Information -* -* This software/database is a "United States Government Work" under the -* terms of the United States Copyright Act. It was written as part of -* the author's official duties as a United States Government employee and -* thus cannot be copyrighted. This software/database is freely available -* to the public for use. The National Library of Medicine and the U.S. -* Government have not placed any restriction on its use or reproduction. -* -* Although all reasonable efforts have been taken to ensure the accuracy -* and reliability of the software and data, the NLM and the U.S. -* Government do not and cannot warrant the performance or results that -* may be obtained by using this software or data. The NLM and the U.S. -* Government disclaim all warranties, express or implied, including -* warranties of performance, merchantability or fitness for any particular -* purpose. -* -* Please cite the author in any work or product based on this material. 
-* -* =========================================================================== -* -*/ - -package htsjdk.samtools.sra; - - -import htsjdk.samtools.Chunk; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SRAIterator; -import htsjdk.samtools.ValidationStringency; -import htsjdk.samtools.util.CloseableIterator; -import ngs.Alignment; -import ngs.AlignmentIterator; -import ngs.ErrorMsg; -import ngs.ReadCollection; -import ngs.Reference; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; - - -/** - * Iterator for aligned reads. - * Is used from SRAIterator. - * Created by andrii.nikitiuk on 9/3/15. - */ -public class SRAAlignmentIterator implements CloseableIterator { - private ValidationStringency validationStringency; - - private SRAAccession accession; - private ReadCollection run; - private SAMFileHeader header; - private ReferenceCache cachedReferences; - private List referencesLengths; - private Iterator referencesChunksIterator; - private int currentReference = -1; - - private boolean hasMoreReferences = true; - - private AlignmentIterator alignedIterator; - private Boolean hasMoreAlignments = false; - - private SRALazyRecord lastRecord; - - /** - * @param run opened read collection - * @param header sam header - * @param cachedReferences list of cached references shared among all iterators from a single SRAFileReader - * @param recordRangeInfo info about record ranges withing SRA archive - * @param chunk used to determine which alignments the iterator should return - */ - public SRAAlignmentIterator(SRAAccession accession, final ReadCollection run, final SAMFileHeader header, ReferenceCache cachedReferences, - final SRAIterator.RecordRangeInfo recordRangeInfo, final Chunk chunk) { - this.accession = accession; - this.run = run; - this.header = header; - this.cachedReferences = cachedReferences; - this.referencesLengths = 
recordRangeInfo.getReferenceLengthsAligned(); - - referencesChunksIterator = getReferenceChunks(chunk).iterator(); - - try { - nextReference(); - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - @Override - public boolean hasNext() { - // check aligned - if (lastRecord != null) { - lastRecord.detachFromIterator(); - lastRecord = null; - } - - if (hasMoreAlignments == null) { - try { - hasMoreAlignments = alignedIterator.nextAlignment(); - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - while (!hasMoreAlignments && hasMoreReferences) { - nextReference(); - } - - return hasMoreAlignments; - } - - @Override - public SAMRecord next() { - if (!hasNext()) { - throw new NoSuchElementException("No more alignments are available"); - } - - return nextAlignment(); - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Removal of records not implemented."); - } - - public void setValidationStringency(ValidationStringency validationStringency) { - this.validationStringency = validationStringency; - } - - private SAMRecord nextAlignment() { - try { - lastRecord = new SRALazyRecord(header, accession, run, alignedIterator, alignedIterator.getReadId(), alignedIterator.getAlignmentId()); - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - if (validationStringency != null) { - lastRecord.setValidationStringency(validationStringency); - } - - hasMoreAlignments = null; - - return lastRecord; - } - - private void nextReference() { - if (!hasMoreReferences) { - throw new NoSuchElementException("Cannot get next reference - already at last one"); - } - - try { - alignedIterator = null; - hasMoreAlignments = false; - - hasMoreReferences = referencesChunksIterator.hasNext(); - if (!hasMoreReferences) { - return; - } - - currentReference++; - Chunk refChunk = referencesChunksIterator.next(); - if (refChunk == null) { - return; - } - - Reference reference = cachedReferences.get(currentReference); - - 
alignedIterator = reference.getFilteredAlignmentSlice( - refChunk.getChunkStart(), refChunk.getChunkEnd() - refChunk.getChunkStart(), - Alignment.all, Alignment.startWithinSlice | Alignment.passDuplicates | Alignment.passFailed, 0); - - hasMoreAlignments = alignedIterator.nextAlignment(); - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - - private List getReferenceChunks(final Chunk chunk) { - List referencesChunks = new ArrayList(); - long refOffset = 0; - for (Long refLen : referencesLengths) { - if (chunk.getChunkStart() - refOffset >= refLen || chunk.getChunkEnd() - refOffset <= 0) { - referencesChunks.add(null); - } else { - long refChunkStart = Math.max(chunk.getChunkStart() - refOffset, 0); - long refChunkEnd = Math.min(chunk.getChunkEnd() - refOffset, refLen); - referencesChunks.add(new Chunk(refChunkStart, refChunkEnd)); - } - - refOffset += refLen; - } - - return referencesChunks; - } - - @Override - public void close() { - if (lastRecord != null) { - lastRecord.detachFromIterator(); - lastRecord = null; - } - - alignedIterator = null; - } -} diff --git a/src/main/java/htsjdk/samtools/sra/SRAIndexedSequenceFile.java b/src/main/java/htsjdk/samtools/sra/SRAIndexedSequenceFile.java deleted file mode 100644 index 1d9a471b27..0000000000 --- a/src/main/java/htsjdk/samtools/sra/SRAIndexedSequenceFile.java +++ /dev/null @@ -1,112 +0,0 @@ -package htsjdk.samtools.sra; - -import htsjdk.samtools.SAMSequenceDictionary; -import htsjdk.samtools.SAMSequenceRecord; -import htsjdk.samtools.reference.ReferenceSequence; -import htsjdk.samtools.reference.ReferenceSequenceFile; -import ngs.ErrorMsg; -import ngs.ReadCollection; -import ngs.Reference; -import ngs.ReferenceIterator; - -import java.io.IOException; -import java.util.Iterator; - -/** - * Allows reading Reference data from SRA - */ -public class SRAIndexedSequenceFile implements ReferenceSequenceFile { - private SRAAccession acc; - private ReadCollection run; - private Reference cachedReference; - 
- private Iterator sequenceRecordIterator; - - protected SAMSequenceDictionary sequenceDictionary; - - /** - * @param acc accession - */ - public SRAIndexedSequenceFile(SRAAccession acc) { - this.acc = acc; - - if (!acc.isValid()) { - throw new RuntimeException("Passed an invalid SRA accession into SRA reader: " + acc); - } - - try { - run = gov.nih.nlm.ncbi.ngs.NGS.openReadCollection(acc.toString()); - sequenceDictionary = loadSequenceDictionary(); - } catch (final ErrorMsg e) { - throw new RuntimeException(e); - } - - reset(); - } - - @Override - public SAMSequenceDictionary getSequenceDictionary() { - return sequenceDictionary; - } - - @Override - public ReferenceSequence nextSequence() { - SAMSequenceRecord sequence = sequenceRecordIterator.next(); - return getSubsequenceAt(sequence.getSequenceName(), 1L, sequence.getSequenceLength()); - } - - @Override - public void reset() { - sequenceRecordIterator = sequenceDictionary.getSequences().iterator(); - } - - @Override - public boolean isIndexed() { - return true; - } - - @Override - public ReferenceSequence getSequence(String contig) { - return getSubsequenceAt(contig, 1L, sequenceDictionary.getSequence(contig).getSequenceLength()); - } - - @Override - public ReferenceSequence getSubsequenceAt(String contig, long start, long stop) { - SAMSequenceRecord sequence = sequenceDictionary.getSequence(contig); - int referenceIndex = sequence.getSequenceIndex(); - - byte[] bases; - - try { - Reference reference; - synchronized (this) { - if (cachedReference == null || !cachedReference.getCanonicalName().equals(contig)) { - cachedReference = run.getReference(contig); - } - reference = cachedReference; - - bases = reference.getReferenceBases(start - 1, stop - (start - 1)).getBytes(); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - - return new ReferenceSequence(contig, referenceIndex, bases); - } - - @Override - public void close() throws IOException { - cachedReference = null; - } - - protected 
SAMSequenceDictionary loadSequenceDictionary() throws ErrorMsg { - SAMSequenceDictionary dict = new SAMSequenceDictionary(); - - ReferenceIterator itRef = run.getReferences(); - while (itRef.nextReference()) { - dict.addSequence(new SAMSequenceRecord(itRef.getCanonicalName(), (int) itRef.getLength())); - } - - return dict; - } -} \ No newline at end of file diff --git a/src/main/java/htsjdk/samtools/sra/SRALazyRecord.java b/src/main/java/htsjdk/samtools/sra/SRALazyRecord.java deleted file mode 100644 index 418ee3af2f..0000000000 --- a/src/main/java/htsjdk/samtools/sra/SRALazyRecord.java +++ /dev/null @@ -1,1047 +0,0 @@ -/*=========================================================================== -* -* PUBLIC DOMAIN NOTICE -* National Center for Biotechnology Information -* -* This software/database is a "United States Government Work" under the -* terms of the United States Copyright Act. It was written as part of -* the author's official duties as a United States Government employee and -* thus cannot be copyrighted. This software/database is freely available -* to the public for use. The National Library of Medicine and the U.S. -* Government have not placed any restriction on its use or reproduction. -* -* Although all reasonable efforts have been taken to ensure the accuracy -* and reliability of the software and data, the NLM and the U.S. -* Government do not and cannot warrant the performance or results that -* may be obtained by using this software or data. The NLM and the U.S. -* Government disclaim all warranties, express or implied, including -* warranties of performance, merchantability or fitness for any particular -* purpose. -* -* Please cite the author in any work or product based on this material. 
-* -* =========================================================================== -* -*/ - -package htsjdk.samtools.sra; - -import gov.nih.nlm.ncbi.ngs.NGS; -import htsjdk.samtools.*; -import htsjdk.samtools.util.Log; -import ngs.ReadCollection; -import ngs.AlignmentIterator; -import ngs.Alignment; -import ngs.ReadIterator; -import ngs.Read; -import ngs.Fragment; -import ngs.ErrorMsg; - -import java.util.EnumSet; -import java.util.Set; -import java.util.Map; -import java.util.HashMap; -import java.util.List; - -/** - * Extends SAMRecord so that any of the fields will be loaded only when needed. - * Since SRA is a column oriented database, it is very inefficient to load all the fields at once. - * However, loading only set of actually needed fields will be even faster than in row oriented databases. - * - * Because of that we are providing lazy loading of fields, flags and attributes. - * - * Created by andrii.nikitiuk on 8/25/15. - */ -public class SRALazyRecord extends SAMRecord { - private static final Log log = Log.getInstance(SRALazyRecord.class); - - private SRAAccession accession; - private boolean isAligned; - private transient ReadCollection run; - private transient Alignment alignmentIterator; - private transient Read unalignmentIterator; - private String sraReadId; - private String sraAlignmentId; - private int unalignedReadFragmentIndex = -1; - - - private Set initializedFields = EnumSet.noneOf(LazyField.class); - private Set initializedFlags = EnumSet.noneOf(LazyFlag.class); - private Set initializedAttributes = EnumSet.noneOf(LazyAttribute.class); - - private enum LazyField { - ALIGNMENT_START { - @Override - public void loadValue(SRALazyRecord self) { - self.getAlignmentStart(); - } - }, - MAPPING_QUALITY { - @Override - public void loadValue(SRALazyRecord self) { - self.getMappingQuality(); - } - }, - REFERENCE_NAME { - @Override - public void loadValue(SRALazyRecord self) { - self.getReferenceName(); - } - }, - CIGAR_STRING { - @Override - public 
void loadValue(SRALazyRecord self) { - self.getCigarString(); - } - }, - BASES { - @Override - public void loadValue(SRALazyRecord self) { - self.getReadBases(); - } - }, - QUALS { - @Override - public void loadValue(SRALazyRecord self) { - self.getBaseQualities(); - } - }, - MATE_ALIGNMENT_START { - @Override - public void loadValue(SRALazyRecord self) { - self.getMateAlignmentStart(); - } - }, - MATE_REFERENCE_NAME { - @Override - public void loadValue(SRALazyRecord self) { - self.getMateReferenceName(); - } - }, - INFERRED_INSERT_SIZE { - @Override - public void loadValue(SRALazyRecord self) { - self.getInferredInsertSize(); - } - }; - - public abstract void loadValue(SRALazyRecord self); - } - - private enum LazyFlag { - READ_NEGATIVE_STRAND(true) { - @Override - public boolean getFlag(SRALazyRecord self) { - return self.getReadNegativeStrandFlag(); - } - }, - READ_PAIRED(true) { - @Override - public boolean getFlag(SRALazyRecord self) { - return self.getReadPairedFlag(); - } - }, - PROPER_PAIR(false) { - @Override - public boolean getFlag(SRALazyRecord self) { - return self.getProperPairFlag(); - } - }, - SECONDARY_ALIGNMENT(true) { - @Override - public boolean getFlag(SRALazyRecord self) { - return self.isSecondaryAlignment(); - } - }, - MATE_NEGATIVE_STRAND(false) { - @Override - public boolean getFlag(SRALazyRecord self) { - return self.getMateNegativeStrandFlag(); - } - }, - MATE_UNMAPPED(false) { - @Override - public boolean getFlag(SRALazyRecord self) { - return self.getMateUnmappedFlag(); - } - }, - FIRST_OF_PAIR(false) { - @Override - public boolean getFlag(SRALazyRecord self) { - return self.getFirstOfPairFlag(); - } - }, - SECOND_OF_PAIR(false) { - @Override - public boolean getFlag(SRALazyRecord self) { - return self.getSecondOfPairFlag(); - } - }; - - private final boolean canCallOnNotPaired; - - LazyFlag(final boolean canCallOnNotPaired) { - this.canCallOnNotPaired = canCallOnNotPaired; - } - - public boolean canCallOnNotPaired() { return 
canCallOnNotPaired; } - - public abstract boolean getFlag(SRALazyRecord self); - } - - private enum LazyAttribute { - RG { - @Override - public String getAttribute(SRALazyRecord self) { - return self.getAttributeGroupNameImpl(); - } - }; - - public abstract String getAttribute(SRALazyRecord self); - } - - private static Map lazyAttributeTags; - static - { - lazyAttributeTags = new HashMap(); - lazyAttributeTags.put(SAMTag.RG.getBinaryTag(), LazyAttribute.RG); - } - - public SRALazyRecord(final SAMFileHeader header, SRAAccession accession, ReadCollection run, AlignmentIterator alignmentIterator, String readId, String alignmentId) { - this(header, accession, readId, alignmentId); - - this.run = run; - this.alignmentIterator = alignmentIterator; - } - - public SRALazyRecord(final SAMFileHeader header, SRAAccession accession, ReadCollection run, ReadIterator unalignmentIterator, String readId, int unalignedReadFragmentIndex) { - this(header, accession, readId, unalignedReadFragmentIndex); - - this.run = run; - this.unalignmentIterator = unalignmentIterator; - } - - protected SRALazyRecord(final SAMFileHeader header, SRAAccession accession, String readId, String alignmentId) { - this(header, accession, readId, true); - - this.sraAlignmentId = alignmentId; - } - - protected SRALazyRecord(final SAMFileHeader header, SRAAccession accession, String readId, int unalignedReadFragmentIndex) { - this(header, accession, readId, false); - - this.unalignedReadFragmentIndex = unalignedReadFragmentIndex; - } - - private SRALazyRecord(final SAMFileHeader header, SRAAccession accession, String readId, boolean isAligned) { - super(header); - - this.accession = accession; - this.isAligned = isAligned; - this.sraReadId = readId; - setReadName(readId); - setReadUnmappedFlag(!isAligned); - } - - /** - * Is being called when original NGS iterator is being moved to the next object. 
- * Later, if any of uninitialized fields is requested, either Read object or Alignment has to be retrieved from - * ReadCollection - */ - public void detachFromIterator() { - alignmentIterator = null; - unalignmentIterator = null; - } - - // ===== fields ===== - - @Override - public int getAlignmentStart() { - if (!initializedFields.contains(LazyField.ALIGNMENT_START)) { - setAlignmentStart(getAlignmentStartImpl()); - } - return super.getAlignmentStart(); - } - - @Override - public void setAlignmentStart(final int value) { - if (!initializedFields.contains(LazyField.ALIGNMENT_START)) { - initializedFields.add(LazyField.ALIGNMENT_START); - } - super.setAlignmentStart(value); - } - - @Override - public int getMappingQuality() { - if (!initializedFields.contains(LazyField.MAPPING_QUALITY)) { - setMappingQuality(getMappingQualityImpl()); - } - return super.getMappingQuality(); - } - - @Override - public void setMappingQuality(final int value) { - if (!initializedFields.contains(LazyField.MAPPING_QUALITY)) { - initializedFields.add(LazyField.MAPPING_QUALITY); - } - super.setMappingQuality(value); - } - - @Override - public String getReferenceName() { - if (!initializedFields.contains(LazyField.REFERENCE_NAME)) { - setReferenceName(getReferenceNameImpl()); - } - return super.getReferenceName(); - } - - @Override - public void setReferenceName(final String value) { - if (!initializedFields.contains(LazyField.REFERENCE_NAME)) { - initializedFields.add(LazyField.REFERENCE_NAME); - } - super.setReferenceName(value); - } - - @Override - public Integer getReferenceIndex() { - if (!initializedFields.contains(LazyField.REFERENCE_NAME)) { - setReferenceName(getReferenceNameImpl()); - } - return super.getReferenceIndex(); - } - - @Override - public void setReferenceIndex(final int value) { - if (!initializedFields.contains(LazyField.REFERENCE_NAME)) { - initializedFields.add(LazyField.REFERENCE_NAME); - } - super.setReferenceIndex(value); - } - - @Override - public String 
getCigarString() { - if (!initializedFields.contains(LazyField.CIGAR_STRING)) { - setCigarString(getCigarStringImpl()); - } - return super.getCigarString(); - } - - @Override - public void setCigarString(final String value) { - if (!initializedFields.contains(LazyField.CIGAR_STRING)) { - initializedFields.add(LazyField.CIGAR_STRING); - } - super.setCigarString(value); - } - - @Override - public Cigar getCigar() { - if (!initializedFields.contains(LazyField.CIGAR_STRING)) { - setCigarString(getCigarStringImpl()); - } - return super.getCigar(); - } - - @Override - public void setCigar(final Cigar value) { - if (!initializedFields.contains(LazyField.CIGAR_STRING)) { - initializedFields.add(LazyField.CIGAR_STRING); - } - super.setCigar(value); - } - - @Override - public byte[] getReadBases() { - if (!initializedFields.contains(LazyField.BASES)) { - setReadBases(getReadBasesImpl()); - } - return super.getReadBases(); - } - - @Override - public void setReadBases(final byte[] value) { - if (!initializedFields.contains(LazyField.BASES)) { - initializedFields.add(LazyField.BASES); - } - super.setReadBases(value); - } - - @Override - public byte[] getBaseQualities() { - if (!initializedFields.contains(LazyField.QUALS)) { - setBaseQualities(getBaseQualitiesImpl()); - } - return super.getBaseQualities(); - } - - @Override - public void setBaseQualities(final byte[] value) { - if (!initializedFields.contains(LazyField.QUALS)) { - initializedFields.add(LazyField.QUALS); - } - super.setBaseQualities(value); - } - - @Override - public int getMateAlignmentStart() { - if (!initializedFields.contains(LazyField.MATE_ALIGNMENT_START)) { - setMateAlignmentStart(getMateAlignmentStartImpl()); - } - return super.getMateAlignmentStart(); - } - - @Override - public void setMateAlignmentStart(final int value) { - if (!initializedFields.contains(LazyField.MATE_ALIGNMENT_START)) { - initializedFields.add(LazyField.MATE_ALIGNMENT_START); - } - super.setMateAlignmentStart(value); - } - - 
@Override - public String getMateReferenceName() { - if (!initializedFields.contains(LazyField.MATE_REFERENCE_NAME)) { - setMateReferenceName(getMateReferenceNameImpl()); - } - return super.getMateReferenceName(); - } - - @Override - public void setMateReferenceName(final String value) { - if (!initializedFields.contains(LazyField.MATE_REFERENCE_NAME)) { - initializedFields.add(LazyField.MATE_REFERENCE_NAME); - } - super.setMateReferenceName(value); - } - - @Override - public Integer getMateReferenceIndex() { - if (!initializedFields.contains(LazyField.MATE_REFERENCE_NAME)) { - setMateReferenceName(getMateReferenceNameImpl()); - } - return super.getMateReferenceIndex(); - } - - @Override - public void setMateReferenceIndex(final int value) { - if (!initializedFields.contains(LazyField.MATE_REFERENCE_NAME)) { - initializedFields.add(LazyField.MATE_REFERENCE_NAME); - } - super.setMateReferenceIndex(value); - } - - @Override - public int getInferredInsertSize() { - if (!initializedFields.contains(LazyField.INFERRED_INSERT_SIZE)) { - setInferredInsertSize(getInferredInsertSizeImpl()); - } - return super.getInferredInsertSize(); - } - - @Override - public void setInferredInsertSize(final int value) { - if (!initializedFields.contains(LazyField.INFERRED_INSERT_SIZE)) { - initializedFields.add(LazyField.INFERRED_INSERT_SIZE); - } - super.setInferredInsertSize(value); - } - - // ===== flags ===== - - @Override - public int getFlags() { - for (LazyFlag flag : LazyFlag.values()) { - if (initializedFlags.contains(flag)) { - continue; - } - - if (flag.canCallOnNotPaired() || getReadPairedFlag()) { - flag.getFlag(this); - } - } - - return super.getFlags(); - } - - @Override - public void setFlags(final int value) { - for (LazyFlag flag : LazyFlag.values()) { - if (!initializedFlags.contains(flag)) { - initializedFlags.add(flag); - } - } - super.setFlags(value); - } - - @Override - public boolean getReadNegativeStrandFlag() { - if 
(!initializedFlags.contains(LazyFlag.READ_NEGATIVE_STRAND)) { - setReadNegativeStrandFlag(getReadNegativeStrandFlagImpl()); - } - return super.getReadNegativeStrandFlag(); - } - - @Override - public void setReadNegativeStrandFlag(final boolean flag) { - if (!initializedFlags.contains(LazyFlag.READ_NEGATIVE_STRAND)) { - initializedFlags.add(LazyFlag.READ_NEGATIVE_STRAND); - } - super.setReadNegativeStrandFlag(flag); - } - - @Override - public boolean getReadPairedFlag() { - if (!initializedFlags.contains(LazyFlag.READ_PAIRED)) { - setReadPairedFlag(getReadPairedFlagImpl()); - } - return super.getReadPairedFlag(); - } - - @Override - public void setReadPairedFlag(final boolean flag) { - if (!initializedFlags.contains(LazyFlag.READ_PAIRED)) { - initializedFlags.add(LazyFlag.READ_PAIRED); - } - super.setReadPairedFlag(flag); - } - - @Override - public boolean getProperPairFlag() { - if (!initializedFlags.contains(LazyFlag.PROPER_PAIR)) { - setProperPairFlag(getProperPairFlagImpl()); - } - return super.getProperPairFlag(); - } - - @Override - public void setProperPairFlag(final boolean flag) { - if (!initializedFlags.contains(LazyFlag.PROPER_PAIR)) { - initializedFlags.add(LazyFlag.PROPER_PAIR); - } - super.setProperPairFlag(flag); - } - - @Override - public boolean isSecondaryAlignment() { - if (!initializedFlags.contains(LazyFlag.SECONDARY_ALIGNMENT)) { - setSecondaryAlignment(getSecondaryAlignmentFlagImpl()); - } - return super.isSecondaryAlignment(); - } - - @Override - public void setSecondaryAlignment(final boolean flag) { - if (!initializedFlags.contains(LazyFlag.SECONDARY_ALIGNMENT)) { - initializedFlags.add(LazyFlag.SECONDARY_ALIGNMENT); - } - super.setSecondaryAlignment(flag); - } - - @Override - public boolean getMateNegativeStrandFlag() { - if (!initializedFlags.contains(LazyFlag.MATE_NEGATIVE_STRAND)) { - setMateNegativeStrandFlag(getMateNegativeStrandFlagImpl()); - } - return super.getMateNegativeStrandFlag(); - } - - @Override - public void 
setMateNegativeStrandFlag(final boolean flag) { - if (!initializedFlags.contains(LazyFlag.MATE_NEGATIVE_STRAND)) { - initializedFlags.add(LazyFlag.MATE_NEGATIVE_STRAND); - } - super.setMateNegativeStrandFlag(flag); - } - - @Override - public boolean getMateUnmappedFlag() { - if (!initializedFlags.contains(LazyFlag.MATE_UNMAPPED)) { - setMateUnmappedFlag(getMateUnmappedFlagImpl()); - } - return super.getMateUnmappedFlag(); - } - - @Override - public void setMateUnmappedFlag(final boolean flag) { - if (!initializedFlags.contains(LazyFlag.MATE_UNMAPPED)) { - initializedFlags.add(LazyFlag.MATE_UNMAPPED); - } - super.setMateUnmappedFlag(flag); - } - - @Override - public boolean getFirstOfPairFlag() { - if (!initializedFlags.contains(LazyFlag.FIRST_OF_PAIR)) { - setFirstOfPairFlag(getFirstOfPairFlagImpl()); - } - return super.getFirstOfPairFlag(); - } - - @Override - public void setFirstOfPairFlag(final boolean flag) { - if (!initializedFlags.contains(LazyFlag.FIRST_OF_PAIR)) { - initializedFlags.add(LazyFlag.FIRST_OF_PAIR); - } - super.setFirstOfPairFlag(flag); - } - - @Override - public boolean getSecondOfPairFlag() { - if (!initializedFlags.contains(LazyFlag.SECOND_OF_PAIR)) { - setSecondOfPairFlag(getSecondOfPairFlagImpl()); - } - return super.getSecondOfPairFlag(); - } - - @Override - public void setSecondOfPairFlag(final boolean flag) { - if (!initializedFlags.contains(LazyFlag.SECOND_OF_PAIR)) { - initializedFlags.add(LazyFlag.SECOND_OF_PAIR); - } - super.setSecondOfPairFlag(flag); - } - - - // ===== attributes ===== - - @Override - public Object getAttribute(final short tag) { - LazyAttribute attr = lazyAttributeTags.get(tag); - if (attr != null) { - if (!initializedAttributes.contains(attr)) { - setAttribute(tag, attr.getAttribute(this)); - } - } - return super.getAttribute(tag); - } - - @Override - public void setAttribute(final short tag, final Object value) { - LazyAttribute attr = lazyAttributeTags.get(tag); - if (attr != null && 
!initializedAttributes.contains(attr)) { - initializedAttributes.add(attr); - } - super.setAttribute(tag, value); - } - - @Override - protected void setAttribute(final short tag, final Object value, final boolean isUnsignedArray) { - LazyAttribute attr = lazyAttributeTags.get(tag); - if (attr != null && !initializedAttributes.contains(attr)) { - initializedAttributes.add(attr); - } - super.setAttribute(tag, value, isUnsignedArray); - } - - @Override - public void clearAttributes() { - for (LazyAttribute lazyAttribute : LazyAttribute.values()) { - if (!initializedAttributes.contains(lazyAttribute)) { - initializedAttributes.add(lazyAttribute); - } - } - super.clearAttributes(); - } - - @Override - protected void setAttributes(final SAMBinaryTagAndValue attributes) { - for (LazyAttribute lazyAttribute : LazyAttribute.values()) { - if (!initializedAttributes.contains(lazyAttribute)) { - initializedAttributes.add(lazyAttribute); - } - } - super.setAttributes(attributes); - } - - @Override - protected SAMBinaryTagAndValue getBinaryAttributes() { - for (Map.Entry info : lazyAttributeTags.entrySet()) { - if (!initializedAttributes.contains(info.getValue())) { - getAttribute(info.getKey()); - } - } - - return super.getBinaryAttributes(); - } - - @Override - public boolean isUnsignedArrayAttribute(final String tag) { - Short binaryTag = SAMTag.makeBinaryTag(tag); - LazyAttribute attr = lazyAttributeTags.get(binaryTag); - if (attr != null && !initializedAttributes.contains(attr)) { - getAttribute(binaryTag); - } - - return super.isUnsignedArrayAttribute(tag); - } - - // ===== misc ==== - - /** - * For records equality, we should only compare read id, reference and position on the reference. - * Since read id is a constructor parameter, we only need to make sure that reference info is loaded. 
- * @param o other - * @return comparison result - */ - @Override - public boolean equals(final Object o) { - if (o instanceof SRALazyRecord) { - SRALazyRecord otherRecord = (SRALazyRecord)o; - otherRecord.getReferenceIndex(); - otherRecord.getAlignmentStart(); - } - - getReferenceIndex(); - getAlignmentStart(); - - return super.equals(o); - } - - /** - * The same approach as with 'equals' method. We only load reference and position. - */ - @Override - public int hashCode() { - getReferenceIndex(); - getAlignmentStart(); - - return super.hashCode(); - } - - /** - * Performs a deep copy of the SAMRecord and detaches a copy from NGS iterator - * @return new object - * @throws CloneNotSupportedException - */ - @Override - public Object clone() throws CloneNotSupportedException { - SRALazyRecord newObject = (SRALazyRecord)super.clone(); - newObject.initializedFields = EnumSet.copyOf(this.initializedFields); - newObject.initializedFlags = EnumSet.copyOf(this.initializedFlags); - newObject.initializedAttributes = EnumSet.copyOf(this.initializedAttributes); - newObject.detachFromIterator(); - - return newObject; - } - - @Override - public String format() { - if (!initializedAttributes.contains(LazyAttribute.RG)) { - getAttribute("RG"); - } - return super.format(); - } - - @Override - public List isValid(final boolean firstOnly) { - loadFields(); - getFlags(); - getBinaryAttributes(); - - return super.isValid(firstOnly); - } - - // =============================== Implementation ======================================== - - private ReadCollection getReadCollection() { - if (run != null) { - return run; - } - - log.debug("Recovering SRA read collection. 
Accession: " + accession); - try { - return run = NGS.openReadCollection(accession.toString()); - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - - private Alignment getCurrentAlignment() throws ErrorMsg { - if (!isAligned) { - throw new RuntimeException("Should be called for aligned records only"); - } - - if (alignmentIterator == null) { - log.debug("Recovering SAM record after detaching from iterator. Alignment id: " + sraAlignmentId); - if (sraAlignmentId == null) { - throw new RuntimeException("Cannot recover SAM object after detaching from iterator: no alignment id"); - } - - alignmentIterator = getReadCollection().getAlignment(sraAlignmentId); - } - return alignmentIterator; - } - - private Read getCurrentUnalignedRead() throws ErrorMsg { - if (isAligned) { - throw new RuntimeException("Should be called for unaligned records only"); - } - - if (unalignmentIterator == null) { - log.debug("Recovering SAM record after detaching from iterator. Read id: " + sraReadId + ", fragment index: " + unalignedReadFragmentIndex); - if (sraReadId == null) { - throw new RuntimeException("Cannot recover SAM object after detaching from iterator: no read id"); - } - - Read read = getReadCollection().getRead(sraReadId); - for (int i = 0; i < unalignedReadFragmentIndex + 1; i++) { - read.nextFragment(); - } - - unalignmentIterator = read; - } - return unalignmentIterator; - } - - // ===== fields ===== - - private void loadFields() { - for (LazyField field : LazyField.values()) { - if (initializedFields.contains(field)) { - continue; - } - - field.loadValue(this); - } - } - - private int getAlignmentStartImpl() { - try { - if (isAligned) { - return (int) getCurrentAlignment().getAlignmentPosition() + 1; - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - return SAMRecord.NO_ALIGNMENT_START; - } - - private int getMappingQualityImpl() { - try { - if (isAligned) { - return getCurrentAlignment().getMappingQuality(); - } - } catch (ErrorMsg e) { - 
throw new RuntimeException(e); - } - return SAMRecord.NO_MAPPING_QUALITY; - } - - private String getReferenceNameImpl() { - try { - if (isAligned) { - return getCurrentAlignment().getReferenceSpec(); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - return SAMRecord.NO_ALIGNMENT_REFERENCE_NAME; - } - - private String getCigarStringImpl() { - try { - if (isAligned) { - return getCurrentAlignment().getShortCigar(false); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - return SAMRecord.NO_ALIGNMENT_CIGAR; - } - - private byte[] getReadBasesImpl() { - try { - if (isAligned) { - return getCurrentAlignment().getAlignedFragmentBases().getBytes(); - } else { - return getCurrentUnalignedRead().getFragmentBases().getBytes(); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - - private byte[] getBaseQualitiesImpl() { - try { - Fragment fragment; - if (isAligned) { - fragment = getCurrentAlignment(); - } else { - fragment = getCurrentUnalignedRead(); - } - - // quals are being taken from PRIMARY_ALIGNMENT.SAM_QUALITY column which reverse automatically them if needed - return SAMUtils.fastqToPhred(fragment.getFragmentQualities()); - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - - private int getMateAlignmentStartImpl() { - try { - if (isAligned && getReadPairedFlag() && !getMateUnmappedFlag()) { - Alignment mate = getCurrentAlignment().getMateAlignment(); - return (int) mate.getAlignmentPosition() + 1; - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - return SAMRecord.NO_ALIGNMENT_START; - } - - private String getMateReferenceNameImpl() { - try { - if (isAligned && getReadPairedFlag() && !getMateUnmappedFlag()) { - return getCurrentAlignment().getMateReferenceSpec(); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - return SAMRecord.NO_ALIGNMENT_REFERENCE_NAME; - } - - private int getInferredInsertSizeImpl() { - try { - if (isAligned) { - return (int) 
getCurrentAlignment().getTemplateLength(); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - return 0; - } - - // ===== flags ===== - - private boolean getReadNegativeStrandFlagImpl() { - try { - if (isAligned) { - return getCurrentAlignment().getIsReversedOrientation(); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - return false; - } - - private boolean getReadPairedFlagImpl() { - try { - if (isAligned) { - return getCurrentAlignment().isPaired(); - } else { - return getCurrentUnalignedRead().getNumFragments() > 1; - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - - private boolean getProperPairFlagImpl() { - return isAligned && getReadPairedFlag() && !getMateUnmappedFlag(); - } - - private boolean getSecondaryAlignmentFlagImpl() { - try { - if (isAligned) { - return getCurrentAlignment().getAlignmentCategory() == Alignment.secondaryAlignment; - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - - return false; - } - - private boolean getMateNegativeStrandFlagImpl() { - try { - if (isAligned && getReadPairedFlag() && !getMateUnmappedFlag()) { - Alignment mate = getCurrentAlignment().getMateAlignment(); - return mate.getIsReversedOrientation(); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - - return false; - } - - private boolean getMateUnmappedFlagImpl() { - try { - if (isAligned) { - return !getCurrentAlignment().hasMate(); - } else { - Read unalignedRead = getCurrentUnalignedRead(); - int numFragments = unalignedRead.getNumFragments(); - int nextFragmentIdx = unalignedReadFragmentIndex + 1; - if (nextFragmentIdx == numFragments) { - nextFragmentIdx = 0; - } - - return unalignedRead.fragmentIsAligned(nextFragmentIdx); - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - - private boolean getFirstOfPairFlagImpl() { - if (!getReadPairedFlag()) { - return false; - } - try { - if (isAligned) { - String fragmentId = getCurrentAlignment().getFragmentId(); 
- if (!fragmentId.contains(".FA")) { - throw new RuntimeException("Invalid fragment id: " + fragmentId); - } - - return fragmentId.contains(".FA0."); - } else { - return unalignedReadFragmentIndex == 0; - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - - private boolean getSecondOfPairFlagImpl() { - if (!getReadPairedFlag()) { - return false; - } - try { - if (isAligned) { - String fragmentId = getCurrentAlignment().getFragmentId(); - if (!fragmentId.contains(".FA")) { - throw new RuntimeException("Invalid fragment id: " + fragmentId); - } - - return !fragmentId.contains(".FA0."); - } else { - return unalignedReadFragmentIndex != 0; - } - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - - // ===== attributes ===== - - private String getAttributeGroupNameImpl() { - try { - String readGroupName; - if (isAligned) { - readGroupName = getCurrentAlignment().getReadGroup(); - } else { - readGroupName = getCurrentUnalignedRead().getReadGroup(); - } - - if (!readGroupName.isEmpty()) { - return readGroupName; - } - return getReadCollection().getName(); - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } -} diff --git a/src/main/java/htsjdk/samtools/sra/SRAUnalignmentIterator.java b/src/main/java/htsjdk/samtools/sra/SRAUnalignmentIterator.java deleted file mode 100644 index f128a2bf56..0000000000 --- a/src/main/java/htsjdk/samtools/sra/SRAUnalignmentIterator.java +++ /dev/null @@ -1,181 +0,0 @@ -/*=========================================================================== -* -* PUBLIC DOMAIN NOTICE -* National Center for Biotechnology Information -* -* This software/database is a "United States Government Work" under the -* terms of the United States Copyright Act. It was written as part of -* the author's official duties as a United States Government employee and -* thus cannot be copyrighted. This software/database is freely available -* to the public for use. The National Library of Medicine and the U.S. 
-* Government have not placed any restriction on its use or reproduction. -* -* Although all reasonable efforts have been taken to ensure the accuracy -* and reliability of the software and data, the NLM and the U.S. -* Government do not and cannot warrant the performance or results that -* may be obtained by using this software or data. The NLM and the U.S. -* Government disclaim all warranties, express or implied, including -* warranties of performance, merchantability or fitness for any particular -* purpose. -* -* Please cite the author in any work or product based on this material. -* -* =========================================================================== -* -*/ - -package htsjdk.samtools.sra; - - -import htsjdk.samtools.Chunk; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SRAIterator; -import htsjdk.samtools.ValidationStringency; -import ngs.ErrorMsg; -import ngs.Read; -import ngs.ReadCollection; -import ngs.ReadIterator; - -import java.util.Iterator; -import java.util.NoSuchElementException; - -/** - * Iterator for unaligned reads. - * Is used from SRAIterator. - * - * Created by andrii.nikitiuk on 9/3/15. 
- */ -public class SRAUnalignmentIterator implements Iterator { - private ValidationStringency validationStringency; - - private SRAAccession accession; - private ReadCollection run; - private SAMFileHeader header; - private SRAIterator.RecordRangeInfo recordRangeInfo; - - private ReadIterator unalignedIterator; - private boolean hasMoreUnalignedReads = true; - private Boolean hasMoreUnalignedFragments = false; - private int lastUnalignedFragmentIndex; - - private SRALazyRecord lastRecord; - - /** - * - * @param run opened read collection - * @param header sam header - * @param recordRangeInfo info about record ranges withing SRA archive - * @param chunk used to determine which unaligned reads the iterator should return - */ - public SRAUnalignmentIterator(SRAAccession accession, final ReadCollection run, final SAMFileHeader header, SRAIterator.RecordRangeInfo recordRangeInfo, Chunk chunk) { - this.accession = accession; - this.run = run; - this.header = header; - this.recordRangeInfo = recordRangeInfo; - - long readStart = chunk.getChunkStart() - recordRangeInfo.getTotalReferencesLength(); - if (readStart < 0) { - readStart = 0; - } else if (readStart >= recordRangeInfo.getNumberOfReads()) { - throw new RuntimeException("Invalid chunk provided: chunkStart position is after last read"); - } - - long readEnd = chunk.getChunkEnd() - recordRangeInfo.getTotalReferencesLength(); - if (readEnd > recordRangeInfo.getNumberOfReads()) { - readEnd = recordRangeInfo.getNumberOfReads(); - } else if (readEnd <= 0) { - throw new RuntimeException("Invalid chunk provided: chunkEnd position is before last read"); - } - - try { - unalignedIterator = run.getReadRange(readStart + 1, readEnd - readStart, Read.partiallyAligned | Read.unaligned); - nextUnalignedFragment(); - - } catch (final Exception e) { - throw new RuntimeException(e); - } - } - - @Override - public boolean hasNext() { - // check unaligned - if (hasMoreUnalignedFragments == null) { - try { - 
lastRecord.detachFromIterator(); - nextUnalignedFragment(); - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - } - return hasMoreUnalignedFragments; - } - - @Override - public SAMRecord next() { - if (!hasNext()) { - throw new NoSuchElementException("No more alignments are available"); - } - - return nextUnalignment(); - } - - @Override - public void remove() { - throw new UnsupportedOperationException("Removal of records not implemented."); - } - - public void setValidationStringency(ValidationStringency validationStringency) { - this.validationStringency = validationStringency; - } - - private SAMRecord nextUnalignment() { - try { - lastRecord = new SRALazyRecord(header, accession, run, unalignedIterator, unalignedIterator.getReadId(), lastUnalignedFragmentIndex); - } catch (ErrorMsg e) { - throw new RuntimeException(e); - } - - if (validationStringency != null) { - lastRecord.setValidationStringency(validationStringency); - } - - hasMoreUnalignedFragments = null; - - return lastRecord; - } - - private void nextUnalignedFragment() throws ErrorMsg { - while (hasMoreUnalignedFragments == null || hasMoreUnalignedFragments) { - hasMoreUnalignedFragments = unalignedIterator.nextFragment(); - lastUnalignedFragmentIndex++; - - if (hasMoreUnalignedFragments && !unalignedIterator.isAligned()) { - return; - } - } - - if (!hasMoreUnalignedReads) { - throw new RuntimeException("Cannot get next unaligned read - already at last one"); - } - - while (true) { - hasMoreUnalignedReads = unalignedIterator.nextRead(); - lastUnalignedFragmentIndex = -1; - if (!hasMoreUnalignedReads) { - break; - } - - // search for unaligned fragment - do { - hasMoreUnalignedFragments = unalignedIterator.nextFragment(); - lastUnalignedFragmentIndex++; - } while (hasMoreUnalignedFragments && unalignedIterator.isAligned()); - - // means that we found fragment - if (hasMoreUnalignedFragments) { - return; - } - } - } -} diff --git a/src/main/java/htsjdk/samtools/sra/SRAUtils.java 
b/src/main/java/htsjdk/samtools/sra/SRAUtils.java deleted file mode 100644 index e72caa8687..0000000000 --- a/src/main/java/htsjdk/samtools/sra/SRAUtils.java +++ /dev/null @@ -1,83 +0,0 @@ -/*=========================================================================== -* -* PUBLIC DOMAIN NOTICE -* National Center for Biotechnology Information -* -* This software/database is a "United States Government Work" under the -* terms of the United States Copyright Act. It was written as part of -* the author's official duties as a United States Government employee and -* thus cannot be copyrighted. This software/database is freely available -* to the public for use. The National Library of Medicine and the U.S. -* Government have not placed any restriction on its use or reproduction. -* -* Although all reasonable efforts have been taken to ensure the accuracy -* and reliability of the software and data, the NLM and the U.S. -* Government do not and cannot warrant the performance or results that -* may be obtained by using this software or data. The NLM and the U.S. -* Government disclaim all warranties, express or implied, including -* warranties of performance, merchantability or fitness for any particular -* purpose. -* -* Please cite the author in any work or product based on this material. -* -* =========================================================================== -* -*/ - -package htsjdk.samtools.sra; - -import ngs.ErrorMsg; -import ngs.Read; -import ngs.ReadCollection; -import ngs.ReferenceIterator; - -import java.util.ArrayList; -import java.util.List; - -/** - * Provides some functionality which can be used by other classes - * - * Created by andrii.nikitiuk on 10/28/15. - */ -public class SRAUtils { - /** - * References are stored in SRA table in chunks of 5k bases per row, while last chunk of a reference is less or - * equal than 5k bases in size (even if the next reference follows). 
- * So, it will be optimal if we align reference sizes to 5k bases to read by reference rows. - */ - public static final int REFERENCE_ALIGNMENT = 5000; - - /** - * Is used to build RecordRangeInfo - * @param run open read collection - * @return total number of reads (both aligned and unaligned) in SRA archive - * @throws ErrorMsg - */ - public static long getNumberOfReads(ReadCollection run) throws ErrorMsg { - return run.getReadCount(Read.all); - } - - /** - * Loads reference lengths from a read collection. - * Aligns reference lengths by REFERENCE_ALIGNMENT bases for optimal loads of alignments - * (references are stored in REFERENCE_ALIGNMENT bases chunks in SRA table) - * - * Is used to build RecordRangeInfo - * @param run single opened read collection - * @return list with references lengths - * @throws ErrorMsg - */ - public static List getReferencesLengthsAligned(ReadCollection run) throws ErrorMsg { - ReferenceIterator refIt = run.getReferences(); - List lengths = new ArrayList(); - while (refIt.nextReference()) { - long refLen = refIt.getLength(); - // lets optimize references so they always align in 5000 bases positions - if (refLen % REFERENCE_ALIGNMENT != 0) { - refLen += REFERENCE_ALIGNMENT - (refLen % REFERENCE_ALIGNMENT); - } - lengths.add(refLen); - } - return lengths; - } -} diff --git a/src/main/java/htsjdk/samtools/util/AbstractAsyncWriter.java b/src/main/java/htsjdk/samtools/util/AbstractAsyncWriter.java index b7c6c54be6..ff8c3b3857 100644 --- a/src/main/java/htsjdk/samtools/util/AbstractAsyncWriter.java +++ b/src/main/java/htsjdk/samtools/util/AbstractAsyncWriter.java @@ -57,8 +57,11 @@ public void write(final T item) { if (this.isClosed.get()) throw new RuntimeIOException("Attempt to add record to closed writer."); checkAndRethrow(); - try { this.queue.put(item); } - catch (final InterruptedException ie) { throw new RuntimeException("Interrupted queueing item for writing.", ie); } + try { + this.queue.put(item); + } catch (final 
InterruptedException ie) { + throw new RuntimeException("Interrupted queueing item for writing.", ie); + } checkAndRethrow(); } @@ -72,15 +75,15 @@ public void close() { if (!this.isClosed.getAndSet(true)) { try { - this.writer.join(); + this.writer.join(); } catch (final InterruptedException ie) { - throw new RuntimeException("Interrupted waiting on writer thread.", ie); - } + throw new RuntimeException("Interrupted waiting on writer thread.", ie); + } - //The queue should be empty but if it's not, we'll drain it here to protect against any lost data. - //There's no need to timeout on poll because poll is called only when queue is not empty and + // The queue should be empty but if it's not, we'll drain it here to protect against any lost data. + // There's no need to timeout on poll because poll is called only when queue is not empty and // at this point the writer thread is definitely dead and noone is removing items from the queue. - //The item pulled will never be null (same reasoning). + // The item pulled will never be null (same reasoning). 
while (!this.queue.isEmpty()) { final T item = queue.poll(); synchronouslyWrite(item); @@ -113,21 +116,20 @@ private class WriterRunnable implements Runnable { @Override public void run() { try { - //The order of the two conditions is important, see https://github.com/samtools/htsjdk/issues/564 - //because we want to make sure that emptiness status of the queue does not change after we have evaluated isClosed - //as it is now (isClosed checked before queue.isEmpty), - //the two operations are effectively atomic if isClosed returns true + // The order of the two conditions is important, see https://github.com/samtools/htsjdk/issues/564 + // because we want to make sure that emptiness status of the queue does not change after we have + // evaluated isClosed + // as it is now (isClosed checked before queue.isEmpty), + // the two operations are effectively atomic if isClosed returns true while (!isClosed.get() || !queue.isEmpty()) { try { final T item = queue.poll(50, TimeUnit.MILLISECONDS); if (item != null) synchronouslyWrite(item); - } - catch (final InterruptedException ie) { + } catch (final InterruptedException ie) { /* Do Nothing */ } } - } - catch (final Throwable t) { + } catch (final Throwable t) { ex.compareAndSet(null, t); // In case a writer was blocking on a full queue before ex has been set, clear the queue // so that the writer will no longer be blocked so that it can see the exception. 
diff --git a/src/main/java/htsjdk/samtools/util/AbstractLocusInfo.java b/src/main/java/htsjdk/samtools/util/AbstractLocusInfo.java index 95150e0c51..842bca66f2 100644 --- a/src/main/java/htsjdk/samtools/util/AbstractLocusInfo.java +++ b/src/main/java/htsjdk/samtools/util/AbstractLocusInfo.java @@ -24,7 +24,6 @@ package htsjdk.samtools.util; import htsjdk.samtools.SAMSequenceRecord; - import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -53,7 +52,7 @@ public class AbstractLocusInfo implements Loc /** * Initial size for the list of AbstractRecordAndOffset objects **/ - private final static int INITIAL_LIST_SIZE = 100; + private static final int INITIAL_LIST_SIZE = 100; /** * List of aligned to current position reads @@ -132,11 +131,11 @@ public int getSequenceLength() { return referenceSequence.getSequenceLength(); } - /** + /** * @return the number of records overlapping the position */ - public int size() { - return this.recordAndOffsets.size(); + public int size() { + return this.recordAndOffsets.size(); } /** diff --git a/src/main/java/htsjdk/samtools/util/AbstractLocusIterator.java b/src/main/java/htsjdk/samtools/util/AbstractLocusIterator.java index 7521be0bfb..77c622e43d 100644 --- a/src/main/java/htsjdk/samtools/util/AbstractLocusIterator.java +++ b/src/main/java/htsjdk/samtools/util/AbstractLocusIterator.java @@ -24,7 +24,6 @@ package htsjdk.samtools.util; - import htsjdk.samtools.Cigar; import htsjdk.samtools.CigarElement; import htsjdk.samtools.CigarOperator; @@ -38,7 +37,6 @@ import htsjdk.samtools.filter.FilteringSamIterator; import htsjdk.samtools.filter.SamRecordFilter; import htsjdk.samtools.filter.SecondaryOrSupplementaryFilter; - import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; @@ -54,16 +52,16 @@ * @author Darina_Nikolaeva@epam.com, EPAM Systems, Inc. * @author Mariia_Zueva@epam.com, EPAM Systems, Inc. 
*/ - -public abstract class AbstractLocusIterator> implements Iterable, CloseableIterator { +public abstract class AbstractLocusIterator> + implements Iterable, CloseableIterator { static final Log LOG = Log.getInstance(AbstractLocusIterator.class); private final SamReader samReader; - final private ReferenceSequenceMask referenceSequenceMask; + private final ReferenceSequenceMask referenceSequenceMask; private PeekableIterator samIterator; - private List samFilters = Arrays.asList(new SecondaryOrSupplementaryFilter(), - new DuplicateReadFilter()); + private List samFilters = + Arrays.asList(new SecondaryOrSupplementaryFilter(), new DuplicateReadFilter()); final List intervals; /** @@ -142,13 +140,10 @@ public abstract class AbstractLocusIterator locusComparator = new LocusComparator<>(); - - public SAMFileHeader getHeader() { return this.samReader.getFileHeader(); } - /** * Prepare to iterate through the given SAM records, skipping non-primary alignments * @@ -160,9 +155,11 @@ public SAMFileHeader getHeader() { */ public AbstractLocusIterator(final SamReader samReader, final IntervalList intervalList, final boolean useIndex) { final String className = this.getClass().getSimpleName(); - if (samReader.getFileHeader().getSortOrder() == null || samReader.getFileHeader().getSortOrder() == SAMFileHeader.SortOrder.unsorted) { - LOG.warn(className + " constructed with samReader that has SortOrder == unsorted. ", "" + - "Assuming SAM is coordinate sorted, but exceptions may occur if it is not."); + if (samReader.getFileHeader().getSortOrder() == null + || samReader.getFileHeader().getSortOrder() == SAMFileHeader.SortOrder.unsorted) { + LOG.warn( + className + " constructed with samReader that has SortOrder == unsorted. 
", + "" + "Assuming SAM is coordinate sorted, but exceptions may occur if it is not."); } else if (samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) { throw new SAMException(className + " cannot operate on a SAM file that is not coordinate sorted."); } @@ -170,10 +167,15 @@ public AbstractLocusIterator(final SamReader samReader, final IntervalList inter this.useIndex = useIndex; if (intervalList != null) { try { - SequenceUtil.assertSequenceDictionariesEqual(intervalList.getHeader().getSequenceDictionary(), getHeader().getSequenceDictionary()); + SequenceUtil.assertSequenceDictionariesEqual( + intervalList.getHeader().getSequenceDictionary(), + getHeader().getSequenceDictionary()); } catch (final SequenceUtil.SequenceListsDifferException ex) { - throw new SequenceUtil.SequenceListsDifferException("The sequence dictionary of the interval list file " + - "differs from the sequence dictionary of the input SAM file: (" + samReader.getResourceDescription() + ")", ex); + throw new SequenceUtil.SequenceListsDifferException( + "The sequence dictionary of the interval list file " + + "differs from the sequence dictionary of the input SAM file: (" + + samReader.getResourceDescription() + ")", + ex); } final IntervalList uniquedIntervalList = intervalList.uniqued(); this.intervals = uniquedIntervalList.getIntervals(); @@ -184,7 +186,6 @@ public AbstractLocusIterator(final SamReader samReader, final IntervalList inter } } - /** * @return iterator over all/all covered locus position in reference according to emitUncoveredLoci * value. 
@@ -192,11 +193,13 @@ public AbstractLocusIterator(final SamReader samReader, final IntervalList inter @Override public Iterator iterator() { if (samIterator != null) { - throw new IllegalStateException("Cannot call iterator() more than once on " + this.getClass().getSimpleName()); + throw new IllegalStateException("Cannot call iterator() more than once on " + + this.getClass().getSimpleName()); } CloseableIterator tempIterator; if (intervals != null) { - tempIterator = new SamRecordIntervalIteratorFactory().makeSamRecordIntervalIterator(samReader, intervals, useIndex); + tempIterator = new SamRecordIntervalIteratorFactory() + .makeSamRecordIntervalIterator(samReader, intervals, useIndex); } else { tempIterator = samReader.iterator(); } @@ -208,7 +211,7 @@ public Iterator iterator() { } /** - * Closes inner SamIterator. + * Closes inner {@code SamIterator}. */ @Override public void close() { @@ -253,9 +256,9 @@ private boolean hasRemainingMaskBases() { // If not emitting uncovered loci, this check is irrelevant return false; } - return (lastReferenceSequence < referenceSequenceMask.getMaxSequenceIndex() || - (lastReferenceSequence == referenceSequenceMask.getMaxSequenceIndex() && - lastPosition < referenceSequenceMask.nextPosition(lastReferenceSequence, lastPosition))); + return (lastReferenceSequence < referenceSequenceMask.getMaxSequenceIndex() + || (lastReferenceSequence == referenceSequenceMask.getMaxSequenceIndex() + && lastPosition < referenceSequenceMask.nextPosition(lastReferenceSequence, lastPosition))); } /** @@ -277,7 +280,6 @@ public K next() { if (rec.getReferenceIndex() == -1) { this.finishedAlignedReads = true; continue; - } // Skip over an unaligned read that has been forced to be sorted with the aligned reads if (rec.getReadUnmappedFlag() @@ -291,8 +293,9 @@ public K next() { // emit everything that is before the start of the current read by 2 positions, because we know no more // coverage and insertions will be accumulated for those loci. 
- while (!accumulator.isEmpty() && (locusComparator.compare(accumulator.get(0), alignmentStart) < -1 - || accumulator.get(0).getSequenceIndex() != alignmentStart.getSequenceIndex())) { + while (!accumulator.isEmpty() + && (locusComparator.compare(accumulator.get(0), alignmentStart) < -1 + || accumulator.get(0).getSequenceIndex() != alignmentStart.getSequenceIndex())) { final K first = accumulator.get(0); populateCompleteQueue(alignmentStart); if (!complete.isEmpty()) { @@ -332,8 +335,8 @@ public K next() { if (!complete.isEmpty()) { return complete.remove(0); } else if (emitUncoveredLoci) { - final Locus afterLastMaskPositionLocus = new LocusImpl(referenceSequenceMask.getMaxSequenceIndex(), - referenceSequenceMask.getMaxPosition() + 1); + final Locus afterLastMaskPositionLocus = new LocusImpl( + referenceSequenceMask.getMaxSequenceIndex(), referenceSequenceMask.getMaxPosition() + 1); // In this case... we're past the last read from SAM so see if we can // fill out any more (zero coverage) entries from the mask return createNextUncoveredLocusInfo(afterLastMaskPositionLocus); @@ -348,20 +351,23 @@ private void validateRecordPosition(final SAMRecord rec) { if (!accumulator.isEmpty()) { if (accumulator.get(0).getSequenceIndex() != rec.getReferenceIndex() || rec.getAlignmentStart() - accumulator.get(0).getPosition() > 1) { - throw new IllegalStateException("Accumulator should be empty or aligned with current or previous SAMRecord"); + throw new IllegalStateException( + "Accumulator should be empty or aligned with current or previous SAMRecord"); } } } - /** * @return true if we have surpassed the maximum accumulation threshold for the first locus in the accumulator, false otherwise */ - private boolean surpassedAccumulationThreshold() { - final boolean surpassesThreshold = !accumulator.isEmpty() && accumulator.get(0).getRecordAndOffsets().size() >= maxReadsToAccumulatePerLocus; + final boolean surpassesThreshold = !accumulator.isEmpty() + && 
accumulator.get(0).getRecordAndOffsets().size() >= maxReadsToAccumulatePerLocus; if (surpassesThreshold && !enforcedAccumulationLimit) { - LOG.warn("We have encountered greater than " + maxReadsToAccumulatePerLocus + " reads at position " + accumulator.get(0).toString() + " and will ignore the remaining reads at this position. Note that further warnings will be suppressed."); + LOG.warn( + "We have encountered greater than " + maxReadsToAccumulatePerLocus + " reads at position " + + accumulator.get(0).toString() + + " and will ignore the remaining reads at this position. Note that further warnings will be suppressed."); enforcedAccumulationLimit = true; } return surpassesThreshold; @@ -375,7 +381,6 @@ private boolean surpassedAccumulationThreshold() { */ abstract void accumulateSamRecord(final SAMRecord rec); - /** * Requires that the accumulator for the record is previously fill with * {@link #accumulateSamRecord(htsjdk.samtools.SAMRecord)}. @@ -399,11 +404,11 @@ private boolean surpassedAccumulationThreshold() { * @return a zero-coverage AbstractLocusInfo, or null if there is none before the stopBefore locus */ private K createNextUncoveredLocusInfo(final Locus stopBeforeLocus) { - while (lastReferenceSequence <= stopBeforeLocus.getSequenceIndex() && - lastReferenceSequence <= referenceSequenceMask.getMaxSequenceIndex()) { + while (lastReferenceSequence <= stopBeforeLocus.getSequenceIndex() + && lastReferenceSequence <= referenceSequenceMask.getMaxSequenceIndex()) { - if (lastReferenceSequence == stopBeforeLocus.getSequenceIndex() && - lastPosition + 1 >= stopBeforeLocus.getPosition()) { + if (lastReferenceSequence == stopBeforeLocus.getSequenceIndex() + && lastPosition + 1 >= stopBeforeLocus.getPosition()) { return null; } @@ -418,7 +423,8 @@ private K createNextUncoveredLocusInfo(final Locus stopBeforeLocus) { } lastReferenceSequence++; lastPosition = 0; - } else if (lastReferenceSequence < stopBeforeLocus.getSequenceIndex() || nextbit < 
stopBeforeLocus.getPosition()) { + } else if (lastReferenceSequence < stopBeforeLocus.getSequenceIndex() + || nextbit < stopBeforeLocus.getPosition()) { lastPosition = nextbit; return createLocusInfo(getReferenceSequence(lastReferenceSequence), lastPosition); } else if (nextbit >= stopBeforeLocus.getPosition()) { @@ -472,7 +478,6 @@ private void populateCompleteQueue(final Locus stopBeforeLocus) { // fill in any gaps based on our genome mask final int sequenceIndex = locusInfo.getSequenceIndex(); - // only add to the complete queue if it's in the mask (or we have no mask!) if (referenceSequenceMask.get(locusInfo.getSequenceIndex(), locusInfo.getPosition())) { complete.add(locusInfo); @@ -484,11 +489,12 @@ private void populateCompleteQueue(final Locus stopBeforeLocus) { private void removeSkippedRegion(Locus stopBeforeLocus) { int i = 0; - while (i < accumulator.size() && accumulator.get(i).isEmpty() && - locusComparator.compare(accumulator.get(i), stopBeforeLocus) < 0) { + while (i < accumulator.size() + && accumulator.get(i).isEmpty() + && locusComparator.compare(accumulator.get(i), stopBeforeLocus) < 0) { i++; } - if (i > 0){ + if (i > 0) { accumulator.subList(0, i).clear(); } } diff --git a/src/main/java/htsjdk/samtools/util/AbstractProgressLogger.java b/src/main/java/htsjdk/samtools/util/AbstractProgressLogger.java index d2435a5e9f..4a76ff9f13 100644 --- a/src/main/java/htsjdk/samtools/util/AbstractProgressLogger.java +++ b/src/main/java/htsjdk/samtools/util/AbstractProgressLogger.java @@ -1,7 +1,6 @@ package htsjdk.samtools.util; import htsjdk.samtools.SAMRecord; - import java.text.DecimalFormat; import java.text.NumberFormat; @@ -11,7 +10,7 @@ * * Concrete subclasses must provide the logger */ -abstract public class AbstractProgressLogger implements ProgressLoggerInterface { +public abstract class AbstractProgressLogger implements ProgressLoggerInterface { private final int n; private final String verb; private final String noun; @@ -25,7 +24,7 @@ abstract 
public class AbstractProgressLogger implements ProgressLoggerInterface private int lastPos = 0; private String lastReadName = null; private long countNonIncreasing = 0; - final static private long PRINT_READ_NAME_THRESHOLD = 1000; + private static final long PRINT_READ_NAME_THRESHOLD = 1000; /** * Construct an AbstractProgressLogger. @@ -48,7 +47,7 @@ protected AbstractProgressLogger(final String noun, final String verb, final int * * @param message a message to be logged by the logger (recommended output level is INFO or the equivalent) */ - abstract protected void log(String ... message); + protected abstract void log(String... message); private synchronized void record() { final long now = System.currentTimeMillis(); @@ -56,8 +55,8 @@ private synchronized void record() { this.lastStartTime = now; final long seconds = (now - startTime) / 1000; - final String elapsed = formatElapseTime(seconds); - final String period = pad(fmt.format(lastPeriodSeconds), 4); + final String elapsed = formatElapseTime(seconds); + final String period = pad(fmt.format(lastPeriodSeconds), 4); final String processed = pad(fmt.format(this.processed), 13); final String readInfo; @@ -74,8 +73,19 @@ private synchronized void record() { final long n = (this.processed % this.n == 0) ? this.n : this.processed % this.n; - log(this.verb, " ", processed, " " + noun + ". Elapsed time: ", elapsed, "s. Time for last ", fmt.format(n), - ": ", period, "s. Last read position: ", readInfo, rnInfo); + log( + this.verb, + " ", + processed, + " " + noun + ". Elapsed time: ", + elapsed, + "s. Time for last ", + fmt.format(n), + ": ", + period, + "s. Last read position: ", + readInfo, + rnInfo); } /** @@ -86,8 +96,7 @@ public synchronized boolean log() { if (processed % this.n != 0) { record(); return true; - } - else { + } else { return false; } } @@ -140,10 +149,14 @@ public boolean record(final SAMRecord... recs) { } /** Returns the count of records processed. 
*/ - public synchronized long getCount() { return this.processed; } + public synchronized long getCount() { + return this.processed; + } /** Returns the number of seconds since progress tracking began. */ - public long getElapsedSeconds() { return (System.currentTimeMillis() - this.startTime) / 1000; } + public long getElapsedSeconds() { + return (System.currentTimeMillis() - this.startTime) / 1000; + } /** Resets the start time to now and the number of records to zero. */ public synchronized void reset() { diff --git a/src/main/java/htsjdk/samtools/util/AbstractRecordAndOffset.java b/src/main/java/htsjdk/samtools/util/AbstractRecordAndOffset.java index 019997f7be..75557c1c8f 100644 --- a/src/main/java/htsjdk/samtools/util/AbstractRecordAndOffset.java +++ b/src/main/java/htsjdk/samtools/util/AbstractRecordAndOffset.java @@ -31,7 +31,7 @@ * to the base and quality at the genomic position described the containing AbstractLocusInfo. One object represents * one base for SamLocusIterator.RecordAndOffset implementation or one alignment block of * SAMRecord for TypedRecordAndOffset implementation. - * + * * @author Darina_Nikolaeva@epam.com, EPAM Systems, Inc. * @author Mariia_Zueva@epam.com, EPAM Systems, Inc. */ @@ -133,7 +133,7 @@ public String getReadName() { public byte[] getBaseQualities() { return record.getBaseQualities(); } - + /** * @return the base quality according to offset. */ @@ -143,7 +143,8 @@ public byte getBaseQuality() { protected void validateOffset(int offset, final byte[] array) { if (offset < 0 || offset >= array.length) { - throw new IllegalArgumentException("The requested position is not covered by this " + this.getClass().getSimpleName() + " object. " + throw new IllegalArgumentException("The requested position is not covered by this " + + this.getClass().getSimpleName() + " object. 
" + "\n Offset = " + offset + " Array length = " + array.length + "\n Record is: " + getRecord().toString()); } diff --git a/src/main/java/htsjdk/samtools/util/AsciiWriter.java b/src/main/java/htsjdk/samtools/util/AsciiWriter.java index 50b08d8443..5a83703060 100644 --- a/src/main/java/htsjdk/samtools/util/AsciiWriter.java +++ b/src/main/java/htsjdk/samtools/util/AsciiWriter.java @@ -24,7 +24,6 @@ package htsjdk.samtools.util; import htsjdk.samtools.Defaults; - import java.io.IOException; import java.io.OutputStream; import java.io.Writer; diff --git a/src/main/java/htsjdk/samtools/util/AsyncBlockCompressedInputStream.java b/src/main/java/htsjdk/samtools/util/AsyncBlockCompressedInputStream.java index 66b188b7f3..66b2e27859 100644 --- a/src/main/java/htsjdk/samtools/util/AsyncBlockCompressedInputStream.java +++ b/src/main/java/htsjdk/samtools/util/AsyncBlockCompressedInputStream.java @@ -23,11 +23,9 @@ */ package htsjdk.samtools.util; - import htsjdk.samtools.Defaults; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.zip.InflaterFactory; - import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -40,22 +38,24 @@ import java.util.concurrent.ThreadFactory; /** - * Asynchronous read-ahead implementation of {@link htsjdk.samtools.util.BlockCompressedInputStream}. - * - * Note that this implementation is not synchronized. If multiple threads access an instance concurrently, it must be synchronized externally. + * Asynchronous read-ahead implementation of {@link htsjdk.samtools.util.BlockCompressedInputStream}. + * + * Note that this implementation is not synchronized. If multiple threads access an instance concurrently, it must be synchronized externally. 
*/ public class AsyncBlockCompressedInputStream extends BlockCompressedInputStream { - private static final int READ_AHEAD_BUFFERS = (int)Math.ceil((double) Defaults.NON_ZERO_BUFFER_SIZE / BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE); - private static final Executor threadpool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors(),new ThreadFactory() { - @Override - public Thread newThread(Runnable r) { - Thread t = Executors.defaultThreadFactory().newThread(r); - t.setDaemon(true); - return t; - } - }); + private static final int READ_AHEAD_BUFFERS = (int) Math.ceil( + (double) Defaults.NON_ZERO_BUFFER_SIZE / BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE); + private static final Executor threadpool = + Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors(), new ThreadFactory() { + @Override + public Thread newThread(Runnable r) { + Thread t = Executors.defaultThreadFactory().newThread(r); + t.setDaemon(true); + return t; + } + }); /** - * Next blocks (in stream order) that have already been decompressed. + * Next blocks (in stream order) that have already been decompressed. 
*/ private final BlockingQueue mResult = new ArrayBlockingQueue<>(READ_AHEAD_BUFFERS); /** @@ -85,13 +85,11 @@ public AsyncBlockCompressedInputStream(final InputStream stream, InflaterFactory super(stream, true, inflaterFactory); } - public AsyncBlockCompressedInputStream(final File file) - throws IOException { + public AsyncBlockCompressedInputStream(final File file) throws IOException { super(file); } - public AsyncBlockCompressedInputStream(final File file, InflaterFactory inflaterFactory) - throws IOException { + public AsyncBlockCompressedInputStream(final File file, InflaterFactory inflaterFactory) throws IOException { super(file, inflaterFactory); } @@ -118,7 +116,7 @@ protected DecompressedBlock nextBlock(byte[] bufferAvailableForReuse) { } return nextBlockSync(); } - + @Override protected void prepareForSeek() { flushReadAhead(); @@ -203,7 +201,7 @@ private void tryQueueTask() { /** * Foreground thread blocking operation that retrieves the next read-ahead buffer. * Lazy initiation of read-ahead is performed if required. - * @return next decompressed block in input stream + * @return next decompressed block in input stream */ private DecompressedBlock nextBlockSync() { ensureReadAhead(); @@ -216,6 +214,7 @@ private DecompressedBlock nextBlockSync() { ensureReadAhead(); return nextBlock; } + private class AsyncBlockCompressedInputStreamRunnable implements Runnable { /** * Thread pool operation that fills the read-ahead queue diff --git a/src/main/java/htsjdk/samtools/util/AsyncBufferedIterator.java b/src/main/java/htsjdk/samtools/util/AsyncBufferedIterator.java index bf78ecb9ce..8315d987f7 100644 --- a/src/main/java/htsjdk/samtools/util/AsyncBufferedIterator.java +++ b/src/main/java/htsjdk/samtools/util/AsyncBufferedIterator.java @@ -36,10 +36,10 @@ * Iterator that uses a dedicated background thread to perform read-ahead to improve * throughput at the expense of increased latency. 
This iterator will block * until the background thread has read a full buffer of records. - * + * * Note that this implementation is not synchronized. If multiple threads - * access an instance concurrently, it must be synchronized externally. - * + * access an instance concurrently, it must be synchronized externally. + * * @author Daniel Cameron * */ @@ -53,6 +53,7 @@ public class AsyncBufferedIterator implements CloseableIterator { * a deadlock due to task dependencies. */ private Thread backgroundThread; + private final Iterator underlyingIterator; private final BlockingQueue> buffers; private IteratorBuffer currentBlock = new IteratorBuffer<>(Collections.emptyList()); @@ -60,7 +61,7 @@ public class AsyncBufferedIterator implements CloseableIterator { /** * Creates a new iterator that traverses the given iterator on a background * thread - * + * * @param iterator iterator to traverse * @param bufferSize size of read-ahead buffer. A larger size will increase both throughput and latency. * Double buffering is used so the maximum number of records on which read-ahead is performed is twice this. @@ -68,11 +69,11 @@ public class AsyncBufferedIterator implements CloseableIterator { public AsyncBufferedIterator(final Iterator iterator, final int bufferSize) { this(iterator, bufferSize, 1, null); } - + /** * Creates a new iterator that traverses the given iterator on a background * thread - * + * * @param iterator iterator to traverse * @param bufferSize size of each read-ahead buffer. A larger size will increase both throughput and latency. * @param bufferCount number of read-ahead buffers @@ -84,13 +85,14 @@ public AsyncBufferedIterator(final Iterator iterator, final int bufferSize, f /** * Creates a new iterator that traverses the given iterator on a background * thread - * + * * @param iterator iterator to traverse * @param bufferSize size of each read-ahead buffer. A larger size will increase both throughput and latency. 
* @param bufferCount number of read-ahead buffers * @param threadName background thread name. A name will be automatically generated if this parameter is null. */ - public AsyncBufferedIterator(final Iterator iterator, final int bufferSize, final int bufferCount, final String threadName) { + public AsyncBufferedIterator( + final Iterator iterator, final int bufferSize, final int bufferCount, final String threadName) { if (iterator == null) throw new IllegalArgumentException("iterator cannot be null"); if (bufferCount <= 0) throw new IllegalArgumentException("Must use at least 1 buffer."); if (bufferSize <= 0) throw new IllegalArgumentException("Buffer size must be at least 1 record."); @@ -98,12 +100,14 @@ public AsyncBufferedIterator(final Iterator iterator, final int bufferSize, f this.buffers = new ArrayBlockingQueue<>(bufferCount); this.bufferSize = bufferSize; int threadNumber = threadsCreated.incrementAndGet(); - this.backgroundThread = new Thread(new Runnable() { - @Override - public void run() { - backgroundRun(); - } - }, threadName != null ? threadName : getThreadNamePrefix() + threadNumber); + this.backgroundThread = new Thread( + new Runnable() { + @Override + public void run() { + backgroundRun(); + } + }, + threadName != null ? 
threadName : getThreadNamePrefix() + threadNumber); this.backgroundThread.setDaemon(true); log.debug("Starting thread " + this.backgroundThread.getName()); this.backgroundThread.start(); @@ -129,7 +133,7 @@ public void close() { } } } - + private void ensureHasNext() { if (!currentBlock.hasNext()) { // Rethrow any exceptions raised on the background thread @@ -162,7 +166,7 @@ public boolean hasNext() { /** * Raises any exception encountered when processing records on - * the background thread back to the foreground caller + * the background thread back to the foreground caller * @throws Error */ private void raiseBackgroundThreadException() throws Error { @@ -188,7 +192,7 @@ public T next() { /** * Performs 1 buffer worth of read-ahead on the underlying iterator - * (background thread method) + * (background thread method) */ private IteratorBuffer readAhead() { List readAhead = null; @@ -210,7 +214,7 @@ private IteratorBuffer readAhead() { } /** * Background thread run loop - * @throws InterruptedException + * @throws InterruptedException */ private void backgroundRun() { try { @@ -228,13 +232,15 @@ private void backgroundRun() { } } /** - * Block of records from the underlying iterator + * Block of records from the underlying iterator */ private static class IteratorBuffer implements Iterator { private final Throwable exception; private final Iterator it; + public IteratorBuffer(Iterable it) { - this.it = it != null ? it.iterator() : null;; + this.it = it != null ? it.iterator() : null; + ; this.exception = null; } @@ -247,9 +253,9 @@ public IteratorBuffer(Iterable it, Throwable exception) { this.it = it != null ? 
it.iterator() : null; this.exception = exception; } - + /** - * Record block indicating end of stream + * Record block indicating end of stream */ public IteratorBuffer() { this.it = null; @@ -265,11 +271,11 @@ public boolean hasNext() { public U next() { return it.next(); } - + public boolean isEndOfStream() { return it == null; } - + /** * Exception thrown when attempting to retrieve records from the underlying stream * @return exception thrown on background thread, null if no exception occurred diff --git a/src/main/java/htsjdk/samtools/util/BinaryCodec.java b/src/main/java/htsjdk/samtools/util/BinaryCodec.java index 51e23fed09..2c63319c38 100644 --- a/src/main/java/htsjdk/samtools/util/BinaryCodec.java +++ b/src/main/java/htsjdk/samtools/util/BinaryCodec.java @@ -1,688 +1,678 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ -package htsjdk.samtools.util; - -import java.io.ByteArrayInputStream; -import java.io.Closeable; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.SyncFailedException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.file.Files; -import java.nio.file.Path; - -/** - * Encapsulates file representation of various primitive data types. Forces little-endian disk representation. - * Note that this class is currently not very efficient. There are plans to increase the size of the ByteBuffer, - * and move data between the ByteBuffer and the underlying input or output stream in larger chunks. - * - * All the read methods throw RuntimeEOFException if the input stream is exhausted before the required number - * of bytes are read. - * - * @author Dave Tefft - */ -public class BinaryCodec implements Closeable { - - //Outstream to write to - private OutputStream outputStream; - //If a file or filename was given it will be stored here. Used for error reporting. - private String outputFileName; - - //Input stream to read from - private InputStream inputStream; - //If a file or filename was give to read from it will be stored here. Used for error reporting. - private String inputFileName; - - /* - Mode that the BinaryCodec is in. It is either writing to a binary file or reading from. - This is set to true if it is writing to a binary file - Right now we don't support reading and writing to the same file with the same BinaryCodec instance - */ - private boolean isWriting; - - /** - * For byte swapping. - */ - private ByteBuffer byteBuffer; - - /** - * For reading Strings of known length, this can reduce object creation - */ - private final byte[] scratchBuffer = new byte[16]; - - // Byte order used in BAM files. 
- private static final ByteOrder LITTLE_ENDIAN = ByteOrder.LITTLE_ENDIAN; - private static final byte NULL_BYTE[] = {0}; - - public static final long MAX_UBYTE = (Byte.MAX_VALUE * 2) + 1; - public static final long MAX_USHORT = (Short.MAX_VALUE * 2) + 1; - public static final long MAX_UINT = ((long)Integer.MAX_VALUE * 2) + 1; - - // We never serialize more than this much at a time (except for Strings) - private static final int MAX_BYTE_BUFFER = 8; - - ////////////////////////////////////////////////// - // Constructors // - ////////////////////////////////////////////////// - - /** - * Constructs BinaryCodec from a file and set its mode to writing or not - * - * @param path file to be written to or read from - * @param writing whether the file is being written to - */ - public BinaryCodec(final Path path, final boolean writing) { - this(); - try { - this.isWriting = writing; - if (this.isWriting) { - this.outputStream = IOUtil.maybeBufferOutputStream(Files.newOutputStream(path)); - this.outputFileName = path.getFileName().toString(); - } else { - this.inputStream = IOUtil.maybeBufferInputStream(Files.newInputStream(path)); - this.inputFileName = path.getFileName().toString(); - } - } catch (FileNotFoundException e) { - throw new RuntimeIOException("File not found: " + path, e); - } catch (IOException e) { - throw new RuntimeIOException("Error opening: " + path, e); - } - } - - /** - * Constructs BinaryCodec from a file and set its mode to writing or not - * - * @param file file to be written to or read from - * @param writing whether the file is being written to - */ - public BinaryCodec(final File file, final boolean writing) { - this(IOUtil.toPath(file), writing); - } - - /** - * Constructs BinaryCodec from a file name and set its mode to writing or not - * - * @param fileName name of the file to be written to or read from - * @param writing writing whether the file is being written to - */ - public BinaryCodec(final String fileName, final boolean writing) { - 
this(new File(fileName), writing); - } - - /** - * Constructs BinaryCodec from an output stream - * - * @param outputStream Stream to write to, since it's an output stream we know that isWriting - * should be set to true - */ - public BinaryCodec(final OutputStream outputStream) { - this(); - setOutputStream(outputStream); - } - - /** - * Constructs BinaryCodec from an input stream - * - * @param inputStream Stream to read from, since we are reading isWriting is set to false - */ - public BinaryCodec(final InputStream inputStream) { - this(); - setInputStream(inputStream); - } - - /** - * Ambiguous whether reading or writing until set{In,Out}putStream is called - */ - public BinaryCodec() { - initByteBuffer(); - } - - /** - * Shared among ctors. - * Note that if endianness is changed, all the unsigned methods must also be changed. - */ - private void initByteBuffer() { - byteBuffer = ByteBuffer.allocate(MAX_BYTE_BUFFER); - byteBuffer.order(LITTLE_ENDIAN); - } - - ////////////////////////////////////////////////// - // Writing methods // - ////////////////////////////////////////////////// - - - /** - * Write whatever has been put into the byte buffer - * @param numBytes -- how much to write. Note that in case of writing an unsigned value, - * more bytes were put into the ByteBuffer than will get written out. 
- */ - private void writeByteBuffer(final int numBytes) { - assert(numBytes <= byteBuffer.limit()); - writeBytes(byteBuffer.array(), 0, numBytes); - } - - /** - * Writes a byte to the output buffer - * - * @param bite byte array to write - */ - public void writeByte(final byte bite) { - byteBuffer.clear(); - byteBuffer.put(bite); - writeByteBuffer(1); - } - - public void writeByte(final int b) { - writeByte((byte)b); - } - - /** - * Writes a byte array to the output buffer - * - * @param bytes value to write - */ - public void writeBytes(final byte[] bytes) { - writeBytes(bytes, 0, bytes.length); - } - - public void writeBytes(final byte[] bytes, final int startOffset, final int numBytes) { - if (!isWriting) { - throw new IllegalStateException("Calling write method on BinaryCodec open for read."); - } - try { - outputStream.write(bytes, startOffset, numBytes); - } catch (IOException e) { - throw new RuntimeIOException(constructErrorMessage("Write error"), e); - } - } - - /** - * Write a 32-bit int to the output stream - * - * @param value int to write - */ - public void writeInt(final int value) { - byteBuffer.clear(); - byteBuffer.putInt(value); - writeByteBuffer(4); - } - - /** - * Write a double (8 bytes) to the output stream - * - * @param value double to write - */ - public void writeDouble(final double value) { - byteBuffer.clear(); - byteBuffer.putDouble(value); - writeByteBuffer(8); - } - - /** - * Write a 64-bit long to the output stream - * - * @param value long to write - */ - public void writeLong(final long value) { - byteBuffer.clear(); - byteBuffer.putLong(value); - writeByteBuffer(8); - } - - - /** - * Write a 16-bit short to output stream - */ - public void writeShort(final short value) { - byteBuffer.clear(); - byteBuffer.putShort(value); - writeByteBuffer(2); - } - - /** - * Write a float (4 bytes) to the output stream - * - * @param value float to write - */ - public void writeFloat(final float value) { - byteBuffer.clear(); - 
byteBuffer.putFloat(value); - writeByteBuffer(4); - } - - /** - * Writes a boolean (1 byte) to the output buffer - * - * @param value boolean to write - */ - public void writeBoolean(final boolean value) { - byteBuffer.clear(); - byteBuffer.put(value ? (byte)1 : (byte)0); - writeByteBuffer(1); - } - - /** - * Writes a string to the buffer as ASCII bytes - * - * @param value string to write to buffer - * @param writeLength prefix the string with the length as a 32-bit int - * @param appendNull add a null byte to the end of the string - */ - public void writeString(final String value, final boolean writeLength, final boolean appendNull) { - if (writeLength) { - int lengthToWrite = value.length(); - if (appendNull) lengthToWrite++; - writeInt(lengthToWrite); - } - - //Actually writes the string to a buffer - writeString(value); - - if (appendNull) writeBytes(NULL_BYTE); - - } - - - /** - * Write a string to the buffer as ASCII bytes - * - * @param value string to write - */ - private void writeString(final String value) { - writeBytes(StringUtil.stringToBytes(value)); - } - - /** - * Write an 8-bit unsigned byte. - * NOTE: This method will break if we change to big-endian. - */ - public void writeUByte(final short val) { - if (val < 0) { - throw new IllegalArgumentException("Negative value (" + val + ") passed to unsigned writing method."); - } - if (val > MAX_UBYTE) { - throw new IllegalArgumentException("Value (" + val + ") to large to be written as ubyte."); - } - byteBuffer.clear(); - byteBuffer.putShort(val); - writeByteBuffer(1); - } - - /** - * Write a 16-bit unsigned short. - * NOTE: This method will break if we change to big-endian. 
- */ - public void writeUShort(final int val) { - if (val < 0) { - throw new IllegalArgumentException("Negative value (" + val + ") passed to unsigned writing method."); - } - if (val > MAX_USHORT) { - throw new IllegalArgumentException("Value (" + val + ") too large to be written as ushort."); - } - byteBuffer.clear(); - byteBuffer.putInt(val); - writeByteBuffer(2); - } - - /** - * Write a 32-bit unsigned int. - * NOTE: This method will break if we change to big-endian. - */ - public void writeUInt(final long val) { - if (val < 0) { - throw new IllegalArgumentException("Negative value (" + val + ") passed to unsigned writing method."); - } - if (val > MAX_UINT) { - throw new IllegalArgumentException("Value (" + val + ") to large to be written as uint."); - } - byteBuffer.clear(); - byteBuffer.putLong(val); - writeByteBuffer(4); - } - - ////////////////////////////////////////////////// - // Reading methods // - ////////////////////////////////////////////////// - - /** - * Read a byte array from the input stream. - * - * @throws htsjdk.samtools.util.RuntimeEOFException if fewer than buffer.length bytes to read - */ - public void readBytes(final byte[] buffer) { - readBytes(buffer, 0, buffer.length); - } - - /** - * Read a byte array from the input stream - * - * @param buffer where to put bytes read - * @param offset offset to start putting bytes into buffer - * @param length number of bytes to read - * @throws RuntimeEOFException if fewer than length bytes to read - */ - public void readBytes(final byte[] buffer, final int offset, final int length) { - int totalNumRead = 0; - do { - final int numRead = readBytesOrFewer(buffer, offset + totalNumRead, length - totalNumRead); - if (numRead < 0) { - String msg = String.format("Premature EOF. 
Expected %d but only received %d", length, totalNumRead); - throw new RuntimeEOFException(constructErrorMessage(msg)); - } else { - totalNumRead += numRead; - } - } while (totalNumRead < length); - } - - /** - * Reads a byte array from the input stream. - * - * @param buffer where to put bytes read - * @param offset offset to start putting bytes into buffer - * @param length number of bytes to read. Fewer bytes may be read if EOF is reached before length bytes - * have been read. - * @return the total number of bytes read into the buffer, or -1 if there is no more data because the end of the stream has been reached. - */ - public int readBytesOrFewer(final byte[] buffer, final int offset, final int length) { - if (isWriting) { - throw new IllegalStateException("Calling read method on BinaryCodec open for write."); - } - try { - // Some implementations of InputStream do not behave well when the buffer is empty and length is zero, for - // example ByteArrayInputStream, so we must check for length equal to zero. - // See: https://bugs.java.com/view_bug.do?bug_id=6766844 - return (length == 0) ? 0 : inputStream.read(buffer, offset, length); - } catch (IOException e) { - throw new RuntimeIOException(constructErrorMessage("Read error"), e); - } - } - - /** - * @return a single byte read from the input stream. - */ - public byte readByte() { - if (isWriting) { - throw new IllegalStateException("Calling read method on BinaryCodec open for write."); - } - try { - final int ret = inputStream.read(); - if (ret == -1) { - throw new RuntimeEOFException(constructErrorMessage("Premature EOF")); - } - return (byte)ret; - } catch (IOException e) { - throw new RuntimeIOException(constructErrorMessage("Read error"), e); - } - } - - /** - * @return true if it is possible to know for sure if at EOF, and it is known for sure. - * If the input stream is a ByteArrayInputStream, this is faster than causing a RuntimeEOFException - * to be thrown. 
- */ - public boolean knownAtEof() { - if (isWriting) { - throw new IllegalStateException("Calling knownAtEof method on BinaryCodec open for write."); - } - try { - return inputStream instanceof ByteArrayInputStream && inputStream.available() == 0; - } catch (IOException e) { - throw new RuntimeIOException(constructErrorMessage("available() error"), e); - } - } - - /** - * Read a string off the input stream, as ASCII bytes - * - * @param length length of string to read - * @return String read from stream - */ - public String readString(final int length) { - final byte[] buffer; - // Recycle single buffer if possible - if (length <= scratchBuffer.length) { - buffer = scratchBuffer; - } else { - buffer = new byte[length]; - - } - readBytes(buffer, 0, length); - - return StringUtil.bytesToString(buffer, 0, length); - } - - /** - * Read ASCII bytes from the input stream until a null byte is read - * @return String constructed from the ASCII bytes read - */ - public String readNullTerminatedString() { - return StringUtil.readNullTerminatedString(this); - } - - /** - * Read an int length, and then a String of that length - * @param devourNull if true, the length include a null terminator, which is read and discarded - */ - public String readLengthAndString(final boolean devourNull) { - int length = readInt(); - if (devourNull) { - --length; - } - final String ret = readString(length); - if (devourNull) { - readByte(); - } - return ret; - } - - private void readByteBuffer(final int numBytes) { - assert(numBytes <= byteBuffer.capacity()); - readBytes(byteBuffer.array(), 0, numBytes); - byteBuffer.limit(byteBuffer.capacity()); - byteBuffer.position(numBytes); - } - - /** - * Read an int off the input stream - * - * @return int from input stream - */ - public int readInt() { - readByteBuffer(4); - byteBuffer.flip(); - return byteBuffer.getInt(); - } - - /** - * Reads a double off the input stream - * - * @return double - */ - public double readDouble() { - readByteBuffer(8); 
- byteBuffer.flip(); - return byteBuffer.getDouble(); - } - - /** - * Reads a long off the input stream - * - * @return long - */ - public long readLong() { - readByteBuffer(8); - byteBuffer.flip(); - return byteBuffer.getLong(); - } - - public short readShort() { - readByteBuffer(2); - byteBuffer.flip(); - return byteBuffer.getShort(); - } - - /** - * Reads a float off the input stream - * - * @return float - */ - public float readFloat() { - readByteBuffer(4); - byteBuffer.flip(); - return byteBuffer.getFloat(); - } - - /** - * Reads a boolean off the input stream, represented as a byte with value 1 or 0 - * - * @return boolean - */ - public boolean readBoolean() { - return (((int)readByte()) == 1); - } - - /** - * Reads an 8-bit unsigned byte from the input stream. - * This method assumes little-endianness. - */ - public short readUByte() { - readByteBuffer(1); - byteBuffer.put((byte)0); - byteBuffer.flip(); - return byteBuffer.getShort(); - } - - /** - * Reads a 16-bit unsigned short from the input stream. - * This method assumes little-endianness. - */ - public int readUShort() { - readByteBuffer(2); - byteBuffer.putShort((short)0); - byteBuffer.flip(); - return byteBuffer.getInt(); - } - - /** - * Reads a 32-bit unsigned int from the input stream. - * This method assumes little-endianness. - */ - public long readUInt() { - readByteBuffer(4); - byteBuffer.putInt(0); - byteBuffer.flip(); - return byteBuffer.getLong(); - } - - /** - * Close the appropriate stream - */ - @Override - public void close() { - try { - if (this.isWriting) { - // To the degree possible, make sure the bytes get forced to the file system, - // or else cause an exception to be thrown. 
- this.outputStream.flush(); - if (this.outputStream instanceof FileOutputStream) { - FileOutputStream fos = (FileOutputStream)this.outputStream; - try { - fos.getFD().sync(); - } catch (SyncFailedException e) { - // Since the sync is belt-and-suspenders anyway, don't throw an exception if it fails, - // because on some OSs it will fail for some types of output. E.g. writing to /dev/null - // on some Unixes. - } - } - this.outputStream.close(); - } - else this.inputStream.close(); - } catch (IOException e) { - throw new RuntimeIOException(e.getMessage(), e); - } - } - - private String constructErrorMessage(final String msg) { - final StringBuilder sb = new StringBuilder(msg); - sb.append("; BinaryCodec in ") - .append(isWriting? "write": "read").append("mode; "); - final String filename = isWriting? outputFileName: inputFileName; - if (filename != null) { - sb.append("file: ").append(filename); - } else { - sb.append("streamed file (filename not available)"); - } - return sb.toString(); - } - - ////////////////////////////////////////////////// - // Some getters // - ////////////////////////////////////////////////// - - - public String getInputFileName() { - return inputFileName; - } - - public String getOutputFileName() { - return outputFileName; - } - - public void setOutputFileName(final String outputFileName) { - this.outputFileName = outputFileName; - } - - public void setInputFileName(final String inputFileName) { - this.inputFileName = inputFileName; - } - - public boolean isWriting() { - return isWriting; - } - - public OutputStream getOutputStream() { - return outputStream; - } - - public InputStream getInputStream() { - return inputStream; - } - - public void setInputStream(final InputStream is) { - isWriting = false; - this.inputStream = is; - } - - public void setOutputStream(final OutputStream os) { - isWriting = true; - this.outputStream = os; - - } -} +/* + * The MIT License + * + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby 
granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools.util; + +import java.io.ByteArrayInputStream; +import java.io.Closeable; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.io.SyncFailedException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.file.Files; +import java.nio.file.Path; + +/** + * Encapsulates file representation of various primitive data types. Forces little-endian disk representation. + * Note that this class is currently not very efficient. There are plans to increase the size of the ByteBuffer, + * and move data between the ByteBuffer and the underlying input or output stream in larger chunks. + * + * All the read methods throw RuntimeEOFException if the input stream is exhausted before the required number + * of bytes are read. 
+ * + * @author Dave Tefft + */ +public class BinaryCodec implements Closeable { + + // Outstream to write to + private OutputStream outputStream; + // If a file or filename was given it will be stored here. Used for error reporting. + private String outputFileName; + + // Input stream to read from + private InputStream inputStream; + // If a file or filename was give to read from it will be stored here. Used for error reporting. + private String inputFileName; + + /* + Mode that the BinaryCodec is in. It is either writing to a binary file or reading from. + This is set to true if it is writing to a binary file + Right now we don't support reading and writing to the same file with the same BinaryCodec instance + */ + private boolean isWriting; + + /** + * For byte swapping. + */ + private ByteBuffer byteBuffer; + + /** + * For reading Strings of known length, this can reduce object creation + */ + private final byte[] scratchBuffer = new byte[16]; + + // Byte order used in BAM files. + private static final ByteOrder LITTLE_ENDIAN = ByteOrder.LITTLE_ENDIAN; + private static final byte NULL_BYTE[] = {0}; + + public static final long MAX_UBYTE = (Byte.MAX_VALUE * 2) + 1; + public static final long MAX_USHORT = (Short.MAX_VALUE * 2) + 1; + public static final long MAX_UINT = ((long) Integer.MAX_VALUE * 2) + 1; + + // We never serialize more than this much at a time (except for Strings) + private static final int MAX_BYTE_BUFFER = 8; + + ////////////////////////////////////////////////// + // Constructors // + ////////////////////////////////////////////////// + + /** + * Constructs BinaryCodec from a file and set its mode to writing or not + * + * @param path file to be written to or read from + * @param writing whether the file is being written to + */ + public BinaryCodec(final Path path, final boolean writing) { + this(); + try { + this.isWriting = writing; + if (this.isWriting) { + this.outputStream = IOUtil.maybeBufferOutputStream(Files.newOutputStream(path)); + 
this.outputFileName = path.getFileName().toString(); + } else { + this.inputStream = IOUtil.maybeBufferInputStream(Files.newInputStream(path)); + this.inputFileName = path.getFileName().toString(); + } + } catch (FileNotFoundException e) { + throw new RuntimeIOException("File not found: " + path, e); + } catch (IOException e) { + throw new RuntimeIOException("Error opening: " + path, e); + } + } + + /** + * Constructs BinaryCodec from a file and set its mode to writing or not + * + * @param file file to be written to or read from + * @param writing whether the file is being written to + */ + public BinaryCodec(final File file, final boolean writing) { + this(IOUtil.toPath(file), writing); + } + + /** + * Constructs BinaryCodec from a file name and set its mode to writing or not + * + * @param fileName name of the file to be written to or read from + * @param writing writing whether the file is being written to + */ + public BinaryCodec(final String fileName, final boolean writing) { + this(new File(fileName), writing); + } + + /** + * Constructs BinaryCodec from an output stream + * + * @param outputStream Stream to write to, since it's an output stream we know that isWriting + * should be set to true + */ + public BinaryCodec(final OutputStream outputStream) { + this(); + setOutputStream(outputStream); + } + + /** + * Constructs BinaryCodec from an input stream + * + * @param inputStream Stream to read from, since we are reading isWriting is set to false + */ + public BinaryCodec(final InputStream inputStream) { + this(); + setInputStream(inputStream); + } + + /** + * Ambiguous whether reading or writing until set{In,Out}putStream is called + */ + public BinaryCodec() { + initByteBuffer(); + } + + /** + * Shared among ctors. + * Note that if endianness is changed, all the unsigned methods must also be changed. 
+ */ + private void initByteBuffer() { + byteBuffer = ByteBuffer.allocate(MAX_BYTE_BUFFER); + byteBuffer.order(LITTLE_ENDIAN); + } + + ////////////////////////////////////////////////// + // Writing methods // + ////////////////////////////////////////////////// + + /** + * Write whatever has been put into the byte buffer + * @param numBytes -- how much to write. Note that in case of writing an unsigned value, + * more bytes were put into the ByteBuffer than will get written out. + */ + private void writeByteBuffer(final int numBytes) { + assert (numBytes <= byteBuffer.limit()); + writeBytes(byteBuffer.array(), 0, numBytes); + } + + /** + * Writes a byte to the output buffer + * + * @param bite byte array to write + */ + public void writeByte(final byte bite) { + byteBuffer.clear(); + byteBuffer.put(bite); + writeByteBuffer(1); + } + + public void writeByte(final int b) { + writeByte((byte) b); + } + + /** + * Writes a byte array to the output buffer + * + * @param bytes value to write + */ + public void writeBytes(final byte[] bytes) { + writeBytes(bytes, 0, bytes.length); + } + + public void writeBytes(final byte[] bytes, final int startOffset, final int numBytes) { + if (!isWriting) { + throw new IllegalStateException("Calling write method on BinaryCodec open for read."); + } + try { + outputStream.write(bytes, startOffset, numBytes); + } catch (IOException e) { + throw new RuntimeIOException(constructErrorMessage("Write error"), e); + } + } + + /** + * Write a 32-bit int to the output stream + * + * @param value int to write + */ + public void writeInt(final int value) { + byteBuffer.clear(); + byteBuffer.putInt(value); + writeByteBuffer(4); + } + + /** + * Write a double (8 bytes) to the output stream + * + * @param value double to write + */ + public void writeDouble(final double value) { + byteBuffer.clear(); + byteBuffer.putDouble(value); + writeByteBuffer(8); + } + + /** + * Write a 64-bit long to the output stream + * + * @param value long to write + */ + 
public void writeLong(final long value) { + byteBuffer.clear(); + byteBuffer.putLong(value); + writeByteBuffer(8); + } + + /** + * Write a 16-bit short to output stream + */ + public void writeShort(final short value) { + byteBuffer.clear(); + byteBuffer.putShort(value); + writeByteBuffer(2); + } + + /** + * Write a float (4 bytes) to the output stream + * + * @param value float to write + */ + public void writeFloat(final float value) { + byteBuffer.clear(); + byteBuffer.putFloat(value); + writeByteBuffer(4); + } + + /** + * Writes a boolean (1 byte) to the output buffer + * + * @param value boolean to write + */ + public void writeBoolean(final boolean value) { + byteBuffer.clear(); + byteBuffer.put(value ? (byte) 1 : (byte) 0); + writeByteBuffer(1); + } + + /** + * Writes a string to the buffer as ASCII bytes + * + * @param value string to write to buffer + * @param writeLength prefix the string with the length as a 32-bit int + * @param appendNull add a null byte to the end of the string + */ + public void writeString(final String value, final boolean writeLength, final boolean appendNull) { + if (writeLength) { + int lengthToWrite = value.length(); + if (appendNull) lengthToWrite++; + writeInt(lengthToWrite); + } + + // Actually writes the string to a buffer + writeString(value); + + if (appendNull) writeBytes(NULL_BYTE); + } + + /** + * Write a string to the buffer as ASCII bytes + * + * @param value string to write + */ + private void writeString(final String value) { + writeBytes(StringUtil.stringToBytes(value)); + } + + /** + * Write an 8-bit unsigned byte. + * NOTE: This method will break if we change to big-endian. 
+ */ + public void writeUByte(final short val) { + if (val < 0) { + throw new IllegalArgumentException("Negative value (" + val + ") passed to unsigned writing method."); + } + if (val > MAX_UBYTE) { + throw new IllegalArgumentException("Value (" + val + ") to large to be written as ubyte."); + } + byteBuffer.clear(); + byteBuffer.putShort(val); + writeByteBuffer(1); + } + + /** + * Write a 16-bit unsigned short. + * NOTE: This method will break if we change to big-endian. + */ + public void writeUShort(final int val) { + if (val < 0) { + throw new IllegalArgumentException("Negative value (" + val + ") passed to unsigned writing method."); + } + if (val > MAX_USHORT) { + throw new IllegalArgumentException("Value (" + val + ") too large to be written as ushort."); + } + byteBuffer.clear(); + byteBuffer.putInt(val); + writeByteBuffer(2); + } + + /** + * Write a 32-bit unsigned int. + * NOTE: This method will break if we change to big-endian. + */ + public void writeUInt(final long val) { + if (val < 0) { + throw new IllegalArgumentException("Negative value (" + val + ") passed to unsigned writing method."); + } + if (val > MAX_UINT) { + throw new IllegalArgumentException("Value (" + val + ") to large to be written as uint."); + } + byteBuffer.clear(); + byteBuffer.putLong(val); + writeByteBuffer(4); + } + + ////////////////////////////////////////////////// + // Reading methods // + ////////////////////////////////////////////////// + + /** + * Read a byte array from the input stream. 
+ * + * @throws htsjdk.samtools.util.RuntimeEOFException if fewer than buffer.length bytes to read + */ + public void readBytes(final byte[] buffer) { + readBytes(buffer, 0, buffer.length); + } + + /** + * Read a byte array from the input stream + * + * @param buffer where to put bytes read + * @param offset offset to start putting bytes into buffer + * @param length number of bytes to read + * @throws RuntimeEOFException if fewer than length bytes to read + */ + public void readBytes(final byte[] buffer, final int offset, final int length) { + int totalNumRead = 0; + do { + final int numRead = readBytesOrFewer(buffer, offset + totalNumRead, length - totalNumRead); + if (numRead < 0) { + String msg = String.format("Premature EOF. Expected %d but only received %d", length, totalNumRead); + throw new RuntimeEOFException(constructErrorMessage(msg)); + } else { + totalNumRead += numRead; + } + } while (totalNumRead < length); + } + + /** + * Reads a byte array from the input stream. + * + * @param buffer where to put bytes read + * @param offset offset to start putting bytes into buffer + * @param length number of bytes to read. Fewer bytes may be read if EOF is reached before length bytes + * have been read. + * @return the total number of bytes read into the buffer, or -1 if there is no more data because the end of the stream has been reached. + */ + public int readBytesOrFewer(final byte[] buffer, final int offset, final int length) { + if (isWriting) { + throw new IllegalStateException("Calling read method on BinaryCodec open for write."); + } + try { + // Some implementations of InputStream do not behave well when the buffer is empty and length is zero, for + // example ByteArrayInputStream, so we must check for length equal to zero. + // See: https://bugs.java.com/view_bug.do?bug_id=6766844 + return (length == 0) ? 
0 : inputStream.read(buffer, offset, length); + } catch (IOException e) { + throw new RuntimeIOException(constructErrorMessage("Read error"), e); + } + } + + /** + * @return a single byte read from the input stream. + */ + public byte readByte() { + if (isWriting) { + throw new IllegalStateException("Calling read method on BinaryCodec open for write."); + } + try { + final int ret = inputStream.read(); + if (ret == -1) { + throw new RuntimeEOFException(constructErrorMessage("Premature EOF")); + } + return (byte) ret; + } catch (IOException e) { + throw new RuntimeIOException(constructErrorMessage("Read error"), e); + } + } + + /** + * @return true if it is possible to know for sure if at EOF, and it is known for sure. + * If the input stream is a ByteArrayInputStream, this is faster than causing a RuntimeEOFException + * to be thrown. + */ + public boolean knownAtEof() { + if (isWriting) { + throw new IllegalStateException("Calling knownAtEof method on BinaryCodec open for write."); + } + try { + return inputStream instanceof ByteArrayInputStream && inputStream.available() == 0; + } catch (IOException e) { + throw new RuntimeIOException(constructErrorMessage("available() error"), e); + } + } + + /** + * Read a string off the input stream, as ASCII bytes + * + * @param length length of string to read + * @return String read from stream + */ + public String readString(final int length) { + final byte[] buffer; + // Recycle single buffer if possible + if (length <= scratchBuffer.length) { + buffer = scratchBuffer; + } else { + buffer = new byte[length]; + } + readBytes(buffer, 0, length); + + return StringUtil.bytesToString(buffer, 0, length); + } + + /** + * Read ASCII bytes from the input stream until a null byte is read + * @return String constructed from the ASCII bytes read + */ + public String readNullTerminatedString() { + return StringUtil.readNullTerminatedString(this); + } + + /** + * Read an int length, and then a String of that length + * @param devourNull 
if true, the length include a null terminator, which is read and discarded + */ + public String readLengthAndString(final boolean devourNull) { + int length = readInt(); + if (devourNull) { + --length; + } + final String ret = readString(length); + if (devourNull) { + readByte(); + } + return ret; + } + + private void readByteBuffer(final int numBytes) { + assert (numBytes <= byteBuffer.capacity()); + readBytes(byteBuffer.array(), 0, numBytes); + byteBuffer.limit(byteBuffer.capacity()); + byteBuffer.position(numBytes); + } + + /** + * Read an int off the input stream + * + * @return int from input stream + */ + public int readInt() { + readByteBuffer(4); + byteBuffer.flip(); + return byteBuffer.getInt(); + } + + /** + * Reads a double off the input stream + * + * @return double + */ + public double readDouble() { + readByteBuffer(8); + byteBuffer.flip(); + return byteBuffer.getDouble(); + } + + /** + * Reads a long off the input stream + * + * @return long + */ + public long readLong() { + readByteBuffer(8); + byteBuffer.flip(); + return byteBuffer.getLong(); + } + + public short readShort() { + readByteBuffer(2); + byteBuffer.flip(); + return byteBuffer.getShort(); + } + + /** + * Reads a float off the input stream + * + * @return float + */ + public float readFloat() { + readByteBuffer(4); + byteBuffer.flip(); + return byteBuffer.getFloat(); + } + + /** + * Reads a boolean off the input stream, represented as a byte with value 1 or 0 + * + * @return boolean + */ + public boolean readBoolean() { + return (((int) readByte()) == 1); + } + + /** + * Reads an 8-bit unsigned byte from the input stream. + * This method assumes little-endianness. + */ + public short readUByte() { + readByteBuffer(1); + byteBuffer.put((byte) 0); + byteBuffer.flip(); + return byteBuffer.getShort(); + } + + /** + * Reads a 16-bit unsigned short from the input stream. + * This method assumes little-endianness. 
+ */ + public int readUShort() { + readByteBuffer(2); + byteBuffer.putShort((short) 0); + byteBuffer.flip(); + return byteBuffer.getInt(); + } + + /** + * Reads a 32-bit unsigned int from the input stream. + * This method assumes little-endianness. + */ + public long readUInt() { + readByteBuffer(4); + byteBuffer.putInt(0); + byteBuffer.flip(); + return byteBuffer.getLong(); + } + + /** + * Close the appropriate stream + */ + @Override + public void close() { + try { + if (this.isWriting) { + // To the degree possible, make sure the bytes get forced to the file system, + // or else cause an exception to be thrown. + this.outputStream.flush(); + if (this.outputStream instanceof FileOutputStream) { + FileOutputStream fos = (FileOutputStream) this.outputStream; + try { + fos.getFD().sync(); + } catch (SyncFailedException e) { + // Since the sync is belt-and-suspenders anyway, don't throw an exception if it fails, + // because on some OSs it will fail for some types of output. E.g. writing to /dev/null + // on some Unixes. + } + } + this.outputStream.close(); + } else this.inputStream.close(); + } catch (IOException e) { + throw new RuntimeIOException(e.getMessage(), e); + } + } + + private String constructErrorMessage(final String msg) { + final StringBuilder sb = new StringBuilder(msg); + sb.append("; BinaryCodec in ").append(isWriting ? "write" : "read").append("mode; "); + final String filename = isWriting ? 
outputFileName : inputFileName; + if (filename != null) { + sb.append("file: ").append(filename); + } else { + sb.append("streamed file (filename not available)"); + } + return sb.toString(); + } + + ////////////////////////////////////////////////// + // Some getters // + ////////////////////////////////////////////////// + + public String getInputFileName() { + return inputFileName; + } + + public String getOutputFileName() { + return outputFileName; + } + + public void setOutputFileName(final String outputFileName) { + this.outputFileName = outputFileName; + } + + public void setInputFileName(final String inputFileName) { + this.inputFileName = inputFileName; + } + + public boolean isWriting() { + return isWriting; + } + + public OutputStream getOutputStream() { + return outputStream; + } + + public InputStream getInputStream() { + return inputStream; + } + + public void setInputStream(final InputStream is) { + isWriting = false; + this.inputStream = is; + } + + public void setOutputStream(final OutputStream os) { + isWriting = true; + this.outputStream = os; + } +} diff --git a/src/main/java/htsjdk/samtools/util/BlockCompressedFilePointerUtil.java b/src/main/java/htsjdk/samtools/util/BlockCompressedFilePointerUtil.java index c0433f7ac0..70040be6e2 100644 --- a/src/main/java/htsjdk/samtools/util/BlockCompressedFilePointerUtil.java +++ b/src/main/java/htsjdk/samtools/util/BlockCompressedFilePointerUtil.java @@ -35,7 +35,7 @@ public class BlockCompressedFilePointerUtil { public static final long MAX_BLOCK_ADDRESS = ADDRESS_MASK; public static final int MAX_OFFSET = OFFSET_MASK; - + /** * @param vfp1 * @param vfp2 @@ -57,7 +57,7 @@ public static int compare(final long vfp1, final long vfp2) { public static boolean areInSameOrAdjacentBlocks(final long vfp1, final long vfp2) { final long block1 = getBlockAddress(vfp1); final long block2 = getBlockAddress(vfp2); - return (block1 == block2 || block1 + 1 == block2); + return (block1 == block2 || block1 + 1 == block2); } 
/** @@ -122,7 +122,8 @@ public static long shift(final long virtualFilePointer, final long offset) { } public static String asString(final long vfp) { - return String.format("%d(0x%x): (block address: %d, offset: %d)", vfp, vfp, getBlockAddress(vfp), getBlockOffset(vfp)); + return String.format( + "%d(0x%x): (block address: %d, offset: %d)", vfp, vfp, getBlockAddress(vfp), getBlockOffset(vfp)); } /** @@ -131,7 +132,9 @@ public static String asString(final long vfp) { * @return {@code vfp} formatted as string in address:offset form */ public static String asAddressOffsetString(final long vfp) { - return String.format(Locale.US,"%d:%d", + return String.format( + Locale.US, + "%d:%d", BlockCompressedFilePointerUtil.getBlockAddress(vfp), BlockCompressedFilePointerUtil.getBlockOffset(vfp)); } diff --git a/src/main/java/htsjdk/samtools/util/BlockCompressedInputStream.java b/src/main/java/htsjdk/samtools/util/BlockCompressedInputStream.java index 898b15c495..4b1e424121 100755 --- a/src/main/java/htsjdk/samtools/util/BlockCompressedInputStream.java +++ b/src/main/java/htsjdk/samtools/util/BlockCompressedInputStream.java @@ -23,7 +23,6 @@ */ package htsjdk.samtools.util; - import htsjdk.samtools.FileTruncatedException; import htsjdk.samtools.SAMException; import htsjdk.samtools.seekablestream.SeekableBufferedStream; @@ -32,7 +31,6 @@ import htsjdk.samtools.seekablestream.SeekablePathStream; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.zip.InflaterFactory; - import java.io.*; import java.net.URL; import java.nio.ByteBuffer; @@ -48,19 +46,19 @@ * It probably is not necessary to wrap this stream in a buffering stream, because there is internal buffering. * The advantage of BGZF over conventional GZip format is that BGZF allows for seeking without having to read the * entire file up to the location being sought. Note that seeking is only possible if the input stream is seekable. - * - * Note that this implementation is not synchronized. 
If multiple threads access an instance concurrently, it must be synchronized externally. + * + * Note that this implementation is not synchronized. If multiple threads access an instance concurrently, it must be synchronized externally. * * c.f. http://samtools.sourceforge.net/SAM1.pdf for details of BGZF format */ public class BlockCompressedInputStream extends InputStream implements LocationAware { - public final static String INCORRECT_HEADER_SIZE_MSG = "Incorrect header size for file: "; - public final static String UNEXPECTED_BLOCK_LENGTH_MSG = "Unexpected compressed block length: "; - public final static String PREMATURE_END_MSG = "Premature end of file: "; - public final static String CANNOT_SEEK_STREAM_MSG = "Cannot seek a position for a non-file stream"; - public final static String CANNOT_SEEK_CLOSED_STREAM_MSG = "Cannot seek a position for a closed stream"; - public final static String INVALID_FILE_PTR_MSG = "Invalid file pointer: "; + public static final String INCORRECT_HEADER_SIZE_MSG = "Incorrect header size for file: "; + public static final String UNEXPECTED_BLOCK_LENGTH_MSG = "Unexpected compressed block length: "; + public static final String PREMATURE_END_MSG = "Premature end of file: "; + public static final String CANNOT_SEEK_STREAM_MSG = "Cannot seek a position for a non-file stream"; + public static final String CANNOT_SEEK_CLOSED_STREAM_MSG = "Cannot seek a position for a closed stream"; + public static final String INVALID_FILE_PTR_MSG = "Invalid file pointer: "; private InputStream mStream = null; private boolean mIsClosed = false; @@ -103,11 +101,11 @@ public BlockCompressedInputStream(final InputStream stream, final boolean allowB * @param allowBuffering if true, allow buffering * @param inflaterFactory {@link InflaterFactory} used by {@link BlockGunzipper} */ - public BlockCompressedInputStream(final InputStream stream, final boolean allowBuffering, final InflaterFactory inflaterFactory) { + public BlockCompressedInputStream( + final 
InputStream stream, final boolean allowBuffering, final InflaterFactory inflaterFactory) { if (allowBuffering) { mStream = IOUtil.toBufferedStream(stream); - } - else { + } else { mStream = stream; } @@ -124,7 +122,6 @@ public BlockCompressedInputStream(final File file) throws IOException { this(file, BlockGunzipper.getDefaultInflaterFactory()); } - /** * Equivalent constructor for Path as the one that takes a File. Supports seeking. */ @@ -132,7 +129,6 @@ public BlockCompressedInputStream(final Path file) throws IOException { this(new SeekablePathStream(file)); } - /** * Use this ctor if you wish to call seek() * @param file source of bytes @@ -145,7 +141,6 @@ public BlockCompressedInputStream(final File file, final InflaterFactory inflate blockGunzipper = new BlockGunzipper(inflaterFactory); } - /** * @param url source of bytes */ @@ -244,7 +239,7 @@ public void close() throws IOException { * Reads the next byte of data from the input stream. The value byte is returned as an int in the range 0 to 255. * If no byte is available because the end of the stream has been reached, the value -1 is returned. * This method blocks until input data is available, the end of the stream is detected, or an exception is thrown. - + * * @return the next byte of data, or -1 if the end of the stream is reached. */ @Override @@ -271,9 +266,9 @@ public int read(final byte[] buffer) throws IOException { private volatile ByteArrayOutputStream buf = null; private static final byte eol = '\n'; private static final byte eolCr = '\r'; - + /** - * Reads a whole line. A line is considered to be terminated by either a line feed ('\n'), + * Reads a whole line. A line is considered to be terminated by either a line feed ('\n'), * carriage return ('\r') or carriage return followed by a line feed ("\r\n"). 
* * @return A String containing the contents of the line, excluding the line terminating @@ -286,7 +281,7 @@ public String readLine() throws IOException { if (available == 0) { return null; } - if(null == buf){ // lazy initialisation + if (null == buf) { // lazy initialisation buf = new ByteArrayOutputStream(8192); } buf.reset(); @@ -295,27 +290,27 @@ public String readLine() throws IOException { while (!done) { int linetmpPos = mCurrentOffset; int bCnt = 0; - while((available-- > 0)){ + while ((available-- > 0)) { final byte c = mCurrentBlock.mBlock[linetmpPos++]; - if(c == eol){ // found \n + if (c == eol) { // found \n done = true; break; - } else if(foundCr){ // previous char was \r + } else if (foundCr) { // previous char was \r --linetmpPos; // current char is not \n so put it back done = true; break; - } else if(c == eolCr){ // found \r + } else if (c == eolCr) { // found \r foundCr = true; continue; // no ++bCnt } ++bCnt; } - if(mCurrentOffset < linetmpPos) { + if (mCurrentOffset < linetmpPos) { buf.write(mCurrentBlock.mBlock, mCurrentOffset, bCnt); mCurrentOffset = linetmpPos; } available = available(); - if(available == 0) { + if (available == 0) { // EOF done = true; } @@ -397,20 +392,19 @@ public void seek(final long pos) throws IOException { } mCurrentOffset = uncompressedOffset; } - + /** - * Performs cleanup required before seek is called on the underlying stream + * Performs cleanup required before seek is called on the underlying stream */ - protected void prepareForSeek() { - } + protected void prepareForSeek() {} private boolean eof() throws IOException { if (mFile.eof()) { return true; } // If the last remaining block is the size of the EMPTY_GZIP_BLOCK, this is the same as being at EOF. 
- return (mFile.length() - (mCurrentBlock.mBlockAddress - + mCurrentBlock.mBlockCompressedSize) == BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length); + return (mFile.length() - (mCurrentBlock.mBlockAddress + mCurrentBlock.mBlockCompressedSize) + == BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length); } /** @@ -427,7 +421,8 @@ public long getFilePointer() { // If current offset is at the end of the current block, file // pointer should point // to the beginning of the next block. - return BlockCompressedFilePointerUtil.makeFilePointer(mCurrentBlock.mBlockAddress + mCurrentBlock.mBlockCompressedSize, 0); + return BlockCompressedFilePointerUtil.makeFilePointer( + mCurrentBlock.mBlockAddress + mCurrentBlock.mBlockCompressedSize, 0); } return BlockCompressedFilePointerUtil.makeFilePointer(mCurrentBlock.mBlockAddress, mCurrentOffset); } @@ -440,7 +435,7 @@ public long getPosition() { public static long getFileBlock(final long bgzfOffset) { return BlockCompressedFilePointerUtil.getBlockAddress(bgzfOffset); } - + /** * @param stream Must be at start of file. Throws RuntimeException if !stream.markSupported(). * @return true if the given file looks like a valid BGZF file. 
@@ -457,12 +452,12 @@ public static boolean isValidFile(final InputStream stream) throws IOException { } private static boolean isValidBlockHeader(final byte[] buffer) { - return (buffer[0] == BlockCompressedStreamConstants.GZIP_ID1 && - (buffer[1] & 0xFF) == BlockCompressedStreamConstants.GZIP_ID2 && - (buffer[3] & BlockCompressedStreamConstants.GZIP_FLG) != 0 && - buffer[10] == BlockCompressedStreamConstants.GZIP_XLEN && - buffer[12] == BlockCompressedStreamConstants.BGZF_ID1 && - buffer[13] == BlockCompressedStreamConstants.BGZF_ID2); + return (buffer[0] == BlockCompressedStreamConstants.GZIP_ID1 + && (buffer[1] & 0xFF) == BlockCompressedStreamConstants.GZIP_ID2 + && (buffer[3] & BlockCompressedStreamConstants.GZIP_FLG) != 0 + && buffer[10] == BlockCompressedStreamConstants.GZIP_XLEN + && buffer[12] == BlockCompressedStreamConstants.BGZF_ID1 + && buffer[13] == BlockCompressedStreamConstants.BGZF_ID2); } private void readBlock() throws IOException { @@ -493,9 +488,9 @@ private void checkAndRethrowDecompressionException() throws IOException { } } } - + /** - * Attempt to reuse the buffer of the given block + * Attempt to reuse the buffer of the given block * @param block owning block * @return null decompressing buffer to reuse, null if no buffer is available */ @@ -503,9 +498,9 @@ private byte[] getBufferForReuse(DecompressedBlock block) { if (block == null) return null; return block.mBlock; } - + /** - * Decompress the next block from the input stream. When using asynchronous + * Decompress the next block from the input stream. When using asynchronous * IO, this will be called by the background thread. * @param bufferAvailableForReuse buffer in which to place decompressed block. 
A null or * incorrectly sized buffer will result in the buffer being ignored and @@ -525,20 +520,23 @@ protected DecompressedBlock processNextBlock(byte[] bufferAvailableForReuse) { return new DecompressedBlock(blockAddress, new byte[0], 0); } if (headerByteCount != BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH) { - return new DecompressedBlock(blockAddress, headerByteCount, new IOException(INCORRECT_HEADER_SIZE_MSG + getSource())); + return new DecompressedBlock( + blockAddress, headerByteCount, new IOException(INCORRECT_HEADER_SIZE_MSG + getSource())); } final int blockLength = unpackInt16(mFileBuffer, BlockCompressedStreamConstants.BLOCK_LENGTH_OFFSET) + 1; if (blockLength < BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH || blockLength > mFileBuffer.length) { - return new DecompressedBlock(blockAddress, blockLength, + return new DecompressedBlock( + blockAddress, + blockLength, new IOException(UNEXPECTED_BLOCK_LENGTH_MSG + blockLength + " for " + getSource())); } final int remaining = blockLength - BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH; - final int dataByteCount = readBytes(mFileBuffer, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH, - remaining); + final int dataByteCount = + readBytes(mFileBuffer, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH, remaining); mStreamOffset += dataByteCount; if (dataByteCount != remaining) { - return new DecompressedBlock(blockAddress, blockLength, - new FileTruncatedException(PREMATURE_END_MSG + getSource())); + return new DecompressedBlock( + blockAddress, blockLength, new FileTruncatedException(PREMATURE_END_MSG + getSource())); } final byte[] decompressed = inflateBlock(mFileBuffer, blockLength, bufferAvailableForReuse); return new DecompressedBlock(blockAddress, decompressed, blockLength); @@ -547,15 +545,16 @@ protected DecompressedBlock processNextBlock(byte[] bufferAvailableForReuse) { } } - private byte[] inflateBlock(final byte[] compressedBlock, final int compressedLength, - final byte[] 
bufferAvailableForReuse) throws IOException { + private byte[] inflateBlock( + final byte[] compressedBlock, final int compressedLength, final byte[] bufferAvailableForReuse) + throws IOException { final int uncompressedLength = unpackInt32(compressedBlock, compressedLength - 4); if (uncompressedLength < 0) { throw new RuntimeIOException(getSource() + " has invalid uncompressedLength: " + uncompressedLength); } byte[] buffer = bufferAvailableForReuse; if (buffer == null || uncompressedLength != buffer.length) { - // can't reuse the buffer since the size is incorrect + // can't reuse the buffer since the size is incorrect buffer = new byte[uncompressedLength]; } blockGunzipper.unzipBlock(buffer, compressedBlock, compressedLength); @@ -576,7 +575,8 @@ private int readBytes(final byte[] buffer, final int offset, final int length) t } } - private static int readBytes(final SeekableStream file, final byte[] buffer, final int offset, final int length) throws IOException { + private static int readBytes(final SeekableStream file, final byte[] buffer, final int offset, final int length) + throws IOException { int bytesRead = 0; while (bytesRead < length) { final int count = file.read(buffer, offset + bytesRead, length - bytesRead); @@ -588,7 +588,8 @@ private static int readBytes(final SeekableStream file, final byte[] buffer, fin return bytesRead; } - private static int readBytes(final InputStream stream, final byte[] buffer, final int offset, final int length) throws IOException { + private static int readBytes(final InputStream stream, final byte[] buffer, final int offset, final int length) + throws IOException { int bytesRead = 0; while (bytesRead < length) { final int count = stream.read(buffer, offset + bytesRead, length - bytesRead); @@ -601,18 +602,21 @@ private static int readBytes(final InputStream stream, final byte[] buffer, fina } private int unpackInt16(final byte[] buffer, final int offset) { - return ((buffer[offset] & 0xFF) | - ((buffer[offset+1] & 0xFF) 
<< 8)); + return ((buffer[offset] & 0xFF) | ((buffer[offset + 1] & 0xFF) << 8)); } private int unpackInt32(final byte[] buffer, final int offset) { - return ((buffer[offset] & 0xFF) | - ((buffer[offset+1] & 0xFF) << 8) | - ((buffer[offset+2] & 0xFF) << 16) | - ((buffer[offset+3] & 0xFF) << 24)); + return ((buffer[offset] & 0xFF) + | ((buffer[offset + 1] & 0xFF) << 8) + | ((buffer[offset + 2] & 0xFF) << 16) + | ((buffer[offset + 3] & 0xFF) << 24)); } - public enum FileTermination {HAS_TERMINATOR_BLOCK, HAS_HEALTHY_LAST_BLOCK, DEFECTIVE} + public enum FileTermination { + HAS_TERMINATOR_BLOCK, + HAS_HEALTHY_LAST_BLOCK, + DEFECTIVE + } /** * @@ -631,7 +635,7 @@ public static FileTermination checkTermination(final File file) throws IOExcepti * @throws IOException */ public static FileTermination checkTermination(final Path path) throws IOException { - try( final SeekableByteChannel channel = Files.newByteChannel(path, StandardOpenOption.READ) ){ + try (final SeekableByteChannel channel = Files.newByteChannel(path, StandardOpenOption.READ)) { return checkTermination(channel); } } @@ -656,27 +660,30 @@ public static FileTermination checkTermination(SeekableByteChannel channel) thro try { channel.position(fileSize - BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length); - //Check if the end of the file is an empty gzip block which is used as the terminator for a bgzipped file - final ByteBuffer lastBlockBuffer = ByteBuffer.allocate(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length); + // Check if the end of the file is an empty gzip block which is used as the terminator for a bgzipped file + final ByteBuffer lastBlockBuffer = + ByteBuffer.allocate(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length); readFully(channel, lastBlockBuffer); if (Arrays.equals(lastBlockBuffer.array(), BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK)) { return FileTermination.HAS_TERMINATOR_BLOCK; } - //if the last block isn't an empty gzip block, check to see if it is a healthy 
compressed block or if it's corrupted + // if the last block isn't an empty gzip block, check to see if it is a healthy compressed block or if it's + // corrupted final int bufsize = (int) Math.min(fileSize, BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE); final byte[] bufferArray = new byte[bufsize]; channel.position(fileSize - bufsize); readFully(channel, ByteBuffer.wrap(bufferArray)); - for (int i = bufferArray.length - BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length; - i >= 0; --i) { - if (!preambleEqual(BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE, - bufferArray, i, BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE.length)) { + for (int i = bufferArray.length - BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length; i >= 0; --i) { + if (!preambleEqual( + BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE, + bufferArray, + i, + BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE.length)) { continue; } - final ByteBuffer byteBuffer = ByteBuffer.wrap(bufferArray, - i + BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE.length, - 4); + final ByteBuffer byteBuffer = + ByteBuffer.wrap(bufferArray, i + BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE.length, 4); byteBuffer.order(ByteOrder.LITTLE_ENDIAN); final int totalBlockSizeMinusOne = byteBuffer.getShort() & 0xFFFF; if (bufferArray.length - i == totalBlockSizeMinusOne + 1) { @@ -690,9 +697,10 @@ public static FileTermination checkTermination(SeekableByteChannel channel) thro exceptionThrown = true; throw e; } finally { - //if an exception was thrown we don't want to reset the position because that would be likely to throw again - //and suppress the initial exception - if(!exceptionThrown) { + // if an exception was thrown we don't want to reset the position because that would be likely to throw + // again + // and suppress the initial exception + if (!exceptionThrown) { channel.position(initialPosition); } } @@ -726,7 +734,8 @@ public static void assertNonDefectivePath(final Path file) 
throws IOException { } } - private static boolean preambleEqual(final byte[] preamble, final byte[] buf, final int startOffset, final int length) { + private static boolean preambleEqual( + final byte[] preamble, final byte[] buf, final int startOffset, final int length) { for (int i = 0; i < length; ++i) { if (preamble[i] != buf[i + startOffset]) { return false; diff --git a/src/main/java/htsjdk/samtools/util/BlockCompressedOutputStream.java b/src/main/java/htsjdk/samtools/util/BlockCompressedOutputStream.java index d489d99bf2..8b22c47424 100644 --- a/src/main/java/htsjdk/samtools/util/BlockCompressedOutputStream.java +++ b/src/main/java/htsjdk/samtools/util/BlockCompressedOutputStream.java @@ -24,7 +24,6 @@ package htsjdk.samtools.util; import htsjdk.samtools.util.zip.DeflaterFactory; - import java.io.File; import java.io.IOException; import java.io.OutputStream; @@ -48,10 +47,7 @@ * * c.f. http://samtools.sourceforge.net/SAM1.pdf for details of BGZF file format. */ -public class BlockCompressedOutputStream - extends OutputStream - implements LocationAware -{ +public class BlockCompressedOutputStream extends OutputStream implements LocationAware { private static final Log log = Log.getInstance(BlockCompressedOutputStream.class); @@ -61,7 +57,7 @@ public class BlockCompressedOutputStream /** * Sets the GZip compression level for subsequent BlockCompressedOutputStream object creation * that do not specify the compression level. 
- * @param compressionLevel 1 <= compressionLevel <= 9 + * @param compressionLevel {@code 1 <= compressionLevel <= 9} */ public static void setDefaultCompressionLevel(final int compressionLevel) { if (compressionLevel < Deflater.NO_COMPRESSION || compressionLevel > Deflater.BEST_COMPRESSION) { @@ -93,9 +89,9 @@ public static DeflaterFactory getDefaultDeflaterFactory() { private final BinaryCodec codec; private final byte[] uncompressedBuffer = new byte[BlockCompressedStreamConstants.DEFAULT_UNCOMPRESSED_BLOCK_SIZE]; private int numUncompressedBytes = 0; - private final byte[] compressedBuffer = - new byte[BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE - - BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH]; + private final byte[] compressedBuffer = new byte + [BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE + - BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH]; private final Deflater deflater; // A second deflater is created for the very unlikely case where the regular deflation actually makes @@ -137,7 +133,7 @@ public BlockCompressedOutputStream(final File file) { /** * Prepare to compress at the given compression level * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}. - * @param compressionLevel 1 <= compressionLevel <= 9 + * @param compressionLevel {@code 1 <= compressionLevel <= 9} */ public BlockCompressedOutputStream(final String filename, final int compressionLevel) { this(new File(filename), compressionLevel); @@ -145,7 +141,7 @@ public BlockCompressedOutputStream(final String filename, final int compressionL /** * Prepare to compress at the given compression level - * @param compressionLevel 1 <= compressionLevel <= 9 + * @param compressionLevel {@code 1 <= compressionLevel <= 9} * Note: this constructor uses the default {@link DeflaterFactory}, see {@link #getDefaultDeflaterFactory()}. 
* Use {@link #BlockCompressedOutputStream(File, int, DeflaterFactory)} to specify a custom factory. */ @@ -155,19 +151,21 @@ public BlockCompressedOutputStream(final File file, final int compressionLevel) /** * Prepare to compress at the given compression level - * @param compressionLevel 1 <= compressionLevel <= 9 + * @param compressionLevel {@code 1 <= compressionLevel <= 9} * @param deflaterFactory custom factory to create deflaters (overrides the default) */ - public BlockCompressedOutputStream(final File file, final int compressionLevel, final DeflaterFactory deflaterFactory) { + public BlockCompressedOutputStream( + final File file, final int compressionLevel, final DeflaterFactory deflaterFactory) { this(IOUtil.toPath(file), compressionLevel, deflaterFactory); } /** * Prepare to compress at the given compression level - * @param compressionLevel 1 <= compressionLevel <= 9 + * @param compressionLevel {@code 1 <= compressionLevel <= 9} * @param deflaterFactory custom factory to create deflaters (overrides the default) */ - public BlockCompressedOutputStream(final Path path, final int compressionLevel, final DeflaterFactory deflaterFactory) { + public BlockCompressedOutputStream( + final Path path, final int compressionLevel, final DeflaterFactory deflaterFactory) { this.file = path; codec = new BinaryCodec(path, true); deflater = deflaterFactory.makeDeflater(compressionLevel, true); @@ -219,7 +217,8 @@ public BlockCompressedOutputStream(final OutputStream os, final Path file, final * @param compressionLevel the compression level (0-9) * @param deflaterFactory custom factory to create deflaters (overrides the default) */ - public BlockCompressedOutputStream(final OutputStream os, final File file, final int compressionLevel, final DeflaterFactory deflaterFactory) { + public BlockCompressedOutputStream( + final OutputStream os, final File file, final int compressionLevel, final DeflaterFactory deflaterFactory) { this(os, IOUtil.toPath(file), compressionLevel, 
deflaterFactory); } @@ -230,7 +229,8 @@ public BlockCompressedOutputStream(final OutputStream os, final File file, final * @param compressionLevel the compression level (0-9) * @param deflaterFactory custom factory to create deflaters (overrides the default) */ - public BlockCompressedOutputStream(final OutputStream os, final Path file, final int compressionLevel, final DeflaterFactory deflaterFactory) { + public BlockCompressedOutputStream( + final OutputStream os, final Path file, final int compressionLevel, final DeflaterFactory deflaterFactory) { this.file = file; codec = new BinaryCodec(os); if (file != null) { @@ -249,9 +249,9 @@ public BlockCompressedOutputStream(final OutputStream os, final Path file, final */ public static BlockCompressedOutputStream maybeBgzfWrapOutputStream(final File location, OutputStream output) { if (!(output instanceof BlockCompressedOutputStream)) { - return new BlockCompressedOutputStream(output, location); + return new BlockCompressedOutputStream(output, location); } else { - return (BlockCompressedOutputStream)output; + return (BlockCompressedOutputStream) output; } } @@ -263,7 +263,8 @@ public static BlockCompressedOutputStream maybeBgzfWrapOutputStream(final File l */ public void addIndexer(final OutputStream outputStream) { if (mBlockAddress != 0) { - throw new RuntimeException("Cannot add gzi indexer if this BlockCompressedOutput stream has already written Gzipped blocks"); + throw new RuntimeException( + "Cannot add gzi indexer if this BlockCompressedOutput stream has already written Gzipped blocks"); } indexer = new GZIIndex.GZIIndexer(outputStream); } @@ -289,14 +290,14 @@ public void write(final byte[] bytes) throws IOException { */ @Override public void write(final byte[] bytes, int startIndex, int numBytes) throws IOException { - assert(numUncompressedBytes < uncompressedBuffer.length); + assert (numUncompressedBytes < uncompressedBuffer.length); while (numBytes > 0) { final int bytesToWrite = 
Math.min(uncompressedBuffer.length - numUncompressedBytes, numBytes); System.arraycopy(bytes, startIndex, uncompressedBuffer, numUncompressedBytes, bytesToWrite); numUncompressedBytes += bytesToWrite; startIndex += bytesToWrite; numBytes -= bytesToWrite; - assert(numBytes >= 0); + assert (numBytes >= 0); if (numUncompressedBytes == uncompressedBuffer.length) { deflateBlock(); } @@ -352,8 +353,8 @@ public void close(final boolean writeTerminatorBlock) throws IOException { if (writeTerminatorBlock) { // Can't re-open something that is not a regular file, e.g. a named pipe or an output stream if (this.file == null || !Files.isRegularFile(this.file)) return; - if (BlockCompressedInputStream.checkTermination(this.file) != - BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK) { + if (BlockCompressedInputStream.checkTermination(this.file) + != BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK) { throw new IOException("Terminator block not found after closing BGZF file " + this.file); } } @@ -363,7 +364,7 @@ public void close(final boolean writeTerminatorBlock) throws IOException { * Upper 48 bits is the byte offset into the compressed stream of a block. * Lower 16 bits is the byte offset into the uncompressed stream inside the block. 
*/ - public long getFilePointer(){ + public long getFilePointer() { return BlockCompressedFilePointerUtil.makeFilePointer(mBlockAddress, numUncompressedBytes); } @@ -406,7 +407,7 @@ private int deflateBlock() { crc32.update(uncompressedBuffer, 0, bytesToCompress); final int totalBlockSize = writeGzipBlock(compressedSize, bytesToCompress, crc32.getValue()); - assert(bytesToCompress <= numUncompressedBytes); + assert (bytesToCompress <= numUncompressedBytes); // Call out to the indexer if it exists if (indexer != null) { @@ -436,13 +437,14 @@ private int writeGzipBlock(final int compressedSize, final int uncompressedSize, codec.writeByte(BlockCompressedStreamConstants.BGZF_ID1); codec.writeByte(BlockCompressedStreamConstants.BGZF_ID2); codec.writeShort(BlockCompressedStreamConstants.BGZF_LEN); - final int totalBlockSize = compressedSize + BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH + - BlockCompressedStreamConstants.BLOCK_FOOTER_LENGTH; + final int totalBlockSize = compressedSize + + BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH + + BlockCompressedStreamConstants.BLOCK_FOOTER_LENGTH; // I don't know why we store block size - 1, but that is what the spec says - codec.writeShort((short)(totalBlockSize - 1)); + codec.writeShort((short) (totalBlockSize - 1)); codec.writeBytes(compressedBuffer, 0, compressedSize); - codec.writeInt((int)crc); + codec.writeInt((int) crc); codec.writeInt(uncompressedSize); return totalBlockSize; } diff --git a/src/main/java/htsjdk/samtools/util/BlockCompressedStreamConstants.java b/src/main/java/htsjdk/samtools/util/BlockCompressedStreamConstants.java index 4f4505762e..81db8cd8cb 100644 --- a/src/main/java/htsjdk/samtools/util/BlockCompressedStreamConstants.java +++ b/src/main/java/htsjdk/samtools/util/BlockCompressedStreamConstants.java @@ -83,36 +83,53 @@ public class BlockCompressedStreamConstants { public static final byte BGZF_LEN = 2; public static final byte[] EMPTY_GZIP_BLOCK = { - BlockCompressedStreamConstants.GZIP_ID1, 
- (byte)BlockCompressedStreamConstants.GZIP_ID2, - BlockCompressedStreamConstants.GZIP_CM_DEFLATE, - BlockCompressedStreamConstants.GZIP_FLG, - 0, 0, 0, 0, // Modification time - BlockCompressedStreamConstants.GZIP_XFL, - (byte)BlockCompressedStreamConstants.GZIP_OS_UNKNOWN, - BlockCompressedStreamConstants.GZIP_XLEN, 0, // Little-endian short - BlockCompressedStreamConstants.BGZF_ID1, - BlockCompressedStreamConstants.BGZF_ID2, - BlockCompressedStreamConstants.BGZF_LEN, 0, // Little-endian short - // Total block size - 1 - BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH + - BlockCompressedStreamConstants.BLOCK_FOOTER_LENGTH - 1 + 2, 0, // Little-endian short - // Dummy payload? - 3, 0, - 0, 0, 0, 0, // crc - 0, 0, 0, 0, // uncompressedSize + BlockCompressedStreamConstants.GZIP_ID1, + (byte) BlockCompressedStreamConstants.GZIP_ID2, + BlockCompressedStreamConstants.GZIP_CM_DEFLATE, + BlockCompressedStreamConstants.GZIP_FLG, + 0, + 0, + 0, + 0, // Modification time + BlockCompressedStreamConstants.GZIP_XFL, + (byte) BlockCompressedStreamConstants.GZIP_OS_UNKNOWN, + BlockCompressedStreamConstants.GZIP_XLEN, + 0, // Little-endian short + BlockCompressedStreamConstants.BGZF_ID1, + BlockCompressedStreamConstants.BGZF_ID2, + BlockCompressedStreamConstants.BGZF_LEN, + 0, // Little-endian short + // Total block size - 1 + BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH + BlockCompressedStreamConstants.BLOCK_FOOTER_LENGTH - 1 + 2, + 0, // Little-endian short + // Dummy payload? 
+ 3, + 0, + 0, + 0, + 0, + 0, // crc + 0, + 0, + 0, + 0, // uncompressedSize }; public static final byte[] GZIP_BLOCK_PREAMBLE = { - BlockCompressedStreamConstants.GZIP_ID1, - (byte)BlockCompressedStreamConstants.GZIP_ID2, - BlockCompressedStreamConstants.GZIP_CM_DEFLATE, - BlockCompressedStreamConstants.GZIP_FLG, - 0, 0, 0, 0, // Modification time - BlockCompressedStreamConstants.GZIP_XFL, - (byte)BlockCompressedStreamConstants.GZIP_OS_UNKNOWN, - BlockCompressedStreamConstants.GZIP_XLEN, 0, // Little-endian short - BlockCompressedStreamConstants.BGZF_ID1, - BlockCompressedStreamConstants.BGZF_ID2, - BlockCompressedStreamConstants.BGZF_LEN, 0, // Little-endian short + BlockCompressedStreamConstants.GZIP_ID1, + (byte) BlockCompressedStreamConstants.GZIP_ID2, + BlockCompressedStreamConstants.GZIP_CM_DEFLATE, + BlockCompressedStreamConstants.GZIP_FLG, + 0, + 0, + 0, + 0, // Modification time + BlockCompressedStreamConstants.GZIP_XFL, + (byte) BlockCompressedStreamConstants.GZIP_OS_UNKNOWN, + BlockCompressedStreamConstants.GZIP_XLEN, + 0, // Little-endian short + BlockCompressedStreamConstants.BGZF_ID1, + BlockCompressedStreamConstants.BGZF_ID2, + BlockCompressedStreamConstants.BGZF_LEN, + 0, // Little-endian short }; } diff --git a/src/main/java/htsjdk/samtools/util/BlockGunzipper.java b/src/main/java/htsjdk/samtools/util/BlockGunzipper.java index bf763ba08f..ba91220a8b 100644 --- a/src/main/java/htsjdk/samtools/util/BlockGunzipper.java +++ b/src/main/java/htsjdk/samtools/util/BlockGunzipper.java @@ -25,7 +25,6 @@ import htsjdk.samtools.SAMFormatException; import htsjdk.samtools.util.zip.InflaterFactory; - import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.zip.CRC32; @@ -101,23 +100,26 @@ public int unzipBlock(byte[] uncompressedBlock, byte[] compressedBlock, int comp * @param uncompressedBlock must be big enough to hold decompressed output. * @param uncompressedBlockOffset the offset into uncompressedBlock. 
* @param compressedBlock compressed data starting at offset 0. - * @param compressedBlock the offset into the compressed data. + * @param compressedBlockOffset the offset into the compressed data. * @param compressedLength size of compressed data, possibly less than the size of the buffer. * @return the uncompressed data size. */ - public int unzipBlock(byte[] uncompressedBlock, int uncompressedBlockOffset, - byte[] compressedBlock, int compressedBlockOffset, int compressedLength) { + public int unzipBlock( + byte[] uncompressedBlock, + int uncompressedBlockOffset, + byte[] compressedBlock, + int compressedBlockOffset, + int compressedLength) { int uncompressedSize; try { ByteBuffer byteBuffer = ByteBuffer.wrap(compressedBlock, compressedBlockOffset, compressedLength); byteBuffer.order(ByteOrder.LITTLE_ENDIAN); // Validate GZIP header - if (byteBuffer.get() != BlockCompressedStreamConstants.GZIP_ID1 || - byteBuffer.get() != (byte)BlockCompressedStreamConstants.GZIP_ID2 || - byteBuffer.get() != BlockCompressedStreamConstants.GZIP_CM_DEFLATE || - byteBuffer.get() != BlockCompressedStreamConstants.GZIP_FLG - ) { + if (byteBuffer.get() != BlockCompressedStreamConstants.GZIP_ID1 + || byteBuffer.get() != (byte) BlockCompressedStreamConstants.GZIP_ID2 + || byteBuffer.get() != BlockCompressedStreamConstants.GZIP_CM_DEFLATE + || byteBuffer.get() != BlockCompressedStreamConstants.GZIP_FLG) { throw new SAMFormatException("Invalid GZIP header"); } // Skip MTIME, XFL, OS fields @@ -134,14 +136,19 @@ public int unzipBlock(byte[] uncompressedBlock, int uncompressedBlockOffset, } // Read expected size and CRD from end of GZIP block - final int deflatedSize = compressedLength - BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH - BlockCompressedStreamConstants.BLOCK_FOOTER_LENGTH; + final int deflatedSize = compressedLength + - BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH + - BlockCompressedStreamConstants.BLOCK_FOOTER_LENGTH; byteBuffer.position(byteBuffer.position() + 
deflatedSize); int expectedCrc = byteBuffer.getInt(); uncompressedSize = byteBuffer.getInt(); inflater.reset(); // Decompress - inflater.setInput(compressedBlock, compressedBlockOffset + BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH, deflatedSize); + inflater.setInput( + compressedBlock, + compressedBlockOffset + BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH, + deflatedSize); final int inflatedBytes = inflater.inflate(uncompressedBlock, uncompressedBlockOffset, uncompressedSize); if (inflatedBytes != uncompressedSize) { throw new SAMFormatException("Did not inflate expected amount"); @@ -152,12 +159,11 @@ public int unzipBlock(byte[] uncompressedBlock, int uncompressedBlockOffset, crc32.reset(); crc32.update(uncompressedBlock, uncompressedBlockOffset, uncompressedSize); final long crc = crc32.getValue(); - if ((int)crc != expectedCrc) { + if ((int) crc != expectedCrc) { throw new SAMFormatException("CRC mismatch"); } } - } catch (DataFormatException e) - { + } catch (DataFormatException e) { throw new RuntimeIOException(e); } return uncompressedSize; diff --git a/src/main/java/htsjdk/samtools/util/BufferedLineReader.java b/src/main/java/htsjdk/samtools/util/BufferedLineReader.java index 47140a7936..b155933a2c 100644 --- a/src/main/java/htsjdk/samtools/util/BufferedLineReader.java +++ b/src/main/java/htsjdk/samtools/util/BufferedLineReader.java @@ -24,25 +24,25 @@ package htsjdk.samtools.util; import htsjdk.samtools.Defaults; - import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.LineNumberReader; import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; +import java.nio.charset.StandardCharsets; /** * Implementation of LineReader that is a thin wrapper around BufferedReader. On Linux, this is faster * than AsciiLineReaderImpl. If you use AsciiLineReader rather than this class, it will detect the OS * and delegate to the preferred implementation. 
- * + * * @author alecw@broadinstitute.org */ public class BufferedLineReader extends LineNumberReader implements LineReader { - - private static final float MAX_BYTES_PER_CHAR_UTF8 = StandardCharsets.UTF_8.newEncoder().maxBytesPerChar(); + + private static final float MAX_BYTES_PER_CHAR_UTF8 = + StandardCharsets.UTF_8.newEncoder().maxBytesPerChar(); private static class StringBackedInputStream extends InputStream { private int idx = 0; @@ -56,7 +56,7 @@ private static class StringBackedInputStream extends InputStream { @Override public int read() throws IOException { - if(idx >= len) { + if (idx >= len) { return -1; } return (int) str.charAt(idx++); @@ -83,8 +83,7 @@ public static BufferedLineReader fromString(final String s) { // bytes per character to store in UTF8. if (s.length() * MAX_BYTES_PER_CHAR_UTF8 >= Integer.MAX_VALUE) { is = new StringBackedInputStream(s); - } - else { + } else { is = new ByteArrayInputStream(s.getBytes()); } return new BufferedLineReader(is); @@ -117,7 +116,7 @@ public int peek() { reset(); return ret; } catch (IOException e) { - throw new RuntimeIOException(e); + throw new RuntimeIOException(e); } } diff --git a/src/main/java/htsjdk/samtools/util/CigarUtil.java b/src/main/java/htsjdk/samtools/util/CigarUtil.java index 28ff9155fe..3b05f4abcf 100644 --- a/src/main/java/htsjdk/samtools/util/CigarUtil.java +++ b/src/main/java/htsjdk/samtools/util/CigarUtil.java @@ -32,30 +32,33 @@ import htsjdk.samtools.SAMValidationError; import htsjdk.samtools.TextCigarCodec; import htsjdk.utils.ValidationUtils; - import java.util.*; /** * @author alecw@broadinstitute.org */ public class CigarUtil { - private static final Log log = Log.getInstance(CigarUtil.class); + private static final Log log = Log.getInstance(CigarUtil.class); /** Adjust the cigar based on adapter clipping. - * TODO: If there is hard clipping at the end of the input CIGAR, it is lost. It should not be. + * TODO: If there is hard clipping at the end of the input CIGAR, it is lost. 
It should not be. * * * @param clipFrom 1-based position where the clipping starts * @param oldCigar The existing unclipped cigar * @param clippingOperator Type of clipping to use, either soft or hard. If non-clipping operator is used an exception is thrown * @return New adjusted list of cigar elements */ - public static List clipEndOfRead(final int clipFrom, final List oldCigar, final CigarOperator clippingOperator) { - ValidationUtils.validateArg(clippingOperator.isClipping(), () -> "Clipping operator should be SOFT or HARD clip, found " + clippingOperator.toString()); - final int clippedBases = (int)CoordMath.getLength(clipFrom, Cigar.getReadLength(oldCigar)); + public static List clipEndOfRead( + final int clipFrom, final List oldCigar, final CigarOperator clippingOperator) { + ValidationUtils.validateArg( + clippingOperator.isClipping(), + () -> "Clipping operator should be SOFT or HARD clip, found " + clippingOperator.toString()); + final int clippedBases = (int) CoordMath.getLength(clipFrom, Cigar.getReadLength(oldCigar)); List newCigar = new LinkedList(); int pos = 1; final CigarElement oldCigarFinalElement = oldCigar.get(oldCigar.size() - 1); - final int trailingHardClipBases = oldCigarFinalElement.getOperator() == CigarOperator.HARD_CLIP? oldCigarFinalElement.getLength() : 0; + final int trailingHardClipBases = + oldCigarFinalElement.getOperator() == CigarOperator.HARD_CLIP ? 
oldCigarFinalElement.getLength() : 0; for (CigarElement c : oldCigar) { // Distinguish two cases: @@ -64,8 +67,8 @@ public static List clipEndOfRead(final int clipFrom, final List clipEndOfRead(final int clipFrom, final List= (clipFrom - 1)) { // handle adjacent or straddling element - mergeClippingCigarElement(newCigar, c, - (clipFrom - 1) - (pos - 1) , clippedBases, clippingOperator, trailingHardClipBases); + mergeClippingCigarElement( + newCigar, c, (clipFrom - 1) - (pos - 1), clippedBases, clippingOperator, trailingHardClipBases); break; } - pos = endPos + 1; // update pos for next iteration + pos = endPos + 1; // update pos for next iteration } // end loop over cigar elements return newCigar; } @@ -101,30 +104,38 @@ public static List softClipEndOfRead(final int clipFrom, final Lis * @param newClippingOperator clipping operator to be merged * @param trailingHardClippedBases number of hardClippedBases which were on the end of the original cigar */ - static private void mergeClippingCigarElement(List newCigar, CigarElement originalElement, - int relativeClippedPosition, - int clippedBases, final CigarOperator newClippingOperator, - final int trailingHardClippedBases) { - ValidationUtils.validateArg(newClippingOperator.isClipping(), () -> "Clipping operator should be SOFT or HARD clip, found " + newClippingOperator.toString()); + private static void mergeClippingCigarElement( + List newCigar, + CigarElement originalElement, + int relativeClippedPosition, + int clippedBases, + final CigarOperator newClippingOperator, + final int trailingHardClippedBases) { + ValidationUtils.validateArg( + newClippingOperator.isClipping(), + () -> "Clipping operator should be SOFT or HARD clip, found " + newClippingOperator.toString()); final CigarOperator originalOperator = originalElement.getOperator(); int clipAmount = clippedBases; if (newClippingOperator == CigarOperator.HARD_CLIP) { clipAmount += trailingHardClippedBases; } - if (originalOperator.consumesReadBases()){ - if 
((originalOperator.consumesReferenceBases() || newClippingOperator == CigarOperator.HARD_CLIP ) && relativeClippedPosition > 0){ + if (originalOperator.consumesReadBases()) { + if ((originalOperator.consumesReferenceBases() || newClippingOperator == CigarOperator.HARD_CLIP) + && relativeClippedPosition > 0) { newCigar.add(new CigarElement(relativeClippedPosition, originalOperator)); } - if (!(originalOperator.consumesReferenceBases() || newClippingOperator == CigarOperator.HARD_CLIP ) || originalOperator == newClippingOperator) { + if (!(originalOperator.consumesReferenceBases() || newClippingOperator == CigarOperator.HARD_CLIP) + || originalOperator == newClippingOperator) { clipAmount = clippedBases + relativeClippedPosition; } - } else if (relativeClippedPosition != 0){ + } else if (relativeClippedPosition != 0) { throw new SAMException("Unexpected non-0 relativeClippedPosition " + relativeClippedPosition); } - newCigar.add(new CigarElement(clipAmount, newClippingOperator)); // add clipping operator - if(newClippingOperator == CigarOperator.SOFT_CLIP && trailingHardClippedBases > 0) { - newCigar.add(new CigarElement(trailingHardClippedBases, CigarOperator.HARD_CLIP)); //add in trailing hard-clipped bases + newCigar.add(new CigarElement(clipAmount, newClippingOperator)); // add clipping operator + if (newClippingOperator == CigarOperator.SOFT_CLIP && trailingHardClippedBases > 0) { + newCigar.add(new CigarElement( + trailingHardClippedBases, CigarOperator.HARD_CLIP)); // add in trailing hard-clipped bases } } @@ -145,20 +156,22 @@ public static void softClip3PrimeEndOfRead(SAMRecord rec, final int clipFrom) { * @param clippingOperator Type of clipping to use, either soft or hard. 
If non-clipping operator is used an exception is thrown */ public static void clip3PrimeEndOfRead(SAMRecord rec, final int clipFrom, final CigarOperator clippingOperator) { - ValidationUtils.validateArg(clippingOperator.isClipping(), () -> "Clipping operator should be SOFT or HARD clip, found " + clippingOperator.toString()); + ValidationUtils.validateArg( + clippingOperator.isClipping(), + () -> "Clipping operator should be SOFT or HARD clip, found " + clippingOperator.toString()); final Cigar cigar = rec.getCigar(); // we don't worry about SEED_REGION_LENGTH in clipFrom final boolean negativeStrand = rec.getReadNegativeStrandFlag(); List oldCigar = cigar.getCigarElements(); - if (!isValidCigar(rec, cigar, true)){ + if (!isValidCigar(rec, cigar, true)) { return; // log message already issued } final int originalReadLength = rec.getReadLength(); final int originalReferenceLength = cigar.getReferenceLength(); - if (negativeStrand){ + if (negativeStrand) { // Can't just use Collections.reverse() here because oldCigar is unmodifiable oldCigar = new ArrayList(oldCigar); Collections.reverse(oldCigar); @@ -170,34 +183,37 @@ public static void clip3PrimeEndOfRead(SAMRecord rec, final int clipFrom, final } final Cigar newCigar = new Cigar(newCigarElems); - if (negativeStrand){ + if (negativeStrand) { int oldLength = cigar.getReferenceLength(); int newLength = newCigar.getReferenceLength(); int sizeChange = oldLength - newLength; - if (sizeChange > 0){ + if (sizeChange > 0) { rec.setAlignmentStart(rec.getAlignmentStart() + sizeChange); - } else if (sizeChange < 0){ - throw new SAMException("The clipped length " + newLength + - " is longer than the old unclipped length " + oldLength); + } else if (sizeChange < 0) { + throw new SAMException( + "The clipped length " + newLength + " is longer than the old unclipped length " + oldLength); } } rec.setCigar(newCigar); // If hard-clipping, remove the hard-clipped bases from the read - if(clippingOperator == CigarOperator.HARD_CLIP) 
{ + if (clippingOperator == CigarOperator.HARD_CLIP) { final byte[] bases = rec.getReadBases(); final byte[] baseQualities = rec.getBaseQualities(); if (originalReadLength != bases.length) { - throw new SAMException("length of bases array (" + bases.length + ") does not match length expected based on cigar (" + cigar+ ")"); + throw new SAMException("length of bases array (" + bases.length + + ") does not match length expected based on cigar (" + cigar + ")"); } if (originalReadLength != baseQualities.length) { - throw new SAMException("length of baseQualities array (" + baseQualities.length + ") does not match length expected based on cigar (" + cigar+ ")"); + throw new SAMException("length of baseQualities array (" + baseQualities.length + + ") does not match length expected based on cigar (" + cigar + ")"); } - if(rec.getReadNegativeStrandFlag()) { + if (rec.getReadNegativeStrandFlag()) { rec.setReadBases(Arrays.copyOfRange(bases, bases.length - clipFrom + 1, originalReadLength)); - rec.setBaseQualities(Arrays.copyOfRange(baseQualities, baseQualities.length - clipFrom + 1, originalReadLength)); + rec.setBaseQualities( + Arrays.copyOfRange(baseQualities, baseQualities.length - clipFrom + 1, originalReadLength)); } else { rec.setReadBases(Arrays.copyOf(bases, clipFrom - 1)); rec.setBaseQualities(Arrays.copyOf(baseQualities, clipFrom - 1)); @@ -215,7 +231,7 @@ public static void clip3PrimeEndOfRead(SAMRecord rec, final int clipFrom, final } if (newCigar.getReferenceLength() != originalReferenceLength) { - //invalidate NM, UQ, MD tags if we have changed the length of the read. + // invalidate NM, UQ, MD tags if we have changed the length of the read. 
rec.setAttribute(SAMTag.NM, null); rec.setAttribute(SAMTag.MD, null); rec.setAttribute(SAMTag.UQ, null); @@ -228,23 +244,23 @@ public static void clip3PrimeEndOfRead(SAMRecord rec, final int clipFrom, final rec.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START); rec.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY); rec.setInferredInsertSize(0); - } - else if (!isValidCigar(rec, newCigar, false)){ + } else if (!isValidCigar(rec, newCigar, false)) { // log message already issued - throw new IllegalStateException("Invalid new Cigar: " + newCigar + " (" + oldCigar + ") for " + - rec.getReadName()); - } - else if (rec.getReadLength() != newCigar.getReadLength()) { - throw new IllegalStateException("new Cigar: " + newCigar + " implies different read base than record (" + rec.getReadLength() +")"); - } - else if (rec.getReadBases().length != rec.getBaseQualities().length) { - throw new IllegalStateException("new read bases have different length (" + rec.getReadBases().length + ") than new base qualities (" + rec.getBaseQualities() + ")"); + throw new IllegalStateException( + "Invalid new Cigar: " + newCigar + " (" + oldCigar + ") for " + rec.getReadName()); + } else if (rec.getReadLength() != newCigar.getReadLength()) { + throw new IllegalStateException("new Cigar: " + newCigar + " implies different read base than record (" + + rec.getReadLength() + ")"); + } else if (rec.getReadBases().length != rec.getBaseQualities().length) { + throw new IllegalStateException("new read bases have different length (" + rec.getReadBases().length + + ") than new base qualities (" + rec.getBaseQualities() + ")"); } - } private static boolean isValidCigar(SAMRecord rec, Cigar cigar, boolean isOldCigar) { - if (cigar == null || cigar.getCigarElements() == null || cigar.getCigarElements().isEmpty()) { + if (cigar == null + || cigar.getCigarElements() == null + || cigar.getCigarElements().isEmpty()) { if (isOldCigar) { if (rec.getReadUnmappedFlag()) { // don't bother to warn since this does occur 
for PE reads @@ -257,25 +273,24 @@ private static boolean isValidCigar(SAMRecord rec, Cigar cigar, boolean isOldCig return false; } - if (rec.getReadUnmappedFlag()){ - log.info("Unmapped read with cigar: " + rec.getReadName() + " (" + rec.getCigarString() + "/" + cigar.toString() + ")"); - + if (rec.getReadUnmappedFlag()) { + log.info("Unmapped read with cigar: " + rec.getReadName() + " (" + rec.getCigarString() + "/" + + cigar.toString() + ")"); } final List validationErrors = cigar.isValid(rec.getReadName(), -1); if (validationErrors != null && !validationErrors.isEmpty()) { - log.error("Invalid cigar for read " + rec + - (isOldCigar ? " " : " for new cigar with clipped adapter ") + - " (" + rec.getCigarString() + "/" + cigar.toString() + ") " + - validationErrors); + log.error("Invalid cigar for read " + rec + (isOldCigar ? " " : " for new cigar with clipped adapter ") + + " (" + + rec.getCigarString() + "/" + cigar.toString() + ") " + validationErrors); return false; } - - if (rec.getReadLength() != cigar.getReadLength()){ + + if (rec.getReadLength() != cigar.getReadLength()) { // throw new SAMException( - log.error( rec.getReadLength() + - " read length does not = cigar length " + cigar.getReferenceLength() + - (isOldCigar? " oldCigar " : " ") + - rec + " cigar:" + cigar); + log.error(rec.getReadLength() + " read length does not = cigar length " + + cigar.getReferenceLength() + (isOldCigar ? " oldCigar " : " ") + + rec + + " cigar:" + cigar); return false; } return true; @@ -292,18 +307,24 @@ private static boolean isValidCigar(SAMRecord rec, Cigar cigar, boolean isOldCig * @param fivePrimeEnd number of soft-clipped bases to add to the 5' end of the read * @param clippingOperator Type of clipping to use, either soft or hard. 
If non-clipping operator is used an exception is thrown */ - public static Cigar addClippedBasesToEndsOfCigar(final Cigar cigar, final boolean negativeStrand, - final int threePrimeEnd, final int fivePrimeEnd, final CigarOperator clippingOperator) { - ValidationUtils.validateArg(clippingOperator.isClipping(), () -> "Clipping operator should be SOFT or HARD clip, found " + clippingOperator.toString()); + public static Cigar addClippedBasesToEndsOfCigar( + final Cigar cigar, + final boolean negativeStrand, + final int threePrimeEnd, + final int fivePrimeEnd, + final CigarOperator clippingOperator) { + ValidationUtils.validateArg( + clippingOperator.isClipping(), + () -> "Clipping operator should be SOFT or HARD clip, found " + clippingOperator.toString()); List newCigar = new ArrayList(cigar.getCigarElements()); if (negativeStrand) { Collections.reverse(newCigar); } if (threePrimeEnd > 0) { - int last = newCigar.size()-1; + int last = newCigar.size() - 1; int bases = threePrimeEnd; - if(newCigar.get(last).getOperator() == clippingOperator) { + if (newCigar.get(last).getOperator() == clippingOperator) { final CigarElement oldClip = newCigar.remove(last); bases += oldClip.getLength(); } @@ -337,24 +358,25 @@ public static Cigar addClippedBasesToEndsOfCigar(final Cigar cigar, final boolea * * @return New cigar with additional soft-clipped bases */ - public static Cigar addSoftClippedBasesToEndsOfCigar(final Cigar cigar, final boolean negativeStrand, - final int threePrimeEnd, final int fivePrimeEnd) { - return addClippedBasesToEndsOfCigar(cigar, negativeStrand, threePrimeEnd, fivePrimeEnd, CigarOperator.SOFT_CLIP); + public static Cigar addSoftClippedBasesToEndsOfCigar( + final Cigar cigar, final boolean negativeStrand, final int threePrimeEnd, final int fivePrimeEnd) { + return addClippedBasesToEndsOfCigar( + cigar, negativeStrand, threePrimeEnd, fivePrimeEnd, CigarOperator.SOFT_CLIP); } // unpack a cigar string into an array of cigarOperators // to facilitate sequence 
manipulation - public static char[] cigarArrayFromElements(List cigar){ + public static char[] cigarArrayFromElements(List cigar) { int pos = 0; int length = 0; - for (CigarElement e : cigar){ + for (CigarElement e : cigar) { length += e.getLength(); } char[] result = new char[length]; - for (CigarElement e : cigar){ - for (int i = 0; i < e.getLength(); i++){ + for (CigarElement e : cigar) { + for (int i = 0; i < e.getLength(); i++) { CigarOperator o = e.getOperator(); - result[i+pos] = (char) CigarOperator.enumToCharacter(o); + result[i + pos] = (char) CigarOperator.enumToCharacter(o); } pos += e.getLength(); } @@ -363,26 +385,26 @@ public static char[] cigarArrayFromElements(List cigar){ // unpack a cigar string into an array of cigarOperators // to facilitate sequence manipulation - public static char[] cigarArrayFromString(String cigar){ - return cigarArrayFromElements(TextCigarCodec.decode(cigar).getCigarElements()); + public static char[] cigarArrayFromString(String cigar) { + return cigarArrayFromElements(TextCigarCodec.decode(cigar).getCigarElements()); } // construct a cigar string from an array of cigarOperators. 
- public static String cigarStringFromArray(final char[] cigar){ + public static String cigarStringFromArray(final char[] cigar) { String result = ""; int length = cigar.length; - char lastOp = 0; int lastLen = 0; - for (int i=0; i < length; i++){ - if (cigar[i] == lastOp){ - lastLen++; - } else if (cigar[i] == '-'){ - ; // nothing - just ignore '-' - } else { - if (lastOp != 0) - result = result + Integer.toString(lastLen) + Character.toString(lastOp); - lastLen = 1; - lastOp = cigar[i]; - } + char lastOp = 0; + int lastLen = 0; + for (int i = 0; i < length; i++) { + if (cigar[i] == lastOp) { + lastLen++; + } else if (cigar[i] == '-') { + ; // nothing - just ignore '-' + } else { + if (lastOp != 0) result = result + Integer.toString(lastLen) + Character.toString(lastOp); + lastLen = 1; + lastOp = cigar[i]; + } } return result + Integer.toString(lastLen) + Character.toString(lastOp); } diff --git a/src/main/java/htsjdk/samtools/util/CloseableIterator.java b/src/main/java/htsjdk/samtools/util/CloseableIterator.java index fa657be228..e54ca171f2 100755 --- a/src/main/java/htsjdk/samtools/util/CloseableIterator.java +++ b/src/main/java/htsjdk/samtools/util/CloseableIterator.java @@ -34,7 +34,7 @@ /** * This interface is used by iterators that use releasable resources during iteration. - * + * * The consumer of a CloseableIterator should ensure that the close() method is always called, * for example by putting such a call in a finally block. 
Two conventions should be followed * by all implementors of CloseableIterator: diff --git a/src/main/java/htsjdk/samtools/util/CloserUtil.java b/src/main/java/htsjdk/samtools/util/CloserUtil.java index c236197a34..ef3a1a4a04 100644 --- a/src/main/java/htsjdk/samtools/util/CloserUtil.java +++ b/src/main/java/htsjdk/samtools/util/CloserUtil.java @@ -1,81 +1,80 @@ -/* - * The MIT License - * - * Copyright (c) 2009 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - */ -package htsjdk.samtools.util; - -import java.io.Closeable; -import java.io.IOException; -import java.util.Arrays; -import java.util.List; - -/** - * Utility to close things that implement Closeable - * WARNING: This should only be used for Closeable things open for read, because it ignores exceptions, and - * the caller will probably want to know about exceptions when closing a file being written to, because - * this may indicate a failure to flush. 
- * - * @author Kathleen Tibbetts - */ -public class CloserUtil { - - /** - * Calls close() on obj if it implements Closeable - * - * @param obj The potentially closeable object - */ - public static void close(Object obj) { - if (obj != null) { - close(Arrays.asList(obj)); - } - } - - /** - * Calls close() on all elements of objs that implement Closeable - * - * @param objs A list of potentially closeable objects - * - * NOTE: This method must take a List, not List, otherwise the overload above will be selected - * if the argument is not exactly List. - */ - public static void close(List objs) { - for (Object o : objs) { - if (o instanceof Closeable) { - try { - ((Closeable)o).close(); - } - catch (IOException ioe) { - // Do nothing - } - } else if (o instanceof CloseableIterator) { - ((CloseableIterator)o).close(); - } - else { - try { - java.lang.reflect.Method m = o.getClass().getMethod("close"); - m.invoke(o); - } - catch (Exception e) { /** Ignore */ } - } - } - } -} +/* + * The MIT License + * + * Copyright (c) 2009 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools.util; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +/** + * Utility to close things that implement Closeable + * WARNING: This should only be used for Closeable things open for read, because it ignores exceptions, and + * the caller will probably want to know about exceptions when closing a file being written to, because + * this may indicate a failure to flush. + * + * @author Kathleen Tibbetts + */ +public class CloserUtil { + + /** + * Calls close() on obj if it implements Closeable + * + * @param obj The potentially closeable object + */ + public static void close(Object obj) { + if (obj != null) { + close(Arrays.asList(obj)); + } + } + + /** + * Calls close() on all elements of objs that implement Closeable + * + * @param objs A list of potentially closeable objects + * + * NOTE: This method must take a {@code List}, not {@code List}, otherwise the overload above will be selected + * if the argument is not exactly {@code List}. 
+ */ + public static void close(List objs) { + for (Object o : objs) { + if (o instanceof Closeable) { + try { + ((Closeable) o).close(); + } catch (IOException ioe) { + // Do nothing + } + } else if (o instanceof CloseableIterator) { + ((CloseableIterator) o).close(); + } else { + try { + java.lang.reflect.Method m = o.getClass().getMethod("close"); + m.invoke(o); + } catch (Exception e) { + /** Ignore */ + } + } + } + } +} diff --git a/src/main/java/htsjdk/samtools/util/CollectionUtil.java b/src/main/java/htsjdk/samtools/util/CollectionUtil.java index 0354a5b888..35fc5f92c5 100755 --- a/src/main/java/htsjdk/samtools/util/CollectionUtil.java +++ b/src/main/java/htsjdk/samtools/util/CollectionUtil.java @@ -43,7 +43,8 @@ public class CollectionUtil { /** Simple case-insensitive lexical comparator of objects using their {@link Object#toString()} value. */ - final public static Comparator OBJECT_TOSTRING_COMPARATOR = (o1, o2) -> o1.toString().compareToIgnoreCase(o2.toString()); + public static final Comparator OBJECT_TOSTRING_COMPARATOR = + (o1, o2) -> o1.toString().compareToIgnoreCase(o2.toString()); public static List makeList(final T... 
list) { final List result = new ArrayList<>(); @@ -79,7 +80,8 @@ public static String join(final Collection items, final String inBetween) { public static T getSoleElement(final Collection items) { if (items.size() != 1) - throw new IllegalArgumentException(String.format("Expected a single element in %s, but found %s.", items, items.size())); + throw new IllegalArgumentException( + String.format("Expected a single element in %s, but found %s.", items, items.size())); return items.iterator().next(); } @@ -96,8 +98,7 @@ public void appendAll(final K k, final Collection v) { } private void initializeKeyIfUninitialized(final K k) { - if (!this.containsKey(k)) - this.put(k, new LinkedList<>()); + if (!this.containsKey(k)) this.put(k, new LinkedList<>()); } } @@ -120,7 +121,7 @@ public static Map> partition(final Collection collect * @deprecated use Collectors.groupingBy instead */ @Deprecated - public static abstract class Partitioner { + public abstract static class Partitioner { public abstract K getPartition(final V v); } @@ -128,7 +129,8 @@ public static abstract class Partitioner { * Partitions a collection into groups based on a characteristics of that group. Partitions are embodied in a map, whose keys are the * value of that characteristic, and the values are the partition of elements whose characteristic evaluate to that key. */ - public static Map> partition(final Collection collection, final Function keyer) { + public static Map> partition( + final Collection collection, final Function keyer) { final MultiMap partitionToValues = new MultiMap<>(); for (final V entry : collection) { partitionToValues.append(keyer.apply(entry), entry); @@ -138,12 +140,12 @@ public static Map> partition(final Collection collect /** * A defaulting map, which returns a default value when a value that does not exist in the map is looked up. - * + * * This map supports two modes: injecting-on-default, and not injecting-on-default. 
When injecting on default, when a lookup is * performed and a default value is returned, the default value is injected at that key, so that it now lives in the underlying map. * Without this mode, the value is simply returned and the underlying map is unaffected. - * - * Note: When using injecting-on-default mode, and performing a lookup with a non-key type (the get method accepts any object), a + * + * Note: When using injecting-on-default mode, and performing a lookup with a non-key type (the get method accepts any object), a * class cast exception will be thrown because a non-key type cannot be added to the map. * @param * @param @@ -151,12 +153,12 @@ public static Map> partition(final Collection collect public static class DefaultingMap extends HashMap { final Factory defaultGenerator; final boolean injectValueOnDefault; - + /** Creates a defaulting map which defaults to the provided value and with injecting-on-default disabled. */ public DefaultingMap(final V defaultValue) { this(k -> defaultValue, false); } - + /** * Creates a defaulting map that generates defaults from the provided factory. This is useful when the default is non-static, or * the default is mutable, and the client wishes to get a value and mutate it and persist those changes in the map. 
@@ -172,14 +174,14 @@ public V get(final Object key) { if (!this.containsKey(key)) { final V val = this.defaultGenerator.make((K) key); if (this.injectValueOnDefault) { - this.put((K) key, val); + this.put((K) key, val); } return val; } else { return super.get(key); } } - + public interface Factory { /** * @param k @@ -187,5 +189,4 @@ public interface Factory { V make(K k); } } - } diff --git a/src/main/java/htsjdk/samtools/util/ComparableTuple.java b/src/main/java/htsjdk/samtools/util/ComparableTuple.java index 1fe86ed4c1..9adec014c5 100644 --- a/src/main/java/htsjdk/samtools/util/ComparableTuple.java +++ b/src/main/java/htsjdk/samtools/util/ComparableTuple.java @@ -7,7 +7,8 @@ * * @author farjoun */ -public class ComparableTuple, B extends Comparable> extends Tuple implements Comparable> { +public class ComparableTuple, B extends Comparable> extends Tuple + implements Comparable> { public ComparableTuple(final A a, final B b) { super(a, b); diff --git a/src/main/java/htsjdk/samtools/util/CoordMath.java b/src/main/java/htsjdk/samtools/util/CoordMath.java index 7682026f7c..78f30230f5 100644 --- a/src/main/java/htsjdk/samtools/util/CoordMath.java +++ b/src/main/java/htsjdk/samtools/util/CoordMath.java @@ -45,8 +45,9 @@ public static int getEnd(final int start, final int length) { * Checks to see if the two sets of coordinates have any overlap. */ public static boolean overlaps(final int start, final int end, final int start2, final int end2) { - return (start2 >= start && start2 <= end) || (end2 >=start && end2 <= end) || - encloses(start2, end2, start, end); + return (start2 >= start && start2 <= end) + || (end2 >= start && end2 <= end) + || encloses(start2, end2, start, end); } /** Returns true if the "inner" coords and totally enclosed by the "outer" coords. 
*/ @@ -61,10 +62,10 @@ public static boolean encloses(final int outerStart, final int outerEnd, final i public static int getOverlap(final int start, final int end, final int start2, final int end2) { return getLength(Math.max(start, start2), Math.min(end, end2)); } - - /** + + /** * Determines the read cycle number for the base - * + * * @param isNegativeStrand true if the read is negative strand * @param readLength * @param readBaseIndex the 0-based index of the read base in question @@ -72,5 +73,4 @@ public static int getOverlap(final int start, final int end, final int start2, f public static int getCycle(boolean isNegativeStrand, int readLength, final int readBaseIndex) { return isNegativeStrand ? readLength - readBaseIndex : readBaseIndex + 1; } - } diff --git a/src/main/java/htsjdk/samtools/util/CoordSpanInputSteam.java b/src/main/java/htsjdk/samtools/util/CoordSpanInputSteam.java index 92f60f7760..0563583711 100644 --- a/src/main/java/htsjdk/samtools/util/CoordSpanInputSteam.java +++ b/src/main/java/htsjdk/samtools/util/CoordSpanInputSteam.java @@ -2,7 +2,6 @@ import htsjdk.samtools.Chunk; import htsjdk.samtools.seekablestream.SeekableStream; - import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; @@ -56,8 +55,7 @@ public int read() throws IOException { return -1; } - if (delegate.position() < current.getChunkEnd()) - return delegate.read(); + if (delegate.position() < current.getChunkEnd()) return delegate.read(); nextChunk(); @@ -76,8 +74,7 @@ public int read(byte[] buffer, int offset, int length) throws IOException { if (available > length) return delegate.read(buffer, offset, length); int read = delegate.read(buffer, offset, available); - if (delegate.position() >= current.getChunkEnd()) - nextChunk(); + if (delegate.position() >= current.getChunkEnd()) nextChunk(); return read; } diff --git a/src/main/java/htsjdk/samtools/util/CustomGzipOutputStream.java b/src/main/java/htsjdk/samtools/util/CustomGzipOutputStream.java 
index 16f99501cf..97c93199d2 100644 --- a/src/main/java/htsjdk/samtools/util/CustomGzipOutputStream.java +++ b/src/main/java/htsjdk/samtools/util/CustomGzipOutputStream.java @@ -11,8 +11,8 @@ * @author Tim Fennell */ public class CustomGzipOutputStream extends GZIPOutputStream { - public CustomGzipOutputStream(final OutputStream outputStream, final int bufferSize, final int compressionLevel) throws - IOException { + public CustomGzipOutputStream(final OutputStream outputStream, final int bufferSize, final int compressionLevel) + throws IOException { super(outputStream, bufferSize); this.def.setLevel(compressionLevel); } diff --git a/src/main/java/htsjdk/samtools/util/DateParser.java b/src/main/java/htsjdk/samtools/util/DateParser.java index f2d9481c78..8acedb1214 100644 --- a/src/main/java/htsjdk/samtools/util/DateParser.java +++ b/src/main/java/htsjdk/samtools/util/DateParser.java @@ -73,7 +73,6 @@ This W3C work (including software, documents, or other related items) is package htsjdk.samtools.util; import htsjdk.samtools.SAMException; - import java.util.Calendar; import java.util.Date; import java.util.GregorianCalendar; @@ -91,20 +90,16 @@ This W3C work (including software, documents, or other related items) is */ public class DateParser { - private static boolean check(StringTokenizer st, String token) - throws InvalidDateException - { + private static boolean check(StringTokenizer st, String token) throws InvalidDateException { if (!st.hasMoreElements()) return false; if (st.nextToken().equals(token)) { return true; } else { - throw new InvalidDateException("Missing ["+token+"]"); + throw new InvalidDateException("Missing [" + token + "]"); } } - private static Calendar getCalendar(String isodate) - throws InvalidDateException - { + private static Calendar getCalendar(String isodate) throws InvalidDateException { // YYYY-MM-DDThh:mm:ss.sTZD StringTokenizer st = new StringTokenizer(isodate, "-T:.+Z", true); @@ -120,7 +115,7 @@ private static Calendar 
getCalendar(String isodate) } // Month if (check(st, "-") && (st.hasMoreTokens())) { - int month = Integer.parseInt(st.nextToken()) -1; + int month = Integer.parseInt(st.nextToken()) - 1; calendar.set(Calendar.MONTH, month); } else { return calendar; @@ -159,7 +154,7 @@ private static Calendar getCalendar(String isodate) // // Secondes - if (! st.hasMoreTokens()) { + if (!st.hasMoreTokens()) { return calendar; } String tok = st.nextToken(); @@ -167,7 +162,7 @@ private static Calendar getCalendar(String isodate) if (st.hasMoreTokens()) { int secondes = Integer.parseInt(st.nextToken()); calendar.set(Calendar.SECOND, secondes); - if (! st.hasMoreTokens()) { + if (!st.hasMoreTokens()) { return calendar; } // frac sec @@ -175,14 +170,14 @@ private static Calendar getCalendar(String isodate) if (tok.equals(".")) { // bug fixed, thx to Martin Bottcher String nt = st.nextToken(); - while(nt.length() < 3) { + while (nt.length() < 3) { nt += "0"; } - nt = nt.substring( 0, 3 ); //Cut trailing chars.. + nt = nt.substring(0, 3); // Cut trailing chars.. int millisec = Integer.parseInt(nt); - //int millisec = Integer.parseInt(st.nextToken()) * 10; + // int millisec = Integer.parseInt(st.nextToken()) * 10; calendar.set(Calendar.MILLISECOND, millisec); - if (! st.hasMoreTokens()) { + if (!st.hasMoreTokens()) { return calendar; } tok = st.nextToken(); @@ -197,16 +192,16 @@ private static Calendar getCalendar(String isodate) calendar.set(Calendar.MILLISECOND, 0); } // Timezone - if (! tok.equals("Z")) { // UTC - if (! (tok.equals("+") || tok.equals("-"))) { + if (!tok.equals("Z")) { // UTC + if (!(tok.equals("+") || tok.equals("-"))) { throw new InvalidDateException("only Z, + or - allowed"); } boolean plus = tok.equals("+"); - if (! 
st.hasMoreTokens()) { + if (!st.hasMoreTokens()) { throw new InvalidDateException("Missing hour field"); } int tzhour = Integer.parseInt(st.nextToken()); - int tzmin = 0; + int tzmin = 0; if (check(st, ":") && (st.hasMoreTokens())) { tzmin = Integer.parseInt(st.nextToken()); } else { @@ -227,8 +222,7 @@ private static Calendar getCalendar(String isodate) } } } catch (NumberFormatException ex) { - throw new InvalidDateException("["+ex.getMessage()+ - "] is not an integer"); + throw new InvalidDateException("[" + ex.getMessage() + "] is not an integer"); } return calendar; } @@ -239,16 +233,14 @@ private static Calendar getCalendar(String isodate) * @return a Date instance * @exception InvalidDateException if the date is not valid */ - public static Date parse(String isodate) - throws InvalidDateException - { + public static Date parse(String isodate) throws InvalidDateException { Calendar calendar = getCalendar(isodate); return calendar.getTime(); } private static String twoDigit(int i) { - if (i >=0 && i < 10) { - return "0"+String.valueOf(i); + if (i >= 0 && i < 10) { + return "0" + String.valueOf(i); } return String.valueOf(i); } @@ -261,7 +253,8 @@ private static String twoDigit(int i) { public static String getIsoDate(Date date) { Calendar calendar = new GregorianCalendar(TimeZone.getTimeZone("UTC")); calendar.setTime(date); - return new StringBuffer().append(calendar.get(Calendar.YEAR)) + return new StringBuffer() + .append(calendar.get(Calendar.YEAR)) .append("-") .append(twoDigit(calendar.get(Calendar.MONTH) + 1)) .append("-") @@ -274,12 +267,12 @@ public static String getIsoDate(Date date) { .append(twoDigit(calendar.get(Calendar.SECOND))) .append(".") .append(twoDigit(calendar.get(Calendar.MILLISECOND) / 10)) - .append("Z").toString(); + .append("Z") + .toString(); } public static class InvalidDateException extends SAMException { - public InvalidDateException() { - } + public InvalidDateException() {} public InvalidDateException(final String s) { super(s); 
diff --git a/src/main/java/htsjdk/samtools/util/DiskBackedQueue.java b/src/main/java/htsjdk/samtools/util/DiskBackedQueue.java index b353fb132f..a61799e748 100644 --- a/src/main/java/htsjdk/samtools/util/DiskBackedQueue.java +++ b/src/main/java/htsjdk/samtools/util/DiskBackedQueue.java @@ -26,7 +26,6 @@ import htsjdk.samtools.Defaults; import htsjdk.samtools.SAMException; - import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -75,7 +74,6 @@ public class DiskBackedQueue implements Queue { */ private final SortingCollection.Codec codec; - /** * Prepare to accumulate records * @@ -83,8 +81,8 @@ public class DiskBackedQueue implements Queue { * @param maxRecordsInRam how many records to accumulate before spilling to disk * @param tmpDirs Where to write files of records that will not fit in RAM */ - private DiskBackedQueue(final SortingCollection.Codec codec, - final int maxRecordsInRam, final List tmpDirs) { + private DiskBackedQueue( + final SortingCollection.Codec codec, final int maxRecordsInRam, final List tmpDirs) { if (maxRecordsInRam < 0) { throw new IllegalArgumentException("maxRecordsInRamQueue must be >= 0"); } @@ -94,7 +92,9 @@ private DiskBackedQueue(final SortingCollection.Codec codec, for (final Path tmpDir : tmpDirs) IOUtil.assertDirectoryIsWritable(tmpDir); this.tmpDirs = tmpDirs; this.codec = codec; - this.maxRecordsInRamQueue = (maxRecordsInRam == 0) ? 0 : maxRecordsInRam - 1; // the first of our ram records is stored as headRecord + this.maxRecordsInRamQueue = (maxRecordsInRam == 0) + ? 
0 + : maxRecordsInRam - 1; // the first of our ram records is stored as headRecord this.ramRecords = new ArrayDeque(this.maxRecordsInRamQueue); } @@ -105,10 +105,10 @@ private DiskBackedQueue(final SortingCollection.Codec codec, * @param maxRecordsInRam how many records to accumulate in memory before spilling to disk * @param tmpDir Where to write files of records that will not fit in RAM */ - public static DiskBackedQueue newInstance(final SortingCollection.Codec codec, - final int maxRecordsInRam, - final List tmpDir) { - return new DiskBackedQueue(codec, maxRecordsInRam, tmpDir.stream().map(File::toPath).collect(Collectors.toList())); + public static DiskBackedQueue newInstance( + final SortingCollection.Codec codec, final int maxRecordsInRam, final List tmpDir) { + return new DiskBackedQueue( + codec, maxRecordsInRam, tmpDir.stream().map(File::toPath).collect(Collectors.toList())); } /** @@ -118,9 +118,8 @@ public static DiskBackedQueue newInstance(final SortingCollection.Codec DiskBackedQueue newInstanceFromPaths(final SortingCollection.Codec codec, - final int maxRecordsInRam, - final List tmpDir) { + public static DiskBackedQueue newInstanceFromPaths( + final SortingCollection.Codec codec, final int maxRecordsInRam, final List tmpDir) { return new DiskBackedQueue(codec, maxRecordsInRam, tmpDir); } @@ -146,19 +145,21 @@ public boolean headRecordIsFromDisk() { */ @Override public boolean add(final E record) throws IllegalStateException { - if (!canAdd) throw new IllegalStateException("Cannot add to DiskBackedQueue whose canAdd() method returns false"); + if (!canAdd) + throw new IllegalStateException("Cannot add to DiskBackedQueue whose canAdd() method returns false"); - // NB: we add all the records before removing them, so we can never have spilled to disk unless all the space for ram records + // NB: we add all the records before removing them, so we can never have spilled to disk unless all the space + // for ram records // have been exhausted. 
if (this.headRecord == null) { // this is the first record in the queue - if (0 < this.numRecordsOnDisk) throw new SAMException("Head record was null but we have records on disk. Bug!"); + if (0 < this.numRecordsOnDisk) + throw new SAMException("Head record was null but we have records on disk. Bug!"); this.headRecord = record; - } - else if (this.ramRecords.size() == this.maxRecordsInRamQueue) { + } else if (this.ramRecords.size() == this.maxRecordsInRamQueue) { spillToDisk(record); - } - else { - if (0 < this.numRecordsOnDisk) throw new SAMException("Trying to add records to RAM but there were records on disk. Bug!"); + } else { + if (0 < this.numRecordsOnDisk) + throw new SAMException("Trying to add records to RAM but there were records on disk. Bug!"); this.ramRecords.add(record); } return true; @@ -174,8 +175,7 @@ public E remove() { final E element = this.poll(); if (element == null) { throw new NoSuchElementException("Attempting to remove() from empty DiskBackedQueue"); - } - else { + } else { return element; } } @@ -193,8 +193,7 @@ public E poll() { public E element() { if (this.headRecord != null) { return this.headRecord; - } - else { + } else { throw new NoSuchElementException("Attempting to element() from empty DiskBackedQueue"); } } @@ -268,14 +267,16 @@ private void spillToDisk(final E record) throws RuntimeIOException { try { if (this.diskRecords == null) { this.diskRecords = newTempFile(); - this.outputStream = tempStreamFactory.wrapTempOutputStream(Files.newOutputStream(this.diskRecords), Defaults.BUFFER_SIZE); + this.outputStream = tempStreamFactory.wrapTempOutputStream( + Files.newOutputStream(this.diskRecords), Defaults.BUFFER_SIZE); this.codec.setOutputStream(this.outputStream); } this.codec.encode(record); this.outputStream.flush(); this.numRecordsOnDisk++; } catch (final IOException e) { - throw new RuntimeIOException("Problem writing temporary file. 
Try setting TMP_DIR to a file system with lots of space.", e); + throw new RuntimeIOException( + "Problem writing temporary file. Try setting TMP_DIR to a file system with lots of space.", e); } } @@ -284,7 +285,8 @@ private void spillToDisk(final E record) throws RuntimeIOException { * on JVM exit and then returns it. */ private Path newTempFile() throws IOException { - return IOUtil.newTempPath("diskbackedqueue.", ".tmp", this.tmpDirs.toArray(new Path[tmpDirs.size()]), IOUtil.FIVE_GBS); + return IOUtil.newTempPath( + "diskbackedqueue.", ".tmp", this.tmpDirs.toArray(new Path[tmpDirs.size()]), IOUtil.FIVE_GBS); } /** @@ -295,12 +297,10 @@ private void updateQueueHead() { if (!this.ramRecords.isEmpty()) { this.headRecord = this.ramRecords.poll(); if (0 < numRecordsOnDisk) this.canAdd = false; - } - else if (this.diskRecords != null) { + } else if (this.diskRecords != null) { this.headRecord = this.readFileRecord(this.diskRecords); this.canAdd = false; - } - else { + } else { this.canAdd = true; this.headRecord = null; } @@ -314,7 +314,7 @@ else if (this.diskRecords != null) { * @return The next element from the head of the file, or null if end-of-file is reached * @throws RuntimeIOException */ - private E readFileRecord (final Path file) { + private E readFileRecord(final Path file) { if (this.canAdd) this.canAdd = false; // NB: should this just be an assignment regardless? // we never wrote a record to disk diff --git a/src/main/java/htsjdk/samtools/util/EdgeReadIterator.java b/src/main/java/htsjdk/samtools/util/EdgeReadIterator.java index 7fd7a69a5f..e555b8a82d 100644 --- a/src/main/java/htsjdk/samtools/util/EdgeReadIterator.java +++ b/src/main/java/htsjdk/samtools/util/EdgeReadIterator.java @@ -33,12 +33,13 @@ * By default duplicate reads and non-primary alignments are filtered out. Filtering may be changed * via setSamFilters(). 
Difference from SamLocusIterator is that this implementation accumulates data * only about start and end of alignment blocks from reads, not about each aligned base. - * + * * @author Darina_Nikolaeva@epam.com, EPAM Systems, Inc. * @author Mariia_Zueva@epam.com, EPAM Systems, Inc. - * + * */ -public class EdgeReadIterator extends AbstractLocusIterator> { +public class EdgeReadIterator + extends AbstractLocusIterator> { // These variables are required to perform the detection of overlap between reads and intervals private Interval currentInterval = null; private final PeekableIterator intervalListIterator; @@ -106,15 +107,16 @@ public EdgeReadIterator(final SamReader samReader, final IntervalList intervalLi * @param rec The record we want to consider * @return True, if rec is fully contained in the current interval, otherwise false */ - protected boolean advanceCurrentIntervalAndCheckIfIntervalContainsRead(final SAMRecord rec) { - // currentInterval should never be null when calling this method, but we have to check it just to make sure, - // so that we don't get a NullPointerException in the return statement. - if (currentInterval == null) { - return false; - } + protected boolean advanceCurrentIntervalAndCheckIfIntervalContainsRead(final SAMRecord rec) { + // currentInterval should never be null when calling this method, but we have to check it just to make sure, + // so that we don't get a NullPointerException in the return statement. + if (currentInterval == null) { + return false; + } // Here we need to update the currentInterval. We have to do this using an // IntervalCoordinateComparator to take factor in the order in the sequence dictionary. 
- while (intervalListIterator.peek() != null && intervalCoordinateComparator.compare(new Interval(rec), intervalListIterator.peek()) > 0) { + while (intervalListIterator.peek() != null + && intervalCoordinateComparator.compare(new Interval(rec), intervalListIterator.peek()) > 0) { currentInterval = intervalListIterator.next(); } return currentInterval.contains(rec); @@ -133,7 +135,8 @@ void accumulateSamRecord(SAMRecord rec) { // In the case that no intervals are passed, or that the current interval completely contains // the current read (which is the most common case for WGS), set needToConsiderIntervals to false, so we don't // have to find intersections and can later emit the read right away. - final boolean needToConsiderIntervals = intervals != null && !advanceCurrentIntervalAndCheckIfIntervalContainsRead(rec); + final boolean needToConsiderIntervals = + intervals != null && !advanceCurrentIntervalAndCheckIfIntervalContainsRead(rec); // interpret the CIGAR string and add the base info for (final AlignmentBlock alignmentBlock : rec.getAlignmentBlocks()) { @@ -141,8 +144,10 @@ void accumulateSamRecord(SAMRecord rec) { // // Example: Read (or more accurately, AlignmentBlock) from position 101 to 108 // - // accumulator (ArrayList) 30 31 32 33 34 35 36 37 38 (has one LocusInfo at each position, corresponding to the genomic position) - // LocusInfo objects (with genomic pos.) 100 101 102 103 104 105 106 107 108 (the LocusInfo objects can contain EdgingRecordAndOffset objects, if a record starts or ends there) + // accumulator (ArrayList) 30 31 32 33 34 35 36 37 38 (has one LocusInfo at each + // position, corresponding to the genomic position) + // LocusInfo objects (with genomic pos.) 
100 101 102 103 104 105 106 107 108 (the LocusInfo objects can + // contain EdgingRecordAndOffset objects, if a record starts or ends there) // ^ ^ // | | // EdgingRecordAndOffset objects B E @@ -167,52 +172,78 @@ void accumulateSamRecord(SAMRecord rec) { // Here we add the first entry to the accumulator, which is the start of this AlignmentBlock. if (accumulator.isEmpty()) { - accumulator.add(createLocusInfo(getReferenceSequence(rec.getReferenceIndex()), rec.getAlignmentStart())); + accumulator.add( + createLocusInfo(getReferenceSequence(rec.getReferenceIndex()), rec.getAlignmentStart())); } // The accumulator should always have LocusInfos that correspond to one consecutive segment of loci from // one reference sequence. So - // accumulator.get(0).getPosition() + accumulator.size() == accumulator.get(accumulator.size()-1).getPosition()+1 + // accumulator.get(0).getPosition() + accumulator.size() == + // accumulator.get(accumulator.size()-1).getPosition()+1 final int accumulatorNextPosition = accumulator.get(0).getPosition() + accumulator.size(); - if (accumulatorNextPosition != accumulator.get(accumulator.size() - 1).getPosition() + 1) { + if (accumulatorNextPosition + != accumulator.get(accumulator.size() - 1).getPosition() + 1) { throw new IllegalStateException("The accumulator has gotten into a funk. Cannot continue"); } // Ensure there are consecutive AbstractLocusInfos up to and including the end of the AlignmentBlock - for (int locusPos = accumulatorNextPosition; locusPos <= referencePositionStartOfAlignmentBlock + alignmentBlock.getLength(); ++locusPos) { + for (int locusPos = accumulatorNextPosition; + locusPos <= referencePositionStartOfAlignmentBlock + alignmentBlock.getLength(); + ++locusPos) { accumulator.add(createLocusInfo(getReferenceSequence(rec.getReferenceIndex()), locusPos)); } // Let's assume an alignment block starts in some locus. // We put two records to the accumulator. 
The first one has the "begin" type which corresponds to the locus - // where the block starts. The second one has the "end" type which corresponds to the other locus where the block ends. + // where the block starts. The second one has the "end" type which corresponds to the other locus where the + // block ends. // 0-based offset from the aligned position of the first base in the read to the aligned position // of the current base. - final int offsetStartOfAlignmentBlockOnReference = referencePositionStartOfAlignmentBlock - rec.getAlignmentStart(); + final int offsetStartOfAlignmentBlockOnReference = + referencePositionStartOfAlignmentBlock - rec.getAlignmentStart(); // Similar for the end of the alignment block. We can simply add the length of the block, since by // definition all bases in an AlignmentBlock match the reference alignment - final int offsetEndOfAlignmentBlockOnReference = offsetStartOfAlignmentBlockOnReference + alignmentBlock.getLength(); + final int offsetEndOfAlignmentBlockOnReference = + offsetStartOfAlignmentBlockOnReference + alignmentBlock.getLength(); if (needToConsiderIntervals) { - // If the read isn't fully contained within the currentInterval, we need to manually handle each of the overlaps. - - for (final Interval interval : overlapDetector.getOverlaps(new Interval(rec.getContig(), referencePositionStartOfAlignmentBlock, referencePositionStartOfAlignmentBlock + alignmentBlock.getLength()))) { - // In case the start position is smaller than the start of the interval, we need to determine the offset (we need this later)... - final int offsetStartOfIntervalInAlignmentBlock = referencePositionStartOfAlignmentBlock < interval.getStart() ? interval.getStart() - referencePositionStartOfAlignmentBlock : 0; + // If the read isn't fully contained within the currentInterval, we need to manually handle each of the + // overlaps. 
+ + for (final Interval interval : overlapDetector.getOverlaps(new Interval( + rec.getContig(), + referencePositionStartOfAlignmentBlock, + referencePositionStartOfAlignmentBlock + alignmentBlock.getLength()))) { + // In case the start position is smaller than the start of the interval, we need to determine the + // offset (we need this later)... + final int offsetStartOfIntervalInAlignmentBlock = + referencePositionStartOfAlignmentBlock < interval.getStart() + ? interval.getStart() - referencePositionStartOfAlignmentBlock + : 0; // ... and add it to the start position to get the actual position from where we want to count. - final int offsetStartOfActualSequenceOnReference = offsetStartOfAlignmentBlockOnReference + offsetStartOfIntervalInAlignmentBlock; + final int offsetStartOfActualSequenceOnReference = + offsetStartOfAlignmentBlockOnReference + offsetStartOfIntervalInAlignmentBlock; // Similarly, we need to determine the actual end of the sequence we want to consider. - final int referencePositionEndOfAlignmentBlock = referencePositionStartOfAlignmentBlock + alignmentBlock.getLength(); - // For that, we find the difference between the end position of the AlignmentBlock and the end of the interval, and subtract it from the offset of end of the AlignmentBlock - final int offsetEndOfActualSequenceOnReference = offsetEndOfAlignmentBlockOnReference - (referencePositionEndOfAlignmentBlock > interval.getEnd() ? referencePositionEndOfAlignmentBlock - interval.getEnd() - 1 : 0); + final int referencePositionEndOfAlignmentBlock = + referencePositionStartOfAlignmentBlock + alignmentBlock.getLength(); + // For that, we find the difference between the end position of the AlignmentBlock and the end of + // the interval, and subtract it from the offset of end of the AlignmentBlock + final int offsetEndOfActualSequenceOnReference = offsetEndOfAlignmentBlockOnReference + - (referencePositionEndOfAlignmentBlock > interval.getEnd() + ? 
referencePositionEndOfAlignmentBlock - interval.getEnd() - 1 + : 0); final int length = offsetEndOfActualSequenceOnReference - offsetStartOfActualSequenceOnReference; // accumulate start of the overlap block - final EdgingRecordAndOffset recordAndOffset = createRecordAndOffset(rec, offsetStartOfAlignmentBlockInRead + offsetStartOfIntervalInAlignmentBlock, length, referencePositionStartOfAlignmentBlock + offsetStartOfIntervalInAlignmentBlock); + final EdgingRecordAndOffset recordAndOffset = createRecordAndOffset( + rec, + offsetStartOfAlignmentBlockInRead + offsetStartOfIntervalInAlignmentBlock, + length, + referencePositionStartOfAlignmentBlock + offsetStartOfIntervalInAlignmentBlock); accumulator.get(offsetStartOfActualSequenceOnReference).add(recordAndOffset); // accumulate end of the overlap block @@ -226,7 +257,8 @@ void accumulateSamRecord(SAMRecord rec) { final int length = offsetEndOfAlignmentBlockOnReference - offsetStartOfAlignmentBlockOnReference; // accumulate start of the alignment block - final EdgingRecordAndOffset recordAndOffset = createRecordAndOffset(rec, offsetStartOfAlignmentBlockInRead, length, referencePositionStartOfAlignmentBlock); + final EdgingRecordAndOffset recordAndOffset = createRecordAndOffset( + rec, offsetStartOfAlignmentBlockInRead, length, referencePositionStartOfAlignmentBlock); accumulator.get(offsetStartOfAlignmentBlockOnReference).add(recordAndOffset); // accumulate end of the alignment block @@ -238,7 +270,8 @@ void accumulateSamRecord(SAMRecord rec) { @Override void accumulateIndels(SAMRecord rec) { - throw new UnsupportedOperationException("Indels accumulation is not supported for " + getClass().getSimpleName() + "."); + throw new UnsupportedOperationException( + "Indels accumulation is not supported for " + getClass().getSimpleName() + "."); } /** @@ -278,7 +311,8 @@ AbstractLocusInfo createLocusInfo(SAMSequenceRecord refer @Override public void setMaxReadsToAccumulatePerLocus(int maxReadsToAccumulatePerLocus) { if 
(getMaxReadsToAccumulatePerLocus() != 0) { - throw new UnsupportedOperationException("Locus cap is not supported for " + getClass().getSimpleName() + "."); + throw new UnsupportedOperationException( + "Locus cap is not supported for " + getClass().getSimpleName() + "."); } } @@ -289,7 +323,8 @@ public void setMaxReadsToAccumulatePerLocus(int maxReadsToAccumulatePerLocus) { */ @Override public void setQualityScoreCutoff(int qualityScoreCutoff) { - throw new UnsupportedOperationException("Quality filtering is not supported for " + getClass().getSimpleName() + "."); + throw new UnsupportedOperationException( + "Quality filtering is not supported for " + getClass().getSimpleName() + "."); } /** @@ -301,15 +336,16 @@ public void setQualityScoreCutoff(int qualityScoreCutoff) { @Override public void setEmitUncoveredLoci(boolean emitUncoveredLoci) { if (isEmitUncoveredLoci() != emitUncoveredLoci) { - throw new UnsupportedOperationException(getClass().getSimpleName() + " doesn't support work with skipping " + - "uncovered bases."); + throw new UnsupportedOperationException( + getClass().getSimpleName() + " doesn't support work with skipping " + "uncovered bases."); } } @Override public void setIncludeIndels(boolean includeIndels) { if (isIncludeIndels() != includeIndels) { - throw new UnsupportedOperationException("Indels accumulation is not supported for " + getClass().getSimpleName() + "."); + throw new UnsupportedOperationException( + "Indels accumulation is not supported for " + getClass().getSimpleName() + "."); } } } diff --git a/src/main/java/htsjdk/samtools/util/EdgingRecordAndOffset.java b/src/main/java/htsjdk/samtools/util/EdgingRecordAndOffset.java index df282b00fc..2c01719ae5 100644 --- a/src/main/java/htsjdk/samtools/util/EdgingRecordAndOffset.java +++ b/src/main/java/htsjdk/samtools/util/EdgingRecordAndOffset.java @@ -24,11 +24,11 @@ package htsjdk.samtools.util; -import htsjdk.samtools.SAMRecord; - import static 
htsjdk.samtools.util.EdgingRecordAndOffset.Type.BEGIN; import static htsjdk.samtools.util.EdgingRecordAndOffset.Type.END; +import htsjdk.samtools.SAMRecord; + /** * Holds a SAMRecord plus the zero-based offset into that SAMRecord's bases and quality scores that corresponds * to the base and quality for the start of alignment block at the genomic position described by the AbstractLocusInfo. @@ -40,7 +40,7 @@ * as for each alignment block two objects of EdgingRecordAndOffset are created with two different types. * The main idea of using EdgeReadIterator is to process alignment block starting from locus where BEGIN type occurs, * aggregate information per locus and keep it until END type occurs, then remove alignment block from consideration. - * + * * @author Darina_Nikolaeva@epam.com, EPAM Systems, Inc. * @author Mariia_Zueva@epam.com, EPAM Systems, Inc. */ @@ -71,7 +71,8 @@ public static EdgingRecordAndOffset createEndRecord(EdgingRecordAndOffset startR * an alignment block. */ public enum Type { - BEGIN, END + BEGIN, + END } private static class StartEdgingRecordAndOffset extends EdgingRecordAndOffset { @@ -166,7 +167,7 @@ private static class EndEdgingRecordAndOffset extends EdgingRecordAndOffset { /** * For object with type END this fields holds the reference to object with type BEGIN for the read. 
*/ - final private EdgingRecordAndOffset start; + private final EdgingRecordAndOffset start; EndEdgingRecordAndOffset(EdgingRecordAndOffset record) { super(record.getRecord(), record.getOffset()); diff --git a/src/main/java/htsjdk/samtools/util/FastLineReader.java b/src/main/java/htsjdk/samtools/util/FastLineReader.java index 95d620267c..00ad6cd379 100644 --- a/src/main/java/htsjdk/samtools/util/FastLineReader.java +++ b/src/main/java/htsjdk/samtools/util/FastLineReader.java @@ -24,7 +24,6 @@ package htsjdk.samtools.util; import htsjdk.samtools.SAMException; - import java.io.Closeable; import java.io.IOException; import java.io.InputStream; @@ -90,7 +89,7 @@ public void close() { * @return Next byte from the input. Do not call if at EOF. */ public byte getByte() { - final byte ret = peekByte(); + final byte ret = peekByte(); ++nextByte; ensureBufferNotEmpty(); return ret; @@ -168,5 +167,4 @@ private boolean ensureBufferNotEmpty() { throw new SAMException("Exception reading InputStream", e); } } - } diff --git a/src/main/java/htsjdk/samtools/util/FileAppendStreamLRUCache.java b/src/main/java/htsjdk/samtools/util/FileAppendStreamLRUCache.java index 500b931824..f58a68aec5 100644 --- a/src/main/java/htsjdk/samtools/util/FileAppendStreamLRUCache.java +++ b/src/main/java/htsjdk/samtools/util/FileAppendStreamLRUCache.java @@ -24,7 +24,6 @@ package htsjdk.samtools.util; import htsjdk.samtools.SAMException; - import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; @@ -51,16 +50,14 @@ private static class Functor implements ResourceLimitedMapFunctor FASTA = Collections.unmodifiableSet(new HashSet<>(Arrays.asList( - ".fasta", - ".fasta.gz", - ".fas", - ".fas.gz", - ".fa", - ".fa.gz", - ".fna", - ".fna.gz", - ".txt", - ".txt.gz" - ))); - + ".fasta", ".fasta.gz", ".fas", ".fas.gz", ".fa", ".fa.gz", ".fna", ".fna.gz", ".txt", ".txt.gz"))); + public static final String FASTA_INDEX = ".fai"; /** extensions for alignment files SAM, BAM, CRAM. 
*/ public static final String SAM = ".sam"; + public static final String BAM = ".bam"; public static final String BAI_INDEX = ".bai"; public static final String CRAM = ".cram"; public static final String CRAM_INDEX = ".crai"; - + public static final String BED = ".bed"; public static final String TABIX_INDEX = ".tbi"; public static final String TRIBBLE_INDEX = ".idx"; /** extensions for VCF files and related formats. */ public static final String VCF = ".vcf"; + public static final String VCF_INDEX = TRIBBLE_INDEX; public static final String BCF = ".bcf"; - public static final String COMPRESSED_VCF_BGZ = ".vcf.bgz"; // suffix used by gnomad see https://gnomad.broadinstitute.org/data#v4 + public static final String COMPRESSED_VCF_BGZ = + ".vcf.bgz"; // suffix used by gnomad see https://gnomad.broadinstitute.org/data#v4 public static final String COMPRESSED_VCF = ".vcf.gz"; public static final String COMPRESSED_VCF_INDEX = ".tbi"; - public static final List VCF_LIST = Collections.unmodifiableList(Arrays.asList(VCF, COMPRESSED_VCF, COMPRESSED_VCF_BGZ, BCF)); + public static final List VCF_LIST = + Collections.unmodifiableList(Arrays.asList(VCF, COMPRESSED_VCF, COMPRESSED_VCF_BGZ, BCF)); public static final String INTERVAL_LIST = ".interval_list"; public static final String COMPRESSED_INTERVAL_LIST = ".interval_list.gz"; @@ -78,7 +72,9 @@ public final class FileExtensions { public static final String CSI = ".csi"; public static final String MD5 = ".md5"; - public static final Set BLOCK_COMPRESSED = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(".gz", ".gzip", ".bgz", ".bgzf"))); + public static final Set BLOCK_COMPRESSED = + Collections.unmodifiableSet(new HashSet<>(Arrays.asList(".gz", ".gzip", ".bgz", ".bgzf"))); - public static final Set GFF3 = Collections.unmodifiableSet(new HashSet<>(Arrays.asList(".gff3", ".gff", ".gff3.gz", ".gff.gz"))); + public static final Set GFF3 = + Collections.unmodifiableSet(new HashSet<>(Arrays.asList(".gff3", ".gff", 
".gff3.gz", ".gff.gz"))); } diff --git a/src/main/java/htsjdk/samtools/util/FormatUtil.java b/src/main/java/htsjdk/samtools/util/FormatUtil.java index a80c240754..a0c4dfb013 100644 --- a/src/main/java/htsjdk/samtools/util/FormatUtil.java +++ b/src/main/java/htsjdk/samtools/util/FormatUtil.java @@ -25,7 +25,6 @@ package htsjdk.samtools.util; import htsjdk.samtools.SAMException; - import java.io.File; import java.math.RoundingMode; import java.security.InvalidParameterException; @@ -48,9 +47,9 @@ public class FormatUtil { private final NumberFormat integerFormat; private final NumberFormat floatFormat; - /** Constructs a new FormatUtil and initializes various internal formatters. - * This is necessary because SimpleDateFormat and other formatters are not threadsafe. - */ + /** Constructs a new FormatUtil and initializes various internal formatters. + * This is necessary because SimpleDateFormat and other formatters are not threadsafe. + */ public FormatUtil() { this.dateFormat = new SimpleDateFormat("yyyy-MM-dd"); @@ -62,7 +61,7 @@ public FormatUtil() { this.floatFormat.setMaximumFractionDigits(DECIMAL_DIGITS_TO_PRINT); this.floatFormat.setRoundingMode(RoundingMode.HALF_DOWN); if (this.floatFormat instanceof DecimalFormat) { - final DecimalFormat decimalFormat = (DecimalFormat)this.floatFormat; + final DecimalFormat decimalFormat = (DecimalFormat) this.floatFormat; final DecimalFormatSymbols decimalFormatSymbols = decimalFormat.getDecimalFormatSymbols(); decimalFormatSymbols.setNaN("?"); decimalFormatSymbols.setInfinity("?"); @@ -71,48 +70,69 @@ public FormatUtil() { } /** Formats a short to an integer string. */ - public String format(short value) { return this.integerFormat.format(value); } + public String format(short value) { + return this.integerFormat.format(value); + } /** Formats an int to an integer string. 
*/ - public String format(int value) { return this.integerFormat.format(value); } + public String format(int value) { + return this.integerFormat.format(value); + } /** Formats a long to an integer string. */ - public String format(long value) { return this.integerFormat.format(value); } + public String format(long value) { + return this.integerFormat.format(value); + } /** Formats a float to a floating point string. */ - public String format(float value) {return this.floatFormat.format(value); } + public String format(float value) { + return this.floatFormat.format(value); + } /** Formats a double to a floating point string. */ - public String format(double value) {return this.floatFormat.format(value); } + public String format(double value) { + return this.floatFormat.format(value); + } /** Formats a char as a string. */ - public String format(char value) { return Character.toString(value); } + public String format(char value) { + return Character.toString(value); + } /** Formats an enum to the String representation of an enum. */ - public String format(Enum value) { return value.name(); } + public String format(Enum value) { + return value.name(); + } /** Formats a date to a date string without time. */ - public String format(Date value) { return this.dateFormat.format(value); } + public String format(Date value) { + return this.dateFormat.format(value); + } - /** Formats date & time */ - public String format(final Iso8601Date value) { return value.toString(); } + /** Formats date and time */ + public String format(final Iso8601Date value) { + return value.toString(); + } /** Formats a boolean value to a String. */ - public String format(boolean value) { if (value) return "Y"; else return "N"; } + public String format(boolean value) { + if (value) return "Y"; + else return "N"; + } /** Attempts to determine the type of value and format it appropriately. 
*/ public String format(Object value) { if (value == null) return ""; - if (value instanceof Short) return format( ((Short) value).shortValue() ); - if (value instanceof Integer) return format( ((Integer) value).intValue() ); - if (value instanceof Long) return format( ((Long) value).longValue() ); - if (value instanceof Float) return format( ((Float) value).floatValue() ); - if (value instanceof Double) return format( ((Double) value).doubleValue() ); - if (value instanceof Enum) return format( ((Enum) value) ); - if (value instanceof Iso8601Date) return format((Iso8601Date)value); - if (value instanceof Date) return format( ((Date) value) ); - if (value instanceof Boolean) return format( ((Boolean) value).booleanValue() ); - if (value instanceof Character) return format( ((Character)value).charValue() ); + if (value instanceof Short) return format(((Short) value).shortValue()); + if (value instanceof Integer) return format(((Integer) value).intValue()); + if (value instanceof Long) return format(((Long) value).longValue()); + if (value instanceof Float) return format(((Float) value).floatValue()); + if (value instanceof Double) return format(((Double) value).doubleValue()); + if (value instanceof Enum) return format(((Enum) value)); + if (value instanceof Iso8601Date) return format((Iso8601Date) value); + if (value instanceof Date) return format(((Date) value)); + if (value instanceof Boolean) return format(((Boolean) value).booleanValue()); + if (value instanceof Character) return format(((Character) value).charValue()); return value.toString(); } @@ -121,13 +141,19 @@ public String format(Object value) { /////////////////////////////////////////////////////////////////////////// /** Parses a String into a short. */ - public short parseShort(String value) { return Short.parseShort(value); } + public short parseShort(String value) { + return Short.parseShort(value); + } /** Parses a String into an int. 
*/ - public int parseInt(String value) { return Integer.parseInt(value); } + public int parseInt(String value) { + return Integer.parseInt(value); + } /** Parses a String into a long. */ - public long parseLong(String value) { return Long.parseLong(value); } + public long parseLong(String value) { + return Long.parseLong(value); + } /** Parses a String into a float. */ public float parseFloat(String value) { @@ -137,25 +163,28 @@ public float parseFloat(String value) { /** Parses a String into a double. */ public double parseDouble(String value) { - if ("?".equals(value) || "-?".equals(value)) return Double.NaN; + if ("?".equals(value) || "-?".equals(value)) return Double.NaN; else return Double.parseDouble(value); } /** Parses a String into an Enum of the given type. */ - public E parseEnum(String value, Class type) { return (E) Enum.valueOf(type, value); } + public E parseEnum(String value, Class type) { + return (E) Enum.valueOf(type, value); + } /** Parses a String into a date. */ public Date parseDate(String value) { try { return this.dateFormat.parse(value); - } - catch (ParseException pe) { + } catch (ParseException pe) { throw new SAMException("Could not parse value as date: " + value, pe); } } /** Parse a String into an Iso8601 Date */ - public Iso8601Date parseIso8601Date(String value) { return new Iso8601Date(value); } + public Iso8601Date parseIso8601Date(String value) { + return new Iso8601Date(value); + } /** Parses a String into a boolean, as per the above convention that true = Y and false = N. 
*/ public boolean parseBoolean(String value) { @@ -184,18 +213,18 @@ public char parseChar(String value) { * @return an object of the returnType */ public Object parseObject(String value, Class returnType) { - if (returnType == Short.class || returnType == Short.TYPE) return parseShort(value); - if (returnType == Integer.class || returnType == Integer.TYPE) return parseInt(value); - if (returnType == Long.class || returnType == Long.TYPE) return parseLong(value); - if (returnType == Float.class || returnType == Float.TYPE) return parseFloat(value); - if (returnType == Double.class || returnType == Double.TYPE) return parseDouble(value); - if (returnType == Boolean.class || returnType == Boolean.TYPE) return parseBoolean(value); - if (returnType == Byte.class || returnType == Byte.TYPE) return parseInt(value); + if (returnType == Short.class || returnType == Short.TYPE) return parseShort(value); + if (returnType == Integer.class || returnType == Integer.TYPE) return parseInt(value); + if (returnType == Long.class || returnType == Long.TYPE) return parseLong(value); + if (returnType == Float.class || returnType == Float.TYPE) return parseFloat(value); + if (returnType == Double.class || returnType == Double.TYPE) return parseDouble(value); + if (returnType == Boolean.class || returnType == Boolean.TYPE) return parseBoolean(value); + if (returnType == Byte.class || returnType == Byte.TYPE) return parseInt(value); if (returnType == Character.class || returnType == Character.TYPE) return parseChar(value); - if (returnType == Iso8601Date.class) return parseIso8601Date(value); - if (returnType == Date.class) return parseDate(value); - if (returnType == File.class) return new File(value); - if (Enum.class.isAssignableFrom(returnType)) return parseEnum(value, (Class)returnType); + if (returnType == Iso8601Date.class) return parseIso8601Date(value); + if (returnType == Date.class) return parseDate(value); + if (returnType == File.class) return new File(value); + if 
(Enum.class.isAssignableFrom(returnType)) return parseEnum(value, (Class) returnType); if (returnType == String.class) return value; throw new InvalidParameterException("Don't know how to convert a String to a " + returnType.getName()); diff --git a/src/main/java/htsjdk/samtools/util/GZIIndex.java b/src/main/java/htsjdk/samtools/util/GZIIndex.java index 095ddab349..ff482720bf 100644 --- a/src/main/java/htsjdk/samtools/util/GZIIndex.java +++ b/src/main/java/htsjdk/samtools/util/GZIIndex.java @@ -27,7 +27,6 @@ import java.io.*; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.nio.channels.ByteChannel; import java.nio.channels.Channels; import java.nio.channels.ReadableByteChannel; import java.nio.file.Files; @@ -93,9 +92,9 @@ public long getUncompressedOffset() { @Override public String toString() { - return String.format("IndexEntry={compressed=%d(0x%x),uncompressed=%d(0x%x)", - compressedOffset, compressedOffset, - uncompressedOffset, uncompressedOffset); + return String.format( + "IndexEntry={compressed=%d(0x%x),uncompressed=%d(0x%x)", + compressedOffset, compressedOffset, uncompressedOffset, uncompressedOffset); } @Override @@ -104,8 +103,7 @@ public boolean equals(final Object obj) { return false; } final IndexEntry other = (IndexEntry) obj; - return compressedOffset == other.compressedOffset - && uncompressedOffset == other.uncompressedOffset; + return compressedOffset == other.compressedOffset && uncompressedOffset == other.uncompressedOffset; } @Override @@ -167,7 +165,8 @@ public long getVirtualOffsetForSeek(final long uncompressedOffset) { } // binary search in the entries for the uncompressed offset - final int pos = Collections.binarySearch(entries, + final int pos = Collections.binarySearch( + entries, // this is a fake index for getting the uncompressed offsets new IndexEntry(0, uncompressedOffset), Comparator.comparingLong(IndexEntry::getUncompressedOffset)); @@ -199,7 +198,8 @@ public long getVirtualOffsetForSeek(final long 
uncompressedOffset) { // we use the file pointer utils to convert to the virtual-offset representation return BlockCompressedFilePointerUtil.makeFilePointer(indexEntry.getCompressedOffset(), blockOffset); } catch (ArithmeticException e) { - throw new IllegalArgumentException("Cannot handle offsets within blocks larger than " + Integer.MAX_VALUE, e); + throw new IllegalArgumentException( + "Cannot handle offsets within blocks larger than " + Integer.MAX_VALUE, e); } } @@ -317,7 +317,7 @@ public static final GZIIndex loadIndex(final String source, final ReadableByteCh buffer.order(ByteOrder.LITTLE_ENDIAN); if (Long.BYTES != channel.read(buffer)) { - throw getCorruptedIndexException(source, "less than " + Long.BYTES+ "bytes", null); + throw getCorruptedIndexException(source, "less than " + Long.BYTES + "bytes", null); } buffer.flip(); @@ -326,8 +326,10 @@ public static final GZIIndex loadIndex(final String source, final ReadableByteCh numberOfEntries = Math.toIntExact(buffer.getLong()); } catch (ArithmeticException e) { buffer.flip(); - throw getCorruptedIndexException(source, - String.format("HTSJDK cannot handle more than %d entries in .gzi index, but found %s", + throw getCorruptedIndexException( + source, + String.format( + "HTSJDK cannot handle more than %d entries in .gzi index, but found %s", Integer.MAX_VALUE, buffer.getLong()), e); } @@ -354,9 +356,9 @@ public static final GZIIndex loadIndex(final String source, final ReadableByteCh } } else if (entries.get(i - 1).getCompressedOffset() >= entry.getCompressedOffset() || entries.get(i - 1).getUncompressedOffset() >= entry.getUncompressedOffset()) { - throw getCorruptedIndexException(source, - String.format("index entries in misplaced order - %s vs %s", - entries.get(i - 1), entry), + throw getCorruptedIndexException( + source, + String.format("index entries in misplaced order - %s vs %s", entries.get(i - 1), entry), null); } @@ -366,11 +368,10 @@ public static final GZIIndex loadIndex(final String source, final 
ReadableByteCh return new GZIIndex(entries); } - private static final IOException getCorruptedIndexException(final String source, final String msg, final Exception e) { - return new IOException(String.format("Corrupted index file: %s (%s)", - msg, - source == null ? "unknown" : source), - e); + private static final IOException getCorruptedIndexException( + final String source, final String msg, final Exception e) { + return new IOException( + String.format("Corrupted index file: %s (%s)", msg, source == null ? "unknown" : source), e); } /** @@ -390,7 +391,8 @@ public static final GZIIndex buildIndex(final Path bgzipFile) throws IOException throw new IllegalArgumentException("null input path"); } // open the file for reading as a block-compressed file - try (final BlockCompressedInputStream bgzipStream = new BlockCompressedInputStream(Files.newInputStream(bgzipFile))) { + try (final BlockCompressedInputStream bgzipStream = + new BlockCompressedInputStream(Files.newInputStream(bgzipFile))) { // store the entries as a list final List entries = new ArrayList<>(); @@ -403,7 +405,8 @@ public static final GZIIndex buildIndex(final Path bgzipFile) throws IOException // if we are at the end of the block if (bgzipStream.endOfBlock()) { // gets the block address (compressed offset) - requires to parse with the file pointer utils - final long compressed = BlockCompressedFilePointerUtil.getBlockAddress(bgzipStream.getFilePointer()); + final long compressed = + BlockCompressedFilePointerUtil.getBlockAddress(bgzipStream.getFilePointer()); // add a new IndexEntry entries.add(new IndexEntry(compressed, currentOffset)); } @@ -439,8 +442,7 @@ public static Path resolveIndexNameForBgzipFile(final Path bgzipFile) { } // helper method for allocate a buffer for read/write - private static final ByteBuffer allocateBuffer(final int numberOfEntries, - final boolean includeNumberOfEntries) { + private static final ByteBuffer allocateBuffer(final int numberOfEntries, final boolean 
includeNumberOfEntries) { // everything is encoded as an unsigned long int size = (includeNumberOfEntries) ? Long.BYTES : 0; size += numberOfEntries * 2 * Long.BYTES; @@ -467,7 +469,8 @@ public GZIIndexer(final Path outputFile) throws IOException { output = Files.newOutputStream(outputFile); } - // Adds a new index location given the compressed file offset and a running tally based on the uncompressed block sizes + // Adds a new index location given the compressed file offset and a running tally based on the uncompressed + // block sizes public void addGzipBlock(final long compressedFileOffset, final long uncompressedBlockSize) { IndexEntry indexEntry = new IndexEntry(compressedFileOffset, uncompressedFileOffset); uncompressedFileOffset += uncompressedBlockSize; @@ -477,7 +480,7 @@ public void addGzipBlock(final long compressedFileOffset, final long uncompresse @Override public void close() throws IOException { GZIIndex index = new GZIIndex(entries); - index.writeIndex(output); //NOTE this relies on writeIndex closing the output stream for it + index.writeIndex(output); // NOTE this relies on writeIndex closing the output stream for it } } } diff --git a/src/main/java/htsjdk/samtools/util/GzipCodec.java b/src/main/java/htsjdk/samtools/util/GzipCodec.java new file mode 100644 index 0000000000..d2d3f90854 --- /dev/null +++ b/src/main/java/htsjdk/samtools/util/GzipCodec.java @@ -0,0 +1,386 @@ +/* + * The MIT License + * + * Copyright (c) 2024 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission 
notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools.util; + +import htsjdk.samtools.Defaults; +import htsjdk.samtools.util.zip.DeflaterFactory; +import htsjdk.samtools.util.zip.InflaterFactory; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.zip.CRC32; +import java.util.zip.DataFormatException; +import java.util.zip.Deflater; +import java.util.zip.Inflater; + +/** + * A reusable codec for compressing and decompressing GZIP and BGZF data using direct + * {@link Deflater}/{@link Inflater} operations on {@link ByteBuffer}s. Designed to be + * allocated once and reused across many compress/decompress operations. + * + *

    Supports two output formats for compression: + *

      + *
    • {@link Format#GZIP} — standard 10-byte GZIP header (RFC 1952)
    • + *
    • {@link Format#BGZF} — BGZF header with BC extra subfield (SAM/BAM spec)
    • + *
    + * + *

    Decompression handles both formats transparently by parsing the FLG byte and + * skipping any optional GZIP fields. + * + *

    Not thread-safe. Use one instance per thread. + */ +public class GzipCodec { + + /** The output format for compression. */ + public enum Format { + GZIP, + BGZF + } + + // Standard GZIP header: 10 bytes (RFC 1952) + private static final int GZIP_HEADER_SIZE = 10; + + // BGZF header: 18 bytes (standard GZIP + FEXTRA with BC subfield) + private static final int BGZF_HEADER_SIZE = BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH; + + // GZIP trailer: CRC32 (4 bytes) + ISIZE (4 bytes) + private static final int GZIP_TRAILER_SIZE = 8; + + // GZIP magic bytes + private static final byte GZIP_ID1 = BlockCompressedStreamConstants.GZIP_ID1; + private static final byte GZIP_ID2 = (byte) BlockCompressedStreamConstants.GZIP_ID2; + private static final byte GZIP_CM_DEFLATE = BlockCompressedStreamConstants.GZIP_CM_DEFLATE; + + // GZIP FLG bits + private static final int FTEXT = 1; + private static final int FHCRC = 2; + private static final int FEXTRA = 4; + private static final int FNAME = 8; + private static final int FCOMMENT = 16; + + private final Deflater deflater; + private final Inflater inflater; + private final CRC32 crc32 = new CRC32(); + private boolean checkCrcs = false; + + /** Create a codec with the default compression level and default strategy. */ + public GzipCodec() { + this(Defaults.COMPRESSION_LEVEL, Deflater.DEFAULT_STRATEGY); + } + + /** Create a codec with the specified compression level and default strategy. */ + public GzipCodec(final int compressionLevel) { + this(compressionLevel, Deflater.DEFAULT_STRATEGY); + } + + /** Create a codec with the specified compression level and deflate strategy. */ + public GzipCodec(final int compressionLevel, final int deflateStrategy) { + this(compressionLevel, deflateStrategy, new DeflaterFactory(), new InflaterFactory()); + } + + /** + * Create a codec with full control over compression parameters and factory implementations. 
+ * + * @param compressionLevel deflate compression level (0-9) + * @param deflateStrategy deflate strategy (e.g., {@link Deflater#DEFAULT_STRATEGY}, {@link Deflater#FILTERED}) + * @param deflaterFactory factory for creating Deflater instances + * @param inflaterFactory factory for creating Inflater instances + */ + public GzipCodec( + final int compressionLevel, + final int deflateStrategy, + final DeflaterFactory deflaterFactory, + final InflaterFactory inflaterFactory) { + // nowrap=true: we produce raw deflate and handle GZIP framing ourselves + this.deflater = deflaterFactory.makeDeflater(compressionLevel, true); + this.deflater.setStrategy(deflateStrategy); + this.inflater = inflaterFactory.makeInflater(true); + } + + /** Enable or disable CRC32 validation during decompression. */ + public void setCheckCrcs(final boolean check) { + this.checkCrcs = check; + } + + // -------------------------------------------------------------------------------------------- + // Compression + // -------------------------------------------------------------------------------------------- + + /** + * Compress data from {@code input} into {@code output} using standard GZIP format. + * + * @param input data to compress (from position to limit; position is advanced to limit) + * @param output buffer to write compressed data into (from position; position is advanced) + * @return number of bytes written to output + */ + public int compress(final ByteBuffer input, final ByteBuffer output) { + return compress(input, output, Format.GZIP); + } + + /** + * Compress data from {@code input} into {@code output} using the specified format. 
+ * + * @param input data to compress (from position to limit; position is advanced to limit) + * @param output buffer to write compressed data into (from position; position is advanced) + * @param format the output format ({@link Format#GZIP} or {@link Format#BGZF}) + * @return number of bytes written to output + */ + public int compress(final ByteBuffer input, final ByteBuffer output, final Format format) { + final int outputStart = output.position(); + final int inputSize = input.remaining(); + + // Compute CRC32 over the uncompressed input + crc32.reset(); + final int inputPos = input.position(); + // Use a slice to avoid disturbing input's position + final ByteBuffer crcSlice = input.slice(); + crc32.update(crcSlice); + + // Write header (reserves space; for BGZF the block size is patched after deflation) + final int headerSize = writeHeader(output, format); + + // Extract input bytes for deflater (byte[] API for compatibility with LibdeflateDeflater) + final byte[] inputBytes; + final int inputOff; + input.position(inputPos); + if (input.hasArray()) { + inputBytes = input.array(); + inputOff = input.arrayOffset() + inputPos; + } else { + inputBytes = new byte[inputSize]; + input.get(inputBytes); + inputOff = 0; + } + + // Deflate into a temporary byte[] then copy to output buffer + deflater.reset(); + deflater.setInput(inputBytes, inputOff, inputSize); + deflater.finish(); + while (!deflater.finished()) { + final int n = + deflater.deflate(output.array(), output.arrayOffset() + output.position(), output.remaining()); + output.position(output.position() + n); + } + + // Write trailer: CRC32 + ISIZE (little-endian) + output.order(ByteOrder.LITTLE_ENDIAN); + output.putInt((int) crc32.getValue()); + output.putInt(inputSize); + + // For BGZF, patch the total block size into the header + if (format == Format.BGZF) { + final int totalBlockSize = output.position() - outputStart; + output.order(ByteOrder.LITTLE_ENDIAN); + output.putShort( + outputStart + 
BlockCompressedStreamConstants.BLOCK_LENGTH_OFFSET, (short) (totalBlockSize - 1)); + } + + return output.position() - outputStart; + } + + /** + * Compress data and return a new ByteBuffer containing the compressed result. + * + * @param input data to compress (from position to limit; position is advanced to limit) + * @return a new ByteBuffer containing the compressed data, positioned at 0 with limit at the end + */ + public ByteBuffer compress(final ByteBuffer input) { + return compress(input, Format.GZIP); + } + + /** + * Compress data and return a new ByteBuffer containing the compressed result. + * + * @param input data to compress (from position to limit; position is advanced to limit) + * @param format the output format + * @return a new ByteBuffer containing the compressed data, positioned at 0 with limit at the end + */ + public ByteBuffer compress(final ByteBuffer input, final Format format) { + // Worst case: incompressible data + header + trailer. Deflater overhead is at most + // 5 bytes per 32KB block + a few bytes for the zlib wrapper. + final int maxCompressed = input.remaining() + (input.remaining() / 16000 + 1) * 5 + 256; + final int headerSize = format == Format.BGZF ? BGZF_HEADER_SIZE : GZIP_HEADER_SIZE; + final ByteBuffer output = ByteBuffer.allocate(headerSize + maxCompressed + GZIP_TRAILER_SIZE); + compress(input, output, format); + output.flip(); + return output; + } + + /** Write a GZIP or BGZF header to the output buffer. Returns the header size. 
*/ + private int writeHeader(final ByteBuffer output, final Format format) { + if (format == Format.BGZF) { + output.put(GZIP_ID1); + output.put(GZIP_ID2); + output.put(GZIP_CM_DEFLATE); + output.put((byte) FEXTRA); // FLG: FEXTRA set + output.putInt(0); // MTIME + output.put((byte) 0); // XFL + output.put((byte) 0xFF); // OS: unknown + output.order(ByteOrder.LITTLE_ENDIAN); + output.putShort(BlockCompressedStreamConstants.GZIP_XLEN); // XLEN = 6 + output.put(BlockCompressedStreamConstants.BGZF_ID1); // SI1 = 'B' + output.put(BlockCompressedStreamConstants.BGZF_ID2); // SI2 = 'C' + output.putShort(BlockCompressedStreamConstants.BGZF_LEN); // SLEN = 2 + output.putShort((short) 0); // BSIZE placeholder — patched after deflation + return BGZF_HEADER_SIZE; + } else { + output.put(GZIP_ID1); + output.put(GZIP_ID2); + output.put(GZIP_CM_DEFLATE); + output.put((byte) 0); // FLG: no optional fields + output.putInt(0); // MTIME + output.put((byte) 0); // XFL + output.put((byte) 0xFF); // OS: unknown + return GZIP_HEADER_SIZE; + } + } + + // -------------------------------------------------------------------------------------------- + // Decompression + // -------------------------------------------------------------------------------------------- + + /** + * Decompress GZIP or BGZF data from {@code input} into {@code output}. + * Handles both standard GZIP and BGZF transparently. 
+ * + * @param input compressed data (from position to limit; position is advanced) + * @param output buffer to write decompressed data into (from position; position is advanced) + * @return number of decompressed bytes written to output + */ + public int decompress(final ByteBuffer input, final ByteBuffer output) { + input.order(ByteOrder.LITTLE_ENDIAN); + + // Parse and validate the GZIP header + if (input.remaining() < GZIP_HEADER_SIZE + GZIP_TRAILER_SIZE) { + throw new IllegalArgumentException("Input too small to be a valid GZIP block"); + } + + final byte id1 = input.get(); + final byte id2 = input.get(); + final byte cm = input.get(); + final int flg = input.get() & 0xFF; + if (id1 != GZIP_ID1 || id2 != GZIP_ID2 || cm != GZIP_CM_DEFLATE) { + throw new IllegalArgumentException("Invalid GZIP header"); + } + + input.position(input.position() + 6); // skip MTIME(4) + XFL(1) + OS(1) + + // Handle optional GZIP fields based on FLG bits + if ((flg & FEXTRA) != 0) { + final int xlen = input.getShort() & 0xFFFF; + input.position(input.position() + xlen); // skip extra field (includes BGZF subfield if present) + } + if ((flg & FNAME) != 0) { + while (input.get() != 0) {} // skip null-terminated filename + } + if ((flg & FCOMMENT) != 0) { + while (input.get() != 0) {} // skip null-terminated comment + } + if ((flg & FHCRC) != 0) { + input.position(input.position() + 2); // skip header CRC16 + } + + // The deflated data is between the current position and 8 bytes before the end + final int deflatedStart = input.position(); + final int deflatedEnd = input.limit() - GZIP_TRAILER_SIZE; + final int deflatedSize = deflatedEnd - deflatedStart; + if (deflatedSize < 0) { + throw new IllegalArgumentException("Invalid GZIP block: no room for deflated data and trailer"); + } + + // Extract deflated bytes for inflater (byte[] API for compatibility with LibdeflateInflater) + final byte[] deflatedBytes; + final int deflatedOff; + if (input.hasArray()) { + deflatedBytes = 
input.array(); + deflatedOff = input.arrayOffset() + deflatedStart; + } else { + deflatedBytes = new byte[deflatedSize]; + input.position(deflatedStart); + input.get(deflatedBytes); + deflatedOff = 0; + } + + inflater.reset(); + inflater.setInput(deflatedBytes, deflatedOff, deflatedSize); + + // Inflate into output + try { + int totalInflated = 0; + while (!inflater.finished() && output.hasRemaining()) { + final int n = + inflater.inflate(output.array(), output.arrayOffset() + output.position(), output.remaining()); + output.position(output.position() + n); + totalInflated += n; + } + + // Read trailer: CRC32 + ISIZE + input.position(deflatedEnd); + final int expectedCrc = input.getInt(); + final int expectedSize = input.getInt(); + + if (totalInflated != expectedSize) { + throw new IllegalStateException( + String.format("GZIP ISIZE mismatch: expected %d, got %d", expectedSize, totalInflated)); + } + + // Validate CRC32 if enabled + if (checkCrcs) { + crc32.reset(); + final ByteBuffer outputSlice = output.duplicate(); + outputSlice.flip(); + // Position to where we started writing + outputSlice.position(output.position() - totalInflated); + crc32.update(outputSlice); + if ((int) crc32.getValue() != expectedCrc) { + throw new IllegalStateException(String.format( + "GZIP CRC32 mismatch: expected %08x, got %08x", expectedCrc, (int) crc32.getValue())); + } + } + + return totalInflated; + } catch (final DataFormatException e) { + throw new IllegalStateException("Error inflating GZIP data", e); + } + } + + /** + * Decompress GZIP or BGZF data and return a new ByteBuffer containing the result. + * Reads the ISIZE field from the GZIP trailer to determine the output size. 
+ * + * @param input compressed data (from position to limit; position is advanced) + * @return a new ByteBuffer containing the decompressed data, positioned at 0 with limit at the end + */ + public ByteBuffer decompress(final ByteBuffer input) { + // Read ISIZE from the last 4 bytes of the GZIP block to size the output + final int isizeOffset = input.limit() - 4; + final int isize = input.duplicate() + .order(ByteOrder.LITTLE_ENDIAN) + .position(isizeOffset) + .getInt(); + final ByteBuffer output = ByteBuffer.allocate(isize); + decompress(input, output); + output.flip(); + return output; + } +} diff --git a/src/main/java/htsjdk/samtools/util/Histogram.java b/src/main/java/htsjdk/samtools/util/Histogram.java index f2d7828dd1..912318babf 100644 --- a/src/main/java/htsjdk/samtools/util/Histogram.java +++ b/src/main/java/htsjdk/samtools/util/Histogram.java @@ -24,13 +24,11 @@ package htsjdk.samtools.util; -import htsjdk.samtools.SamReaderFactory; +import static java.lang.Math.*; import java.io.Serializable; import java.util.*; -import static java.lang.Math.*; - /** * Class for computing and accessing histogram type data. Stored internally in * a sorted Map so that keys can be iterated in order. @@ -39,7 +37,7 @@ */ public final class Histogram implements Serializable { private static final long serialVersionUID = 1L; - private String binLabel = "BIN"; + private String binLabel = "BIN"; private String valueLabel = "VALUE"; private final NavigableMap> map; @@ -81,17 +79,25 @@ public static class Bin implements Serializable { private double value = 0; /** Constructs a new bin with the given ID. */ - private Bin(final K id) { this.id = id; } + private Bin(final K id) { + this.id = id; + } /** Gets the ID of this bin. */ - public K getId() { return id; } + public K getId() { + return id; + } /** Gets the value in the bin. */ - public double getValue() { return value; } + public double getValue() { + return value; + } /** Returns the String format for the value in the bin. 
*/ @Override - public String toString() { return String.valueOf(this.value); } + public String toString() { + return String.valueOf(this.value); + } /** Checks the equality of the bin by ID and value. */ @Override @@ -149,11 +155,21 @@ public void increment(final K id, final double increment) { bin.value += increment; } - public String getBinLabel() { return binLabel; } - public void setBinLabel(final String binLabel) { this.binLabel = binLabel; } + public String getBinLabel() { + return binLabel; + } - public String getValueLabel() { return valueLabel; } - public void setValueLabel(final String valueLabel) { this.valueLabel = valueLabel; } + public void setBinLabel(final String binLabel) { + this.binLabel = binLabel; + } + + public String getValueLabel() { + return valueLabel; + } + + public void setValueLabel(final String valueLabel) { + this.valueLabel = valueLabel; + } /** Checks that the labels and values in the two histograms are identical. */ @Override @@ -161,11 +177,11 @@ public boolean equals(final Object o) { if (o == this) { return true; } - return o != null && - (o instanceof Histogram) && - ((Histogram) o).binLabel.equals(this.binLabel) && - ((Histogram) o).valueLabel.equals(this.valueLabel) && - ((Histogram) o).map.equals(this.map); + return o != null + && (o instanceof Histogram) + && ((Histogram) o).binLabel.equals(this.binLabel) + && ((Histogram) o).valueLabel.equals(this.valueLabel) + && ((Histogram) o).map.equals(this.map); } @Override @@ -185,10 +201,10 @@ public int hashCode() { public double getMean() { // Could use simply getSum() / getCount(), but that would require iterating over the // values() set twice, which seems inefficient given how simply the computation is. 
- double product=0, totalCount=0; + double product = 0, totalCount = 0; for (final Bin bin : map.values()) { final double idValue = bin.getIdValue(); - final double count = bin.getValue(); + final double count = bin.getValue(); product += idValue * count; totalCount += count; @@ -236,7 +252,7 @@ public double getStandardDeviation() { total += localCount * pow(value - mean, 2); } - return Math.sqrt(total / (count-1)); + return Math.sqrt(total / (count - 1)); } /** @@ -303,10 +319,10 @@ public Collection> values() { */ public double getStandardDeviationBinSize(final double mean) { double total = 0; - for(final Bin bin : values()) { + for (final Bin bin : values()) { total += Math.pow(bin.getValue() - mean, 2); } - return Math.sqrt(total / (Math.max(1,values().size()-1))); + return Math.sqrt(total / (Math.max(1, values().size() - 1))); } /** @@ -320,12 +336,9 @@ public double getPercentile(final double percentile) { if (percentile <= 0) throw new IllegalArgumentException("Cannot query percentiles of 0 or below"); if (percentile >= 1) throw new IllegalArgumentException("Cannot query percentiles of 1 or above"); - values().stream() - .filter(b -> b.getValue() < 0) - .findFirst() - .ifPresent(b -> { - throw new IllegalStateException("Cannot calculate Percentile when negative counts are present " + - "in histogram. Bin " + b.getId() + "=" + b.getValue()); + values().stream().filter(b -> b.getValue() < 0).findFirst().ifPresent(b -> { + throw new IllegalStateException("Cannot calculate Percentile when negative counts are present " + + "in histogram. Bin " + b.getId() + "=" + b.getValue()); }); final double total = getCount(); @@ -341,7 +354,7 @@ public double getPercentile(final double percentile) { } /** - * Returns the cumulative probability of observing a value <= v when sampling the + * Returns the cumulative probability of observing a value {@code <=} v when sampling the * distribution represented by this histogram. 
* @throws UnsupportedOperationException if this histogram does not store instances of Number */ @@ -370,17 +383,16 @@ public double getMedian() { if (count % 2 == 0) { midLow = count / 2; midHigh = midLow + 1; - } - else { + } else { midLow = Math.ceil(count / 2); midHigh = midLow; } - Double midLowValue = null; + Double midLowValue = null; Double midHighValue = null; for (final Bin bin : values()) { total += bin.getValue(); - if (midLowValue == null && total >= midLow) midLowValue = bin.getIdValue(); + if (midLowValue == null && total >= midLow) midLowValue = bin.getIdValue(); if (midHighValue == null && total >= midHigh) midHighValue = bin.getIdValue(); if (midLowValue != null && midHighValue != null) break; } @@ -429,7 +441,6 @@ private Bin getModeBin() { return modeBin; } - /** * Returns the key with the lowest count. * @throws UnsupportedOperationException if this histogram does not store instances of Number @@ -478,20 +489,19 @@ public void trimByTailLimit(final int tailLimit) { final Bin modeBin = getModeBin(); final double mode = modeBin.getIdValue(); final double sizeOfModeBin = modeBin.getValue(); - final double minimumBinSize = sizeOfModeBin/tailLimit; + final double minimumBinSize = sizeOfModeBin / tailLimit; Bin lastBin = null; final List binsToKeep = new ArrayList<>(); for (Bin bin : values()) { - double binId = ((Number)bin.getId()).doubleValue(); + double binId = ((Number) bin.getId()).doubleValue(); if (binId <= mode) { binsToKeep.add(bin.getId()); - } - else if ((lastBin != null && ((Number)lastBin.getId()).doubleValue() != binId - 1) || bin.getValue() < minimumBinSize) { + } else if ((lastBin != null && ((Number) lastBin.getId()).doubleValue() != binId - 1) + || bin.getValue() < minimumBinSize) { break; - } - else { + } else { binsToKeep.add(bin.getId()); } lastBin = bin; @@ -517,13 +527,13 @@ public boolean isEmpty() { } /** - * Trims the histogram so that only bins <= width are kept. 
+ * Trims the histogram so that only bins {@code <=} width are kept. */ public void trimByWidth(final int width) { final Iterator it = map.descendingKeySet().iterator(); while (it.hasNext()) { - if (((Number)it.next()).doubleValue() > width) { + if (((Number) it.next()).doubleValue() > width) { it.remove(); } else break; } @@ -538,11 +548,12 @@ public void trimByWidth(final int width) { */ public Histogram divideByHistogram(final Histogram divisorHistogram) { final Histogram output = new Histogram(); - if (!this.keySet().equals(divisorHistogram.keySet())) throw new IllegalArgumentException("Attempting to divide Histograms with non-identical bins"); - for (final K key : this.keySet()){ + if (!this.keySet().equals(divisorHistogram.keySet())) + throw new IllegalArgumentException("Attempting to divide Histograms with non-identical bins"); + for (final K key : this.keySet()) { final Bin dividend = this.get(key); final Bin divisor = divisorHistogram.get(key); - output.increment(key, dividend.getValue()/divisor.getValue()); + output.increment(key, dividend.getValue() / divisor.getValue()); } return output; } @@ -552,7 +563,7 @@ public Histogram divideByHistogram(final Histogram divisorHistogram) { * @param addHistogram */ public void addHistogram(final Histogram addHistogram) { - for (final K key : addHistogram.keySet()){ + for (final K key : addHistogram.keySet()) { this.increment(key, addHistogram.get(key).getValue()); } } @@ -574,7 +585,7 @@ public Set keySet() { /** * Return whether this histogram contains the given key. 
*/ - public boolean containsKey(final K key){ + public boolean containsKey(final K key) { return map.containsKey(key); } } diff --git a/src/main/java/htsjdk/samtools/util/HttpUtils.java b/src/main/java/htsjdk/samtools/util/HttpUtils.java index a785bf307f..46d457bdd4 100644 --- a/src/main/java/htsjdk/samtools/util/HttpUtils.java +++ b/src/main/java/htsjdk/samtools/util/HttpUtils.java @@ -16,7 +16,7 @@ public static String getETag(final URL url) { return getHeaderField(url, "ETag"); } - private static URLConnection openConnection(final URL url) throws IOException{ + private static URLConnection openConnection(final URL url) throws IOException { final URLConnection conn = url.openConnection(); conn.setReadTimeout(3000); conn.setDefaultUseCaches(false); @@ -30,7 +30,8 @@ public static String getHeaderField(final URL url, final String name) { // Create a URLConnection object for a URL conn = openConnection(url); if (conn instanceof HttpURLConnection) { - // The HEAD method is identical to GET except that the server MUST NOT return a message-body in the response. + // The HEAD method is identical to GET except that the server MUST NOT return a message-body in the + // response. 
((HttpURLConnection) conn).setRequestMethod("HEAD"); } return conn.getHeaderField(name); @@ -38,8 +39,7 @@ public static String getHeaderField(final URL url, final String name) { } catch (final IOException e) { e.printStackTrace(); return null; - } - finally { + } finally { if (conn != null && conn instanceof HttpURLConnection) { ((HttpURLConnection) conn).disconnect(); } @@ -55,12 +55,10 @@ public static void printHeaderFields(final URL url) { for (final String name : conn.getHeaderFields().keySet()) { System.out.println(name + "\t" + conn.getHeaderField(name)); - } } catch (Exception e) { e.printStackTrace(); - } - finally { + } finally { if (conn != null && conn instanceof HttpURLConnection) { ((HttpURLConnection) conn).disconnect(); } @@ -72,13 +70,10 @@ public static boolean resourceAvailable(final URL url) { } public static void main(final String[] args) throws MalformedURLException { - //printHeaderFields(new URL( + // printHeaderFields(new URL( // "http://www.broadinstitute.org/igvdata/1KG/DCC_merged/freeze5/NA12891.pilot2.SLX.bam")); - System.out.println(getETag(new URL( - "http://www.broadinstitute.org/igvdata/test/sam/303KY.8.paired1.bam.tdf"))); - System.out.println(resourceAvailable(new URL( - "http://www.broadinstitute.org/igvdata/test/sam/303KY.8.paired1.bam.tdf"))); - - + System.out.println(getETag(new URL("http://www.broadinstitute.org/igvdata/test/sam/303KY.8.paired1.bam.tdf"))); + System.out.println( + resourceAvailable(new URL("http://www.broadinstitute.org/igvdata/test/sam/303KY.8.paired1.bam.tdf"))); } } diff --git a/src/main/java/htsjdk/samtools/util/IOUtil.java b/src/main/java/htsjdk/samtools/util/IOUtil.java index 730506b2eb..24ff88ab97 100644 --- a/src/main/java/htsjdk/samtools/util/IOUtil.java +++ b/src/main/java/htsjdk/samtools/util/IOUtil.java @@ -30,7 +30,6 @@ import htsjdk.samtools.seekablestream.SeekableHTTPStream; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.nio.DeleteOnExitPathHook; - import 
java.io.BufferedInputStream; import java.io.BufferedOutputStream; import java.io.BufferedReader; @@ -164,7 +163,7 @@ public class IOUtil { private static int compressionLevel = Defaults.COMPRESSION_LEVEL; /** * Sets the GZip compression level for subsequent GZIPOutputStream object creation. - * @param compressionLevel 0 <= compressionLevel <= 9 + * @param compressionLevel {@code 0 <= compressionLevel <= 9} */ public static void setCompressionLevel(final int compressionLevel) { if (compressionLevel < Deflater.NO_COMPRESSION || compressionLevel > Deflater.BEST_COMPRESSION) { @@ -204,8 +203,7 @@ public static void transferByStream(final InputStream in, final OutputStream out out.write(buffer, 0, read); remaining -= read; } - } - catch (final IOException ioe) { + } catch (final IOException ioe) { throw new RuntimeIOException(ioe); } } @@ -226,13 +224,13 @@ public static OutputStream maybeBufferOutputStream(final OutputStream os, final } public static SeekableStream maybeBufferedSeekableStream(final SeekableStream stream, final int bufferSize) { - return bufferSize > 0 ? new SeekableBufferedStream(stream, bufferSize) : stream; + return bufferSize > 0 ? new SeekableBufferedStream(stream, bufferSize) : stream; } - + public static SeekableStream maybeBufferedSeekableStream(final SeekableStream stream) { return maybeBufferedSeekableStream(stream, Defaults.BUFFER_SIZE); } - + public static SeekableStream maybeBufferedSeekableStream(final File file) { try { return maybeBufferedSeekableStream(new SeekableFileStream(file)); @@ -278,7 +276,6 @@ public static Writer maybeBufferWriter(final Writer writer) { return maybeBufferWriter(writer, Defaults.BUFFER_SIZE); } - /** * Delete a list of files, and write a warning message if one could not be deleted. * @@ -301,7 +298,7 @@ public static void deleteFiles(final Iterable files) { } public static void deletePaths(final Path... 
paths) { - for(Path path: paths){ + for (Path path : paths) { deletePath(path); } } @@ -313,9 +310,9 @@ public static void deletePaths(final Path... paths) { * @param paths an iterable of Paths to delete */ public static void deletePaths(final Iterable paths) { - //Path is itself an Iterable which causes very confusing behavior if we don't explicitly check here. - if( paths instanceof Path){ - deletePath((Path)paths); + // Path is itself an Iterable which causes very confusing behavior if we don't explicitly check here. + if (paths instanceof Path) { + deletePath((Path) paths); } paths.forEach(IOUtil::deletePath); } @@ -323,7 +320,7 @@ public static void deletePaths(final Iterable paths) { /** * Attempt to delete a single path and log an error if it is not deleted. */ - public static void deletePath(Path path){ + public static void deletePath(Path path) { try { Files.delete(path); } catch (IOException e) { @@ -353,8 +350,9 @@ public static boolean isRegularPath(final Path path) { * Creates a new tmp file on one of the available temp filesystems, registers it for deletion * on JVM exit and then returns it. */ - public static File newTempFile(final String prefix, final String suffix, - final File[] tmpDirs, final long minBytesFree) throws IOException { + public static File newTempFile( + final String prefix, final String suffix, final File[] tmpDirs, final long minBytesFree) + throws IOException { File f = null; for (int i = 0; i < tmpDirs.length; ++i) { @@ -369,8 +367,7 @@ public static File newTempFile(final String prefix, final String suffix, } /** Creates a new tmp file on one of the potential filesystems that has at least 5GB free. 
*/ - public static File newTempFile(final String prefix, final String suffix, - final File[] tmpDirs) throws IOException { + public static File newTempFile(final String prefix, final String suffix, final File[] tmpDirs) throws IOException { return newTempFile(prefix, suffix, tmpDirs, FIVE_GBS); } @@ -387,8 +384,9 @@ public static File getDefaultTmpDir() { * Creates a new tmp path on one of the available temp filesystems, registers it for deletion * on JVM exit and then returns it. */ - public static Path newTempPath(final String prefix, final String suffix, - final Path[] tmpDirs, final long minBytesFree) throws IOException { + public static Path newTempPath( + final String prefix, final String suffix, final Path[] tmpDirs, final long minBytesFree) + throws IOException { Path p = null; for (int i = 0; i < tmpDirs.length; ++i) { @@ -403,8 +401,7 @@ public static Path newTempPath(final String prefix, final String suffix, } /** Creates a new tmp file on one of the potential filesystems that has at least 5GB free. */ - public static Path newTempPath(final String prefix, final String suffix, - final Path[] tmpDirs) throws IOException { + public static Path newTempPath(final String prefix, final String suffix, final Path[] tmpDirs) throws IOException { return newTempPath(prefix, suffix, tmpDirs, FIVE_GBS); } @@ -438,41 +435,40 @@ public static void deleteOnExit(final Path path) { public static String basename(final File f) { final String full = f.getName(); final int index = full.lastIndexOf('.'); - if (index > 0 && index > full.lastIndexOf(File.separator)) { + if (index > 0 && index > full.lastIndexOf(File.separator)) { return full.substring(0, index); - } - else { + } else { return full; } } - + /** - * Checks that an input is is non-null, a URL or a file, exists, + * Checks that an input is is non-null, a URL or a file, exists, * and if its a file then it is not a directory and is readable. If any * condition is false then a runtime exception is thrown. 
* * @param input the input to check for validity */ public static void assertInputIsValid(final String input) { - if (input == null) { - throw new IllegalArgumentException("Cannot check validity of null input."); - } - if (!isUrl(input)) { - assertFileIsReadable(new File(input)); - } - } - - /** - * Returns true iff the string is a url. + if (input == null) { + throw new IllegalArgumentException("Cannot check validity of null input."); + } + if (!isUrl(input)) { + assertFileIsReadable(new File(input)); + } + } + + /** + * Returns true iff the string is a url. * Helps distinguish url inputs form file path inputs. */ public static boolean isUrl(final String input) { - try { - new URL(input); - return true; - } catch (MalformedURLException e) { - return false; - } + try { + new URL(input); + return true; + } catch (MalformedURLException e) { + return false; + } } /** @@ -495,13 +491,14 @@ public static void assertFileIsReadable(final Path path) { if (path == null) { throw new IllegalArgumentException("Cannot check readability of null file."); } else if (!Files.exists(path)) { - throw new SAMException("Cannot read non-existent file: " + path.toUri().toString()); - } - else if (Files.isDirectory(path)) { - throw new SAMException("Cannot read file because it is a directory: " + path.toUri().toString()); - } - else if (!Files.isReadable(path)) { - throw new SAMException("File exists but is not readable: " + path.toUri().toString()); + throw new SAMException( + "Cannot read non-existent file: " + path.toUri().toString()); + } else if (Files.isDirectory(path)) { + throw new SAMException("Cannot read file because it is a directory: " + + path.toUri().toString()); + } else if (!Files.isReadable(path)) { + throw new SAMException( + "File exists but is not readable: " + path.toUri().toString()); } } @@ -522,12 +519,11 @@ public static void assertFilesAreReadable(final List files) { * @param paths the list of paths to check for readability */ public static void 
assertPathsAreReadable(final List paths) { - for (final Path path: paths) assertFileIsReadable(path); + for (final Path path : paths) assertFileIsReadable(path); } - /** - * Checks that each string is non-null, exists or is a URL, + * Checks that each string is non-null, exists or is a URL, * and if it is a file then not a directory and is readable. If any * condition is false then a runtime exception is thrown. * @@ -551,22 +547,18 @@ public static void assertFileIsWritable(final File file) { // If the file doesn't exist, check that it's parent directory does and is writable final File parent = file.getAbsoluteFile().getParentFile(); if (!parent.exists()) { - throw new SAMException("Cannot write file: " + file.getAbsolutePath() + ". " + - "Neither file nor parent directory exist."); - } - else if (!parent.isDirectory()) { - throw new SAMException("Cannot write file: " + file.getAbsolutePath() + ". " + - "File does not exist and parent is not a directory."); - } - else if (!parent.canWrite()) { - throw new SAMException("Cannot write file: " + file.getAbsolutePath() + ". " + - "File does not exist and parent directory is not writable.."); + throw new SAMException("Cannot write file: " + file.getAbsolutePath() + ". " + + "Neither file nor parent directory exist."); + } else if (!parent.isDirectory()) { + throw new SAMException("Cannot write file: " + file.getAbsolutePath() + ". " + + "File does not exist and parent is not a directory."); + } else if (!parent.canWrite()) { + throw new SAMException("Cannot write file: " + file.getAbsolutePath() + ". 
" + + "File does not exist and parent directory is not writable.."); } - } - else if (file.isDirectory()) { + } else if (file.isDirectory()) { throw new SAMException("Cannot write file because it is a directory: " + file.getAbsolutePath()); - } - else if (!file.canWrite()) { + } else if (!file.canWrite()) { throw new SAMException("File exists but is not writable: " + file.getAbsolutePath()); } } @@ -582,14 +574,13 @@ public static void assertFilesAreWritable(final List files) { for (final File file : files) assertFileIsWritable(file); } - /** * In some filesystems (e.g. google cloud) it may not make sense to check writability. * This method only checks writability when it's (i.e. for now when the path points to a file * in the local filesystem) */ - public static void assertFileIsWritable(final Path path){ // tsato: perhaps the input type should be IOPath - if (path.toUri().getScheme().equals("file")){ + public static void assertFileIsWritable(final Path path) { // tsato: perhaps the input type should be IOPath + if (path.toUri().getScheme().equals("file")) { IOUtil.assertFileIsWritable(path.toFile()); } } @@ -614,15 +605,14 @@ public static void assertDirectoryIsWritable(final File dir) { public static void assertDirectoryIsWritable(final Path dir) { if (dir == null) { throw new IllegalArgumentException("Cannot check readability of null file."); - } - else if (!Files.exists(dir)) { + } else if (!Files.exists(dir)) { throw new SAMException("Directory does not exist: " + dir.toUri().toString()); - } - else if (!Files.isDirectory(dir)) { - throw new SAMException("Cannot write to directory because it is not a directory: " + dir.toUri().toString()); - } - else if (!Files.isWritable(dir)) { - throw new SAMException("Directory exists but is not writable: " + dir.toUri().toString()); + } else if (!Files.isDirectory(dir)) { + throw new SAMException("Cannot write to directory because it is not a directory: " + + dir.toUri().toString()); + } else if (!Files.isWritable(dir)) 
{ + throw new SAMException( + "Directory exists but is not writable: " + dir.toUri().toString()); } } @@ -635,14 +625,12 @@ else if (!Files.isWritable(dir)) { public static void assertDirectoryIsReadable(final File dir) { if (dir == null) { throw new IllegalArgumentException("Cannot check readability of null file."); - } - else if (!dir.exists()) { + } else if (!dir.exists()) { throw new SAMException("Directory does not exist: " + dir.getAbsolutePath()); - } - else if (!dir.isDirectory()) { - throw new SAMException("Cannot read from directory because it is not a directory: " + dir.getAbsolutePath()); - } - else if (!dir.canRead()) { + } else if (!dir.isDirectory()) { + throw new SAMException( + "Cannot read from directory because it is not a directory: " + dir.getAbsolutePath()); + } else if (!dir.canRead()) { throw new SAMException("Directory exists but is not readable: " + dir.getAbsolutePath()); } } @@ -652,12 +640,11 @@ else if (!dir.canRead()) { */ public static void assertFilesEqual(final File f1, final File f2) { if (f1.length() != f2.length()) { - throw new SAMException("File " + f1 + " is " + f1.length() + " bytes but file " + f2 + " is " + f2.length() + " bytes."); + throw new SAMException( + "File " + f1 + " is " + f1.length() + " bytes but file " + f2 + " is " + f2.length() + " bytes."); } - try ( - final FileInputStream s1 = new FileInputStream(f1); - final FileInputStream s2 = new FileInputStream(f2); - ) { + try (final FileInputStream s1 = new FileInputStream(f1); + final FileInputStream s2 = new FileInputStream(f2); ) { final byte[] buf1 = new byte[1024 * 1024]; final byte[] buf2 = new byte[1024 * 1024]; int len1; @@ -703,17 +690,14 @@ public static InputStream openFileForReading(final File file) { public static InputStream openFileForReading(final Path path) { try { - if (hasGzipFileExtension(path)) { + if (hasGzipFileExtension(path)) { return openGzipFileForReading(path); - } - else { + } else { return Files.newInputStream(path); } - } - catch 
(IOException ioe) { + } catch (IOException ioe) { throw new SAMException("Error opening file: " + path, ioe); } - } /** @@ -736,8 +720,7 @@ public static InputStream openGzipFileForReading(final Path path) { try { return new GZIPInputStream(Files.newInputStream(path)); - } - catch (IOException ioe) { + } catch (IOException ioe) { throw new SAMException("Error opening file: " + path, ioe); } } @@ -778,7 +761,8 @@ public static OutputStream openFileForWriting(final Path path, OpenOption... ope return Files.newOutputStream(path, openOptions); } } catch (final IOException ioe) { - throw new SAMException("Error opening file for writing: " + path.toUri().toString(), ioe); + throw new SAMException( + "Error opening file for writing: " + path.toUri().toString(), ioe); } } @@ -795,14 +779,16 @@ public static boolean hasGzipFileExtension(Path path) { * Preferred over PrintStream and PrintWriter because an exception is thrown on I/O error */ public static BufferedWriter openFileForBufferedWriting(final File file, final boolean append) { - return new BufferedWriter(new OutputStreamWriter(openFileForWriting(file, append)), Defaults.NON_ZERO_BUFFER_SIZE); + return new BufferedWriter( + new OutputStreamWriter(openFileForWriting(file, append)), Defaults.NON_ZERO_BUFFER_SIZE); } /** * Preferred over PrintStream and PrintWriter because an exception is thrown on I/O error */ - public static BufferedWriter openFileForBufferedWriting(final Path path, final OpenOption ... openOptions) { - return new BufferedWriter(new OutputStreamWriter(openFileForWriting(path, openOptions)), Defaults.NON_ZERO_BUFFER_SIZE); + public static BufferedWriter openFileForBufferedWriting(final Path path, final OpenOption... 
openOptions) { + return new BufferedWriter( + new OutputStreamWriter(openFileForWriting(path, openOptions)), Defaults.NON_ZERO_BUFFER_SIZE); } /** @@ -823,7 +809,9 @@ public static BufferedWriter openFileForBufferedUtf8Writing(final File file) { * Preferred over PrintStream and PrintWriter because an exception is thrown on I/O error */ public static BufferedWriter openFileForBufferedUtf8Writing(final Path path) { - return new BufferedWriter(new OutputStreamWriter(openFileForWriting(path), Charset.forName("UTF-8")), Defaults.NON_ZERO_BUFFER_SIZE); + return new BufferedWriter( + new OutputStreamWriter(openFileForWriting(path), Charset.forName("UTF-8")), + Defaults.NON_ZERO_BUFFER_SIZE); } /** @@ -851,7 +839,7 @@ public static OutputStream openGzipFileForWriting(final File file, final boolean * converts a boolean into an array containing either the append option or nothing */ private static OpenOption[] getAppendOpenOption(boolean append) { - return append ? new OpenOption[]{StandardOpenOption.APPEND} : EMPTY_OPEN_OPTIONS; + return append ? new OpenOption[] {StandardOpenOption.APPEND} : EMPTY_OPEN_OPTIONS; } /** @@ -861,7 +849,7 @@ private static OpenOption[] getAppendOpenOption(boolean append) { * @param openOptions options to control how the file is opened * @return the output stream to write to */ - public static OutputStream openGzipFileForWriting(final Path path, final OpenOption ... openOptions) { + public static OutputStream openGzipFileForWriting(final Path path, final OpenOption... 
openOptions) { try { final OutputStream out = Files.newOutputStream(path, openOptions); if (Defaults.BUFFER_SIZE > 0) { @@ -870,7 +858,8 @@ public static OutputStream openGzipFileForWriting(final Path path, final OpenOpt return new CustomGzipOutputStream(out, compressionLevel); } } catch (final IOException ioe) { - throw new SAMException("Error opening file for writing: " + path.toUri().toString(), ioe); + throw new SAMException( + "Error opening file for writing: " + path.toUri().toString(), ioe); } } @@ -894,7 +883,7 @@ public static void copyStream(final InputStream input, final OutputStream output try { final byte[] buffer = new byte[Defaults.NON_ZERO_BUFFER_SIZE]; int bytesRead = 0; - while((bytesRead = input.read(buffer)) > 0) { + while ((bytesRead = input.read(buffer)) > 0) { output.write(buffer, 0, bytesRead); } } catch (IOException e) { @@ -929,7 +918,7 @@ public static File[] getFilesMatchingRegexp(final File directory, final String r } public static File[] getFilesMatchingRegexp(final File directory, final Pattern regexp) { - return directory.listFiles( new FilenameFilter() { + return directory.listFiles(new FilenameFilter() { @Override public boolean accept(final File dir, final String name) { return regexp.matcher(name).matches(); @@ -974,13 +963,12 @@ public static long sizeOfTree(final File fileOrDirectory) { public static void copyDirectoryTree(final File fileOrDirectory, final File destination) { if (fileOrDirectory.isDirectory()) { destination.mkdir(); - for(final File f : fileOrDirectory.listFiles()) { - final File destinationFileOrDirectory = new File(destination.getPath(),f.getName()); - if (f.isDirectory()){ - copyDirectoryTree(f,destinationFileOrDirectory); - } - else { - copyFile(f,destinationFileOrDirectory); + for (final File f : fileOrDirectory.listFiles()) { + final File destinationFileOrDirectory = new File(destination.getPath(), f.getName()); + if (f.isDirectory()) { + copyDirectoryTree(f, destinationFileOrDirectory); + } else { + 
copyFile(f, destinationFileOrDirectory); } } } @@ -1003,8 +991,9 @@ public static void copyDirectoryTree(final File fileOrDirectory, final File dest */ @Deprecated public static File createTempDir(final String prefix, final String morePrefix) { - final String dotSeparatedSuffix = morePrefix == null ? ".tmp" : morePrefix.startsWith(".") ? morePrefix : "." + morePrefix; - return createTempDir(prefix + dotSeparatedSuffix).toFile() ; + final String dotSeparatedSuffix = + morePrefix == null ? ".tmp" : morePrefix.startsWith(".") ? morePrefix : "." + morePrefix; + return createTempDir(prefix + dotSeparatedSuffix).toFile(); } /* @@ -1050,17 +1039,17 @@ public static String getFullCanonicalPath(final File file) { try { File f = file.getCanonicalFile(); String canonicalPath = ""; - while (f != null && !f.getName().equals("")) { + while (f != null && !f.getName().equals("")) { canonicalPath = "/" + f.getName() + canonicalPath; f = f.getParentFile(); if (f != null) f = f.getCanonicalFile(); } return canonicalPath; } catch (final IOException ioe) { - throw new RuntimeIOException("Error getting full canonical path for " + - file + ": " + ioe.getMessage(), ioe); + throw new RuntimeIOException( + "Error getting full canonical path for " + file + ": " + ioe.getMessage(), ioe); } - } + } /** * Reads everything from an input stream as characters and returns a single String. @@ -1077,8 +1066,7 @@ public static String readFully(final InputStream in) { } return builder.toString(); - } - catch (final IOException ioe) { + } catch (final IOException ioe) { throw new RuntimeIOException("Error reading stream", ioe); } } @@ -1098,24 +1086,31 @@ public static IterableOnceIterator readLines(final File f) { private String next = in.readLine(); /** Returns true if there is another line to read or false otherwise. 
*/ - @Override public boolean hasNext() { return next != null; } + @Override + public boolean hasNext() { + return next != null; + } /** Returns the next line in the file or null if there are no more lines. */ - @Override public String next() { + @Override + public String next() { try { final String tmp = next; next = in.readLine(); if (next == null) in.close(); return tmp; + } catch (final IOException ioe) { + throw new RuntimeIOException(ioe); } - catch (final IOException ioe) { throw new RuntimeIOException(ioe); } } /** Closes the underlying input stream. Not required if end of stream has already been hit. */ - @Override public void close() throws IOException { CloserUtil.close(in); } + @Override + public void close() throws IOException { + CloserUtil.close(in); + } }; - } - catch (final IOException e) { + } catch (final IOException e) { throw new RuntimeIOException(e); } } @@ -1195,22 +1190,19 @@ public static List unrollPaths(final Collection inputs, final String // If the file didn't match a given extension, treat it as a list of files if (!matched) { try { - Files.lines(p) - .map(String::trim) - .filter(s -> !s.isEmpty()) - .forEach(s -> { - final Path innerPath; - try { - innerPath = getPath(s); - stack.push(innerPath); - } catch (IOException e) { - throw new IllegalArgumentException("cannot convert " + s + " to a Path.", e); - } - } - ); + Files.lines(p).map(String::trim).filter(s -> !s.isEmpty()).forEach(s -> { + final Path innerPath; + try { + innerPath = getPath(s); + stack.push(innerPath); + } catch (IOException e) { + throw new IllegalArgumentException("cannot convert " + s + " to a Path.", e); + } + }); } catch (IOException e) { - throw new IllegalArgumentException("had trouble reading from " + p.toUri().toString(), e); + throw new IllegalArgumentException( + "had trouble reading from " + p.toUri().toString(), e); } } } @@ -1221,7 +1213,6 @@ public static List unrollPaths(final Collection inputs, final String return output; } - /** * Check if the 
given URI has a scheme. * @@ -1249,25 +1240,30 @@ public static boolean hasScheme(String uriString) { public static Path getPath(String uriString) throws IOException { URI uri = URI.create(uriString); try { - // if the URI has no scheme, then treat as a local file, otherwise use the scheme to determine the filesystem to use + // if the URI has no scheme, then treat as a local file, otherwise use the scheme to determine the + // filesystem to use return uri.getScheme() == null ? Paths.get(uriString) : Paths.get(uri); } catch (FileSystemNotFoundException e) { ClassLoader cl = Thread.currentThread().getContextClassLoader(); if (cl == null) { throw e; } - return FileSystems.newFileSystem(uri, new HashMap<>(), cl).provider().getPath(uri); + return FileSystems.newFileSystem(uri, new HashMap<>(), cl) + .provider() + .getPath(uri); } } public static List getPaths(List uriStrings) throws RuntimeIOException { - return uriStrings.stream().map(s -> { - try { - return IOUtil.getPath(s); - } catch (IOException e) { - throw new RuntimeIOException(e); - } - }).collect(Collectors.toList()); + return uriStrings.stream() + .map(s -> { + try { + return IOUtil.getPath(s); + } catch (IOException e) { + throw new RuntimeIOException(e); + } + }) + .collect(Collectors.toList()); } /* @@ -1286,7 +1282,7 @@ public static Path toPath(File fileOrNull) { * @param files a {@link List} of {@link File}s to convert to {@link Path}s * @return a new List containing the results of running toPath on the elements of the input */ - public static List filesToPaths(Collection files){ + public static List filesToPaths(Collection files) { return files.stream().map(File::toPath).collect(Collectors.toList()); } @@ -1299,7 +1295,8 @@ public static List filesToPaths(Collection files){ */ public static boolean isGZIPInputStream(final InputStream stream) { if (!stream.markSupported()) { - throw new IllegalArgumentException("isGZIPInputStream() : Cannot test a stream that doesn't support marking."); + throw new 
IllegalArgumentException( + "isGZIPInputStream() : Cannot test a stream that doesn't support marking."); } stream.mark(GZIP_HEADER_READ_LENGTH); @@ -1345,7 +1342,9 @@ public static boolean isBlockCompressed(final Path path, final boolean checkExte if (checkExtension && !hasBlockCompressedExtension(path)) { return false; } - try (final InputStream stream = new BufferedInputStream(Files.newInputStream(path), Math.max(Defaults.BUFFER_SIZE, BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE))) { + try (final InputStream stream = new BufferedInputStream( + Files.newInputStream(path), + Math.max(Defaults.BUFFER_SIZE, BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE))) { return BlockCompressedInputStream.isValidFile(stream); } } @@ -1371,11 +1370,10 @@ public static boolean isBlockCompressed(final Path path) throws IOException { * * @return {@code true} if the file has a block-compressed extension; {@code false} otherwise. */ - public static boolean hasBlockCompressedExtension (final String fileName) { + public static boolean hasBlockCompressedExtension(final String fileName) { String cleanedPath = stripQueryStringIfPathIsAnHttpUrl(fileName); for (final String extension : FileExtensions.BLOCK_COMPRESSED) { - if (cleanedPath.toLowerCase().endsWith(extension)) - return true; + if (cleanedPath.toLowerCase().endsWith(extension)) return true; } return false; } @@ -1398,7 +1396,7 @@ public static boolean hasBlockCompressedExtension(final Path path) { * * @return {@code true} if the file has a block-compressed extension; {@code false} otherwise. */ - public static boolean hasBlockCompressedExtension (final File file) { + public static boolean hasBlockCompressedExtension(final File file) { return hasBlockCompressedExtension(file.getName()); } @@ -1409,7 +1407,7 @@ public static boolean hasBlockCompressedExtension (final File file) { * * @return {@code true} if the file has a block-compressed extension; {@code false} otherwise. 
*/ - public static boolean hasBlockCompressedExtension (final URI uri) { + public static boolean hasBlockCompressedExtension(final URI uri) { String path = uri.getPath(); return hasBlockCompressedExtension(path); } @@ -1421,7 +1419,7 @@ public static boolean hasBlockCompressedExtension (final URI uri) { * @return path with no trailing queryString (ex: http://something.com/path.vcf?stuff=something => http://something.com/path.vcf) */ private static String stripQueryStringIfPathIsAnHttpUrl(String path) { - if(path.startsWith("http://") || path.startsWith("https://")) { + if (path.startsWith("http://") || path.startsWith("https://")) { int qIdx = path.indexOf('?'); if (qIdx > 0) { return path.substring(0, qIdx); @@ -1436,7 +1434,7 @@ private static String stripQueryStringIfPathIsAnHttpUrl(String path) { * @param directory The directory to be deleted (along with its subdirectories) */ public static void recursiveDelete(final Path directory) { - + final SimpleFileVisitor simpleFileVisitor = new SimpleFileVisitor() { @Override public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { @@ -1455,7 +1453,7 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx try { Files.walkFileTree(directory, simpleFileVisitor); - } catch (final IOException e){ + } catch (final IOException e) { throw new RuntimeIOException(e); } } diff --git a/src/main/java/htsjdk/samtools/util/Interval.java b/src/main/java/htsjdk/samtools/util/Interval.java index 07007a48e6..05286cfb44 100644 --- a/src/main/java/htsjdk/samtools/util/Interval.java +++ b/src/main/java/htsjdk/samtools/util/Interval.java @@ -26,7 +26,6 @@ import htsjdk.samtools.SAMException; import htsjdk.tribble.NamedFeature; import htsjdk.tribble.annotation.Strand; - import java.util.Collection; /** @@ -106,7 +105,7 @@ public boolean isPositiveStrand() { /** * Return the {@link Strand} this interval is on. 
*/ - public Strand getStrand(){ + public Strand getStrand() { return isNegativeStrand() ? Strand.NEGATIVE : Strand.FORWARD; } @@ -121,8 +120,8 @@ public String getName() { * Returns true if this interval overlaps the other interval, otherwise false. */ public boolean intersects(final Interval other) { - return (this.getContig().equals(other.getContig()) && - CoordMath.overlaps(this.getStart(), this.getEnd(), other.getStart(), other.getEnd())); + return (this.getContig().equals(other.getContig()) + && CoordMath.overlaps(this.getStart(), this.getEnd(), other.getStart(), other.getEnd())); } public int getIntersectionLength(final Interval other) { @@ -137,7 +136,8 @@ public int getIntersectionLength(final Interval other) { */ public Interval intersect(final Interval that) { if (!intersects(that)) throw new IllegalArgumentException(that + " does not intersect " + this); - return new Interval(this.getContig(), + return new Interval( + this.getContig(), Math.max(this.getStart(), that.getStart()), Math.min(this.getEnd(), that.getEnd()), this.negativeStrand, @@ -148,8 +148,8 @@ public Interval intersect(final Interval that) { * Returns true if this interval overlaps the other interval, otherwise false. */ public boolean abuts(final Interval other) { - return this.getContig().equals(other.getContig()) && - (this.getStart() == other.getEnd() + 1 || other.getStart() == this.getEnd() + 1); + return this.getContig().equals(other.getContig()) + && (this.getStart() == other.getEnd() + 1 || other.getStart() == this.getEnd() + 1); } /** @@ -163,16 +163,15 @@ public int length() { * Returns a new interval that is padded by the amount of bases specified on either side. 
*/ public Interval pad(final int left, final int right) { - return new Interval(this.getContig(), this.getStart() - left, this.getEnd() + right, this.negativeStrand, this.name); + return new Interval( + this.getContig(), this.getStart() - left, this.getEnd() + right, this.negativeStrand, this.name); } /** * Counts the total number of bases a collection of intervals. */ public static long countBases(final Collection intervals) { - return intervals.stream() - .mapToLong(Interval::length) - .sum(); + return intervals.stream().mapToLong(Interval::length).sum(); } /** @@ -232,7 +231,8 @@ public int hashCode() { } public String toString() { - return getContig() + ":" + getStart() + "-" + getEnd() + "\t" + getStrand().encode() + "\t" + ((null == name) ? '.' : name); + return getContig() + ":" + getStart() + "-" + getEnd() + "\t" + + getStrand().encode() + "\t" + ((null == name) ? '.' : name); } @Override diff --git a/src/main/java/htsjdk/samtools/util/IntervalCodec.java b/src/main/java/htsjdk/samtools/util/IntervalCodec.java index 9120904ba6..d87a2badaa 100644 --- a/src/main/java/htsjdk/samtools/util/IntervalCodec.java +++ b/src/main/java/htsjdk/samtools/util/IntervalCodec.java @@ -46,7 +46,6 @@ public IntervalCodec clone() { return new IntervalCodec(dict); } - /** * Sets the output stream that records will be written to. */ @@ -109,11 +108,10 @@ public Interval decode() { return null; } return new Interval( - dict.getSequence(sequenceIndex).getSequenceName(), - binaryCodec.readInt(), - binaryCodec.readInt(), - binaryCodec.readBoolean(), - (binaryCodec.readBoolean()) ? binaryCodec.readNullTerminatedString() : null - ); + dict.getSequence(sequenceIndex).getSequenceName(), + binaryCodec.readInt(), + binaryCodec.readInt(), + binaryCodec.readBoolean(), + (binaryCodec.readBoolean()) ? 
binaryCodec.readNullTerminatedString() : null); } -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/samtools/util/IntervalCoordinateComparator.java b/src/main/java/htsjdk/samtools/util/IntervalCoordinateComparator.java index a2485e9a14..3399908741 100644 --- a/src/main/java/htsjdk/samtools/util/IntervalCoordinateComparator.java +++ b/src/main/java/htsjdk/samtools/util/IntervalCoordinateComparator.java @@ -24,7 +24,6 @@ package htsjdk.samtools.util; import htsjdk.samtools.SAMFileHeader; - import java.io.Serializable; import java.util.Comparator; diff --git a/src/main/java/htsjdk/samtools/util/IntervalList.java b/src/main/java/htsjdk/samtools/util/IntervalList.java index 24fb8c4230..fd602133c4 100644 --- a/src/main/java/htsjdk/samtools/util/IntervalList.java +++ b/src/main/java/htsjdk/samtools/util/IntervalList.java @@ -31,7 +31,6 @@ import htsjdk.tribble.IntervalList.IntervalListCodec; import htsjdk.tribble.MutableFeature; import htsjdk.utils.ValidationUtils; - import java.io.BufferedReader; import java.io.File; import java.io.IOException; @@ -110,7 +109,8 @@ public Iterator iterator() { * Adds an interval to the list of intervals. */ public void add(final Interval interval) { - ValidationUtils.nonNull(header.getSequence(interval.getContig()), + ValidationUtils.nonNull( + header.getSequence(interval.getContig()), () -> String.format("Cannot add interval %s, contig not in header", interval.toString())); this.intervals.add(interval); @@ -120,7 +120,7 @@ public void add(final Interval interval) { * Adds a Collection of intervals to the list of intervals. */ public void addall(final Collection intervals) { - //use this instead of addAll so that the contig checking happens. + // use this instead of addAll so that the contig checking happens. for (Interval interval : intervals) { add(interval); } @@ -239,14 +239,14 @@ public List getIntervals() { * Note: this function modifies the object in-place and is therefore difficult to work with. 
* * @return the set of unique intervals condensed from the contained intervals - * @deprecated use {@link #uniqued()#getIntervals()} instead. + * @deprecated use {@link #uniqued()}.{@link #getIntervals()} instead. */ @Deprecated public List getUniqueIntervals() { return getUniqueIntervals(true); } - //NO SIDE EFFECTS HERE! + // NO SIDE EFFECTS HERE! /** * Merges list of intervals and reduces them like htsjdk.samtools.util.IntervalList#getUniqueIntervals() @@ -257,7 +257,7 @@ public static List getUniqueIntervals(final IntervalList list, final b return getUniqueIntervals(list, true, concatenateNames, false); } - //NO SIDE EFFECTS HERE! + // NO SIDE EFFECTS HERE! /** * Merges list of intervals and reduces them like htsjdk.samtools.util.IntervalList#getUniqueIntervals() @@ -265,7 +265,8 @@ public static List getUniqueIntervals(final IntervalList list, final b * @param concatenateNames If false, the merged interval has the name of the earlier interval. This keeps name shorter. * @param enforceSameStrands enforce that merged intervals have the same strand, otherwise ignore. */ - public static List getUniqueIntervals(final IntervalList list, final boolean concatenateNames, final boolean enforceSameStrands) { + public static List getUniqueIntervals( + final IntervalList list, final boolean concatenateNames, final boolean enforceSameStrands) { return getUniqueIntervals(list, true, concatenateNames, enforceSameStrands); } @@ -276,7 +277,11 @@ public static List getUniqueIntervals(final IntervalList list, final b * @param concatenateNames If false, the merged interval has the name of the earlier interval. This keeps name shorter. * @param enforceSameStrands enforce that merged intervals have the same strand, otherwise ignore. 
*/ - public static List getUniqueIntervals(final IntervalList list, final boolean combineAbuttingIntervals, final boolean concatenateNames, final boolean enforceSameStrands) { + public static List getUniqueIntervals( + final IntervalList list, + final boolean combineAbuttingIntervals, + final boolean concatenateNames, + final boolean enforceSameStrands) { final List intervals; if (list.getHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) { @@ -285,10 +290,11 @@ public static List getUniqueIntervals(final IntervalList list, final b intervals = list.intervals; } - final IntervalMergerIterator mergeringIterator = new IntervalMergerIterator(intervals.iterator(), combineAbuttingIntervals, enforceSameStrands, concatenateNames); + final IntervalMergerIterator mergeringIterator = new IntervalMergerIterator( + intervals.iterator(), combineAbuttingIntervals, enforceSameStrands, concatenateNames); final List unique = new ArrayList<>(); - while(mergeringIterator.hasNext()){ + while (mergeringIterator.hasNext()) { unique.add(mergeringIterator.next()); } return unique; @@ -300,7 +306,7 @@ public static List getUniqueIntervals(final IntervalList list, final b * Note: this function modifies the object in-place and is therefore difficult to work with. * * @param concatenateNames If false, the merged interval has the name of the earlier interval. This keeps name shorter. - * @deprecated use {@link #uniqued(boolean)#getIntervals()} or {@link #getUniqueIntervals(IntervalList, boolean)} instead. + * @deprecated use {@link #uniqued(boolean)}.{@link #getIntervals()} or {@link #getUniqueIntervals(IntervalList, boolean)} instead. 
*/ @Deprecated public List getUniqueIntervals(final boolean concatenateNames) { @@ -325,7 +331,7 @@ public List getUniqueIntervals(final boolean concatenateNames) { public static List breakIntervalsAtBandMultiples(final List intervals, final int bandMultiple) { final List brokenUpIntervals = new ArrayList<>(); for (final Interval interval : intervals) { - if (interval.getEnd() >= interval.getStart()) { // Normal, non-empty intervals + if (interval.getEnd() >= interval.getStart()) { // Normal, non-empty intervals final int startIndex = interval.getStart() / bandMultiple; final int endIndex = interval.getEnd() / bandMultiple; if (startIndex == endIndex) { @@ -333,7 +339,7 @@ public static List breakIntervalsAtBandMultiples(final List } else { brokenUpIntervals.addAll(breakIntervalAtBandMultiples(interval, bandMultiple)); } - } else { // Special case - empty intervals ex: (100-99) + } else { // Special case - empty intervals ex: (100-99) brokenUpIntervals.add(interval); } } @@ -364,7 +370,12 @@ private static List breakIntervalAtBandMultiples(final Interval interv endPos = interval.getEnd(); } // add start/end to list of broken up intervals to return (and uniquely name it). - brokenUpIntervals.add(new Interval(interval.getContig(), startPos, endPos, interval.isNegativeStrand(), interval.getName() + "." + (startIndex - startOfIntervalIndex + 1))); + brokenUpIntervals.add(new Interval( + interval.getContig(), + startPos, + endPos, + interval.isNegativeStrand(), + interval.getName() + "." 
+ (startIndex - startOfIntervalIndex + 1))); startIndex++; startPos = startIndex * bandMultiple; } @@ -457,7 +468,8 @@ public static IntervalList fromPath(final Path path) { try (final BufferedReader reader = IOUtil.openFileForBufferedReading(path)) { return fromReader(reader); } catch (final IOException e) { - throw new SAMException(String.format("Failed to close file %s after reading", path.toUri().toString())); + throw new SAMException(String.format( + "Failed to close file %s after reading", path.toUri().toString())); } } @@ -528,8 +540,7 @@ public static IntervalList fromReader(final BufferedReader in) { do { final Optional maybeInterval = Optional.ofNullable(intervalListCodec.decode(line)); maybeInterval.ifPresent(list.intervals::add); - } - while ((line = in.readLine()) != null); + } while ((line = in.readLine()) != null); return list; } catch (final IOException ioe) { @@ -570,22 +581,18 @@ public void write(final File file) { * @param list2 the second IntervalList * @return the intersection of list1 and list2. 
*/ - public static IntervalList intersection(final IntervalList list1, final IntervalList list2) { // Ensure that all the sequence dictionaries agree and merge the lists SequenceUtil.assertSequenceDictionariesEqual( - list1.getHeader().getSequenceDictionary(), - list2.getHeader().getSequenceDictionary()); + list1.getHeader().getSequenceDictionary(), list2.getHeader().getSequenceDictionary()); final IntervalList result = new IntervalList(list1.getHeader().clone()); final OverlapDetector detector = OverlapDetector.create(list1.getIntervals()); for (final Interval i : list2.getIntervals()) { - detector.getOverlaps(i).stream() - .map(i::intersect) - .forEach(result::add); + detector.getOverlaps(i).stream().map(i::intersect).forEach(result::add); } return result.uniqued(); } @@ -596,11 +603,8 @@ public static IntervalList intersection(final IntervalList list1, final Interval * @param lists the list of IntervalList * @return the intersection of all the IntervalLists in lists. */ - public static IntervalList intersection(final Collection lists) { - return lists.stream() - .reduce(IntervalList::intersection) - .orElse(null); + return lists.stream().reduce(IntervalList::intersection).orElse(null); } /** @@ -629,8 +633,7 @@ public static IntervalList concatenate(final IntervalList list1, final IntervalL public IntervalList addOther(final IntervalList other) { SequenceUtil.assertSequenceDictionariesEqual( - this.getHeader().getSequenceDictionary(), - other.getHeader().getSequenceDictionary()); + this.getHeader().getSequenceDictionary(), other.getHeader().getSequenceDictionary()); this.header.setSortOrder(SAMFileHeader.SortOrder.unsorted); this.addall(other.intervals); return this; @@ -648,11 +651,9 @@ public static IntervalList concatenate(final Collection lists) { final SAMFileHeader header = lists.stream() .findFirst() .map(IntervalList::getHeader) - .orElseThrow( - () -> new IllegalArgumentException("Cannot combine empty collection of IntervalLists")); + .orElseThrow(() -> 
new IllegalArgumentException("Cannot combine empty collection of IntervalLists")); - return lists.stream() - .reduce(new IntervalList(header), IntervalList::addOther, IntervalList::concatenate); + return lists.stream().reduce(new IntervalList(header), IntervalList::addOther, IntervalList::concatenate); } /** @@ -683,10 +684,12 @@ public static IntervalList invert(final IntervalList list) { final ListMap map = new ListMap<>(); - //add all the intervals (uniqued and therefore also sorted) to a ListMap from sequenceIndex to a list of Intervals + // add all the intervals (uniqued and therefore also sorted) to a ListMap from sequenceIndex to a list of + // Intervals for (final Interval i : list.uniqued().getIntervals()) { final int sequenceIndex = list.getHeader().getSequenceIndex(i.getContig()); - ValidationUtils.validateArg(sequenceIndex != SAMSequenceRecord.UNAVAILABLE_SEQUENCE_INDEX, + ValidationUtils.validateArg( + sequenceIndex != SAMSequenceRecord.UNAVAILABLE_SEQUENCE_INDEX, () -> String.format("Cannot add interval %s, contig not in header", i.toString())); map.add(sequenceIndex, i); } @@ -694,29 +697,37 @@ public static IntervalList invert(final IntervalList list) { // a counter to supply newly-created intervals with a name int intervals = 0; - //iterate over the contigs in the dictionary - for (final SAMSequenceRecord samSequenceRecord : list.getHeader().getSequenceDictionary().getSequences()) { + // iterate over the contigs in the dictionary + for (final SAMSequenceRecord samSequenceRecord : + list.getHeader().getSequenceDictionary().getSequences()) { final Integer sequenceIndex = samSequenceRecord.getSequenceIndex(); final String sequenceName = samSequenceRecord.getSequenceName(); final int sequenceLength = samSequenceRecord.getSequenceLength(); - int lastCoveredPosition = 0; //start at beginning of sequence - //iterate over list of intervals that are in sequence + int lastCoveredPosition = 0; // start at beginning of sequence + // iterate over list of 
intervals that are in sequence if (map.containsKey(sequenceIndex)) { // if there are intervals in the ListMap on this contig, iterate over them (in order) for (final Interval i : map.get(sequenceIndex)) { if (i.getStart() > lastCoveredPosition + 1) { - //if there's space between the last interval and the current one, add an interval between them - inverse.add(new Interval(sequenceName, lastCoveredPosition + 1, i.getStart() - 1, false, "interval-" + (++intervals))); + // if there's space between the last interval and the current one, add an interval between them + inverse.add(new Interval( + sequenceName, + lastCoveredPosition + 1, + i.getStart() - 1, + false, + "interval-" + (++intervals))); } - lastCoveredPosition = i.getEnd(); //update the last covered position + lastCoveredPosition = i.getEnd(); // update the last covered position } } if (sequenceLength > lastCoveredPosition) { // finally, if there's space between the last interval and the next - // one, add an interval. This also covers the case that there are no intervals in the ListMap for a contig. - inverse.add(new Interval(sequenceName, lastCoveredPosition + 1, sequenceLength, false, "interval-" + (++intervals))); + // one, add an interval. This also covers the case that there are no intervals in the ListMap for a + // contig. + inverse.add(new Interval( + sequenceName, lastCoveredPosition + 1, sequenceLength, false, "interval-" + (++intervals))); } } @@ -744,9 +755,7 @@ public static IntervalList subtract(final IntervalList lhs, final IntervalList r * @return an IntervalList comprising all loci that are in the first collection but not the second lhs-rhs=answer. 
*/ public static IntervalList subtract(final Collection lhs, final Collection rhs) { - return subtract( - union(lhs), - union(rhs)); + return subtract(union(lhs), union(rhs)); } /** @@ -756,7 +765,8 @@ public static IntervalList subtract(final Collection lhs, final Co * @param lists2 the second collection of IntervalLists * @return the difference between the two intervals, i.e. the loci that are only in one IntervalList but not both */ - public static IntervalList difference(final Collection lists1, final Collection lists2) { + public static IntervalList difference( + final Collection lists1, final Collection lists2) { return difference(union(lists1), union(lists2)); } @@ -768,9 +778,7 @@ public static IntervalList difference(final Collection lists1, fin * @return the difference between the two intervals, i.e. the loci that are only in one IntervalList but not both */ public static IntervalList difference(final IntervalList list1, final IntervalList list2) { - return union( - subtract(list1, list2), - subtract(list2, list1)); + return union(subtract(list1, list2), subtract(list2, list1)); } /** @@ -785,8 +793,8 @@ public static IntervalList difference(final IntervalList list1, final IntervalLi public static IntervalList overlaps(final IntervalList lhs, final IntervalList rhs) { final SAMFileHeader header = lhs.getHeader().clone(); - SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(), - rhs.getHeader().getSequenceDictionary()); + SequenceUtil.assertSequenceDictionariesEqual( + header.getSequenceDictionary(), rhs.getHeader().getSequenceDictionary()); header.setSortOrder(SAMFileHeader.SortOrder.unsorted); @@ -822,7 +830,8 @@ public static IntervalList overlaps(final IntervalList lhs, final IntervalList r */ public static IntervalList overlaps(final Collection lists1, final Collection lists2) { if (lists1.isEmpty()) { - throw new SAMException("Cannot call overlaps with the first collection having empty list of IntervalLists."); + throw new 
SAMException( + "Cannot call overlaps with the first collection having empty list of IntervalLists."); } return overlaps(concatenate(lists1), union(lists2)); } @@ -866,7 +875,11 @@ public static class IntervalMergerIterator implements Iterator { boolean currentStrandNegative = false; String currentFirstName = null; - public IntervalMergerIterator(Iterator intervals, final boolean combineAbuttingIntervals, final boolean enforceSameStrand, final boolean concatenateNames) { + public IntervalMergerIterator( + Iterator intervals, + final boolean combineAbuttingIntervals, + final boolean enforceSameStrand, + final boolean concatenateNames) { this.inputIntervals = intervals; this.combineAbuttingIntervals = combineAbuttingIntervals; @@ -898,9 +911,10 @@ private Interval getNext() { current = new MutableFeature(next); currentStrandNegative = next.isNegativeStrand(); currentFirstName = next.getName(); - } else if (current.overlaps(next) || (combineAbuttingIntervals && current.withinDistanceOf(next,1))) { + } else if (current.overlaps(next) || (combineAbuttingIntervals && current.withinDistanceOf(next, 1))) { if (enforceSameStrands && currentStrandNegative != next.isNegativeStrand()) { - throw new SAMException("Strands were not equal for: " + current.toString() + " and " + next.toString()); + throw new SAMException( + "Strands were not equal for: " + current.toString() + " and " + next.toString()); } if (concatenateNames) { toBeMerged.add(next); @@ -908,8 +922,14 @@ private Interval getNext() { current.end = Math.max(current.getEnd(), next.getEnd()); } else { // Emit merged/unique interval - final Interval retVal = concatenateNames ? merge(toBeMerged, concatenateNames) : - new Interval(current.getContig(), current.getStart(), current.getEnd(), currentStrandNegative, currentFirstName); + final Interval retVal = concatenateNames + ? 
merge(toBeMerged, concatenateNames) + : new Interval( + current.getContig(), + current.getStart(), + current.getEnd(), + currentStrandNegative, + currentFirstName); toBeMerged.clear(); current.setAll(next); currentStrandNegative = next.isNegativeStrand(); @@ -921,12 +941,17 @@ private Interval getNext() { } } // Emit merged/unique interval - final Interval retVal = concatenateNames ? merge(toBeMerged, concatenateNames) : - new Interval(current.getContig(), current.getStart(), current.getEnd(), currentStrandNegative, currentFirstName); + final Interval retVal = concatenateNames + ? merge(toBeMerged, concatenateNames) + : new Interval( + current.getContig(), + current.getStart(), + current.getEnd(), + currentStrandNegative, + currentFirstName); toBeMerged.clear(); current = null; return retVal; } } } - diff --git a/src/main/java/htsjdk/samtools/util/IntervalListReferenceSequenceMask.java b/src/main/java/htsjdk/samtools/util/IntervalListReferenceSequenceMask.java index 9e76ffa671..3f2f156eab 100644 --- a/src/main/java/htsjdk/samtools/util/IntervalListReferenceSequenceMask.java +++ b/src/main/java/htsjdk/samtools/util/IntervalListReferenceSequenceMask.java @@ -24,7 +24,6 @@ package htsjdk.samtools.util; import htsjdk.samtools.SAMFileHeader; - import java.util.BitSet; import java.util.List; @@ -83,8 +82,8 @@ public int nextPosition(final int sequenceIndex, final int position) { private void ensureSequenceLoaded(final int sequenceIndex) { if (sequenceIndex < this.currentSequenceIndex) { - throw new IllegalArgumentException("Cannot look at an earlier sequence. Current: " + - this.currentSequenceIndex + "; requested: " + sequenceIndex); + throw new IllegalArgumentException("Cannot look at an earlier sequence. 
Current: " + + this.currentSequenceIndex + "; requested: " + sequenceIndex); } if (sequenceIndex > currentSequenceIndex) { currentBitSet.clear(); diff --git a/src/main/java/htsjdk/samtools/util/IntervalListWriter.java b/src/main/java/htsjdk/samtools/util/IntervalListWriter.java index 6baa257d1e..441bf699b5 100644 --- a/src/main/java/htsjdk/samtools/util/IntervalListWriter.java +++ b/src/main/java/htsjdk/samtools/util/IntervalListWriter.java @@ -25,11 +25,8 @@ import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMTextHeaderCodec; -import htsjdk.tribble.annotation.Strand; - import java.io.BufferedWriter; import java.io.Closeable; -import java.io.File; import java.io.IOException; import java.nio.file.Path; diff --git a/src/main/java/htsjdk/samtools/util/IntervalTree.java b/src/main/java/htsjdk/samtools/util/IntervalTree.java index 9e4f03bf11..630d8c9877 100644 --- a/src/main/java/htsjdk/samtools/util/IntervalTree.java +++ b/src/main/java/htsjdk/samtools/util/IntervalTree.java @@ -24,7 +24,6 @@ package htsjdk.samtools.util; import htsjdk.utils.ValidationUtils; - import java.util.ConcurrentModificationException; import java.util.Iterator; import java.util.NoSuchElementException; @@ -41,22 +40,19 @@ * * @author tsharpe */ -public class IntervalTree implements Iterable> -{ +public class IntervalTree implements Iterable> { /** * Return the number of intervals in the tree. * @return The number of intervals. */ - public int size() - { + public int size() { return mRoot == null ? 0 : mRoot.getSize(); } /** * Remove all entries. */ - public void clear() - { + public void clear() { mRoot = null; } @@ -68,48 +64,36 @@ public void clear() * @param value The associated value. * @return The old value associated with that interval, or the sentinel. 
*/ - public V put( final int start, final int end, final V value ) - { - if ( start > end ) + public V put(final int start, final int end, final V value) { + if (start > end) throw new IllegalArgumentException("Start cannot exceed end. (start=" + start + ", end=" + end + ")"); V result = mSentinel; - if ( mRoot == null ) - { - mRoot = new Node(start,end,value); - } - else - { + if (mRoot == null) { + mRoot = new Node(start, end, value); + } else { Node parent = null; Node node = mRoot; int cmpVal = 0; - while ( node != null ) - { + while (node != null) { parent = node; // last non-null node - cmpVal = node.compare(start,end); - if ( cmpVal == 0 ) - { + cmpVal = node.compare(start, end); + if (cmpVal == 0) { break; } node = cmpVal < 0 ? node.getLeft() : node.getRight(); } - if ( cmpVal == 0 ) - { + if (cmpVal == 0) { result = parent.setValue(value); - } - else - { - if ( cmpVal < 0 ) - { - mRoot = parent.insertLeft(start,end,value,mRoot); - } - else - { - mRoot = parent.insertRight(start,end,value,mRoot); + } else { + if (cmpVal < 0) { + mRoot = parent.insertLeft(start, end, value, mRoot); + } else { + mRoot = parent.insertRight(start, end, value, mRoot); } } } @@ -132,8 +116,9 @@ public V put( final int start, final int end, final V value ) * @return the updated value that is stored in the tree after the completion of this merge operation, this will * be the sentinel value if nothing ended up being stored */ - public V merge(int start, int end, V value, BiFunction remappingFunction) { - ValidationUtils.validateArg(!Objects.equals(value, mSentinel), "Values equal to the sentinel value may not be merged"); + public V merge(int start, int end, V value, BiFunction remappingFunction) { + ValidationUtils.validateArg( + !Objects.equals(value, mSentinel), "Values equal to the sentinel value may not be merged"); final V alreadyPresent = put(start, end, value); if (!Objects.equals(alreadyPresent, mSentinel)) { final V newComputedValue = remappingFunction.apply(value, 
alreadyPresent); @@ -154,16 +139,13 @@ public V merge(int start, int end, V value, BiFunction node = mRoot; - while ( node != null ) - { - final int cmpVal = node.compare(start,end); - if ( cmpVal == 0 ) - { + while (node != null) { + final int cmpVal = node.compare(start, end); + if (cmpVal == 0) { result = node.getValue(); mRoot = node.remove(mRoot); break; @@ -181,15 +163,12 @@ public V remove( final int start, final int end ) * @param end The interval's end. * @return The Node that represents that interval, or null. */ - public Node find( final int start, final int end ) - { + public Node find(final int start, final int end) { Node node = mRoot; - while ( node != null ) - { - final int cmpVal = node.compare(start,end); - if ( cmpVal == 0 ) - { + while (node != null) { + final int cmpVal = node.compare(start, end); + if (cmpVal == 0) { break; } @@ -204,9 +183,8 @@ public Node find( final int start, final int end ) * @param idx The rank of the interval sought (from 0 to size()-1). * @return The Node that represents the nth interval. */ - public Node findByIndex( final int idx ) - { - return Node.findByRank(mRoot,idx+1); + public Node findByIndex(final int idx) { + return Node.findByRank(mRoot, idx + 1); } /** @@ -216,22 +194,19 @@ public Node findByIndex( final int idx ) * @param end The interval's end. * @return The rank of that interval, or -1. */ - public int getIndex( final int start, final int end ) - { - return Node.getRank(mRoot,start,end) - 1; + public int getIndex(final int start, final int end) { + return Node.getRank(mRoot, start, end) - 1; } /** * Find the least interval in the tree. * @return The earliest interval, or null if the tree is empty. */ - public Node min() - { + public Node min() { Node result = null; Node node = mRoot; - while ( node != null ) - { + while (node != null) { result = node; node = node.getLeft(); } @@ -246,26 +221,22 @@ public Node min() * @return The earliest >= interval, or null if there is none. 
*/ @SuppressWarnings("null") - public Node min( final int start, final int end ) - { + public Node min(final int start, final int end) { Node result = null; Node node = mRoot; int cmpVal = 0; - while ( node != null ) - { + while (node != null) { result = node; - cmpVal = node.compare(start,end); - if ( cmpVal == 0 ) - { + cmpVal = node.compare(start, end); + if (cmpVal == 0) { break; } node = cmpVal < 0 ? node.getLeft() : node.getRight(); } - if ( cmpVal > 0 ) - { + if (cmpVal > 0) { result = result.getNext(); } @@ -278,37 +249,29 @@ public Node min( final int start, final int end ) * @param end The interval's end. * @return The earliest overlapping interval, or null if there is none. */ - public Node minOverlapper( final int start, final int end ) - { + public Node minOverlapper(final int start, final int end) { Node result = null; Node node = mRoot; - if ( node != null && node.getMaxEnd() >= start ) - { - while ( true ) - { - if ( node.getStart() <= end && start <= node.getEnd() ) - { // this node overlaps. there might be a lesser overlapper down the left sub-tree. - // no need to consider the right sub-tree: even if there's an overlapper, if won't be minimal + if (node != null && node.getMaxEnd() >= start) { + while (true) { + if (node.getStart() <= end + && start <= node.getEnd()) { // this node overlaps. there might be a lesser overlapper down the + // left sub-tree. + // no need to consider the right sub-tree: even if there's an overlapper, if won't be minimal result = node; node = node.getLeft(); - if ( node == null || node.getMaxEnd() < start ) - break; // no left sub-tree or all nodes end too early - } - else - { // no overlap. if there might be a left sub-tree overlapper, consider the left sub-tree. + if (node == null || node.getMaxEnd() < start) break; // no left sub-tree or all nodes end too early + } else { // no overlap. if there might be a left sub-tree overlapper, consider the left sub-tree. 
final Node left = node.getLeft(); - if ( left != null && left.getMaxEnd() >= start ) - { + if (left != null && left.getMaxEnd() >= start) { node = left; - } - else - { // left sub-tree cannot contain an overlapper. consider the right sub-tree. - if ( node.getStart() > end ) + } else { // left sub-tree cannot contain an overlapper. consider the right sub-tree. + if (node.getStart() > end) break; // everything in the right sub-tree is past the end of the query interval node = node.getRight(); - if ( node == null || node.getMaxEnd() < start ) + if (node == null || node.getMaxEnd() < start) break; // no right sub-tree or all nodes end too early } } @@ -322,13 +285,11 @@ public Node minOverlapper( final int start, final int end ) * Find the greatest interval in the tree. * @return The latest interval, or null if the tree is empty. */ - public Node max() - { + public Node max() { Node result = null; Node node = mRoot; - while ( node != null ) - { + while (node != null) { result = node; node = node.getRight(); } @@ -343,26 +304,22 @@ public Node max() * @return The latest >= interval, or null if there is none. */ @SuppressWarnings("null") - public Node max( final int start, final int end ) - { + public Node max(final int start, final int end) { Node result = null; Node node = mRoot; int cmpVal = 0; - while ( node != null ) - { + while (node != null) { result = node; - cmpVal = node.compare(start,end); - if ( cmpVal == 0 ) - { + cmpVal = node.compare(start, end); + if (cmpVal == 0) { break; } node = cmpVal < 0 ? node.getLeft() : node.getRight(); } - if ( cmpVal < 0 ) - { + if (cmpVal < 0) { result = result.getPrev(); } @@ -374,8 +331,7 @@ public Node max( final int start, final int end ) * @return An iterator. */ @Override - public Iterator> iterator() - { + public Iterator> iterator() { return new FwdIterator(min()); } @@ -385,9 +341,8 @@ public Iterator> iterator() * @param end The interval's end. * @return An iterator. 
*/ - public Iterator> iterator( final int start, final int end ) - { - return new FwdIterator(min(start,end)); + public Iterator> iterator(final int start, final int end) { + return new FwdIterator(min(start, end)); } /** @@ -396,17 +351,15 @@ public Iterator> iterator( final int start, final int end ) * @param end The range end. * @return An iterator. */ - public Iterator> overlappers( final int start, final int end ) - { - return new OverlapIterator(start,end); + public Iterator> overlappers(final int start, final int end) { + return new OverlapIterator(start, end); } /** * Return an iterator over the entire tree that returns intervals in reverse order. * @return An iterator. */ - public Iterator> reverseIterator() - { + public Iterator> reverseIterator() { return new RevIterator(max()); } @@ -416,9 +369,8 @@ public Iterator> reverseIterator() * @param end The interval's end. * @return An iterator. */ - public Iterator> reverseIterator( final int start, final int end ) - { - return new RevIterator(max(start,end)); + public Iterator> reverseIterator(final int start, final int end) { + return new RevIterator(max(start, end)); } /** @@ -426,8 +378,7 @@ public Iterator> reverseIterator( final int start, final int end ) * into the tree, or to signal "not found" when removing an interval. This is null by default. * @return The sentinel value. */ - public V getSentinel() - { + public V getSentinel() { return mSentinel; } @@ -437,8 +388,7 @@ public V getSentinel() * @param sentinel The new sentinel value. * @return The old sentinel value. 
*/ - public V setSentinel( final V sentinel ) - { + public V setSentinel(final V sentinel) { final V result = mSentinel; mSentinel = sentinel; return result; @@ -461,16 +411,14 @@ public void printTree() { if (mRoot != null) mRoot.printNode(); } - void removeNode( final Node node ) - { + void removeNode(final Node node) { mRoot = node.remove(mRoot); } private Node mRoot; private V mSentinel; - public static class Node - { + public static class Node { // bit-wise definitions from which the other constants are composed public static final int HAS_LESSER_PART = 1; public static final int HAS_OVERLAPPING_PART = 2; @@ -484,8 +432,7 @@ public static class Node public static final int IS_RIGHT_OVERHANGING_OVERLAPPER = HAS_GREATER_PART | HAS_OVERLAPPING_PART; // 6 public static final int IS_SUPERSET = HAS_LESSER_PART | HAS_OVERLAPPING_PART | HAS_GREATER_PART; // 7 - Node( final int start, final int end, final V1 value ) - { + Node(final int start, final int end, final V1 value) { mStart = start; mEnd = end; mValue = value; @@ -494,8 +441,7 @@ public static class Node mIsBlack = true; } - Node( final Node parent, final int start, final int end, final V1 value ) - { + Node(final Node parent, final int start, final int end, final V1 value) { mParent = parent; mStart = start; mEnd = end; @@ -504,100 +450,78 @@ public static class Node mSize = 1; } - public int getStart() - { + public int getStart() { return mStart; } - public int getEnd() - { + public int getEnd() { return mEnd; } - public int getLength() - { - return mEnd - mStart + 1 ; + public int getLength() { + return mEnd - mStart + 1; } - public int getRelationship( final Node interval ) - { + public int getRelationship(final Node interval) { int result = 0; - if ( mStart < interval.getStart() ) - result = HAS_LESSER_PART; - if ( mEnd > interval.getEnd() ) - result |= HAS_GREATER_PART; - if ( mStart <= interval.getEnd() && interval.getStart() <= mEnd ) - result |= HAS_OVERLAPPING_PART; + if (mStart < 
interval.getStart()) result = HAS_LESSER_PART; + if (mEnd > interval.getEnd()) result |= HAS_GREATER_PART; + if (mStart <= interval.getEnd() && interval.getStart() <= mEnd) result |= HAS_OVERLAPPING_PART; return result; } - public boolean isAdjacent( final Node interval ) - { + public boolean isAdjacent(final Node interval) { return mStart == interval.getEnd() + 1 || mEnd + 1 == interval.getStart(); } - public V1 getValue() - { + public V1 getValue() { return mValue; } - public V1 setValue( final V1 value ) - { + public V1 setValue(final V1 value) { final V1 result = mValue; mValue = value; return result; } - int getSize() - { + int getSize() { return mSize; } - int getMaxEnd() - { + int getMaxEnd() { return mMaxEnd; } - Node getLeft() - { + Node getLeft() { return mLeft; } - Node insertLeft( final int start, final int end, final V1 value, final Node root ) - { - mLeft = new Node(this,start,end,value); - return insertFixup(mLeft,root); + Node insertLeft(final int start, final int end, final V1 value, final Node root) { + mLeft = new Node(this, start, end, value); + return insertFixup(mLeft, root); } - Node getRight() - { + Node getRight() { return mRight; } - Node insertRight( final int start, final int end, final V1 value, final Node root ) - { - mRight = new Node(this,start,end,value); - return insertFixup(mRight,root); + Node insertRight(final int start, final int end, final V1 value, final Node root) { + mRight = new Node(this, start, end, value); + return insertFixup(mRight, root); } - Node getNext() - { + Node getNext() { Node result; - if ( mRight != null ) - { + if (mRight != null) { result = mRight; - while ( result.mLeft != null ) - { + while (result.mLeft != null) { result = result.mLeft; } - } - else - { + } else { Node node = this; result = mParent; - while ( result != null && node == result.mRight ) - { + while (result != null && node == result.mRight) { node = result; result = result.mParent; } @@ -606,24 +530,18 @@ Node getNext() return result; } - 
Node getPrev() - { + Node getPrev() { Node result; - if ( mLeft != null ) - { + if (mLeft != null) { result = mLeft; - while ( result.mRight != null ) - { + while (result.mRight != null) { result = result.mRight; } - } - else - { + } else { Node node = this; result = mParent; - while ( result != null && node == result.mLeft ) - { + while (result != null && node == result.mLeft) { node = result; result = result.mParent; } @@ -632,72 +550,49 @@ Node getPrev() return result; } - boolean wasRemoved() - { + boolean wasRemoved() { return mSize == 0; } - Node remove( Node root ) - { - if ( mSize == 0 ) - { + Node remove(Node root) { + if (mSize == 0) { throw new IllegalStateException("Entry was already removed."); } - if ( mLeft == null ) - { - if ( mRight == null ) - { // no children - if ( mParent == null ) - { + if (mLeft == null) { + if (mRight == null) { // no children + if (mParent == null) { root = null; - } - else if ( mParent.mLeft == this ) - { + } else if (mParent.mLeft == this) { mParent.mLeft = null; fixup(mParent); - if ( mIsBlack ) - root = removeFixup(mParent,null,root); - } - else - { + if (mIsBlack) root = removeFixup(mParent, null, root); + } else { mParent.mRight = null; fixup(mParent); - if ( mIsBlack ) - root = removeFixup(mParent,null,root); + if (mIsBlack) root = removeFixup(mParent, null, root); } + } else { // single child on right + root = spliceOut(mRight, root); } - else - { // single child on right - root = spliceOut(mRight,root); - } - } - else if ( mRight == null ) - { // single child on left - root = spliceOut(mLeft,root); - } - else - { // two children + } else if (mRight == null) { // single child on left + root = spliceOut(mLeft, root); + } else { // two children final Node next = getNext(); root = next.remove(root); // put next into tree in same position as this, effectively removing this - if ( (next.mParent = mParent) == null ) - root = next; - else if ( mParent.mLeft == this ) - mParent.mLeft = next; - else - mParent.mRight = next; 
- - if ( (next.mLeft = mLeft) != null ) - { + if ((next.mParent = mParent) == null) root = next; + else if (mParent.mLeft == this) mParent.mLeft = next; + else mParent.mRight = next; + + if ((next.mLeft = mLeft) != null) { mLeft.mParent = next; } - if ( (next.mRight = mRight) != null ) - { + if ((next.mRight = mRight) != null) { mRight.mParent = next; } @@ -713,63 +608,43 @@ else if ( mParent.mLeft == this ) } // backwards comparison! compares start+end to this. - int compare( final int start, final int end ) - { + int compare(final int start, final int end) { int result = 0; - if ( start > mStart ) - result = 1; - else if ( start < mStart ) - result = -1; - else if ( end > mEnd ) - result = 1; - else if ( end < mEnd ) - result = -1; + if (start > mStart) result = 1; + else if (start < mStart) result = -1; + else if (end > mEnd) result = 1; + else if (end < mEnd) result = -1; return result; } @SuppressWarnings("null") - static Node getNextOverlapper( Node node, final int start, final int end ) - { - do - { + static Node getNextOverlapper(Node node, final int start, final int end) { + do { Node nextNode = node.mRight; - if ( nextNode != null && nextNode.mMaxEnd >= start ) - { + if (nextNode != null && nextNode.mMaxEnd >= start) { node = nextNode; - while ( (nextNode = node.mLeft) != null && nextNode.mMaxEnd >= start ) - node = nextNode; - } - else - { + while ((nextNode = node.mLeft) != null && nextNode.mMaxEnd >= start) node = nextNode; + } else { nextNode = node; - while ( (node = nextNode.mParent) != null && node.mRight == nextNode ) - nextNode = node; + while ((node = nextNode.mParent) != null && node.mRight == nextNode) nextNode = node; } - if ( node != null && node.mStart > end ) - node = null; - } - while ( node != null && !(node.mStart <= end && start <= node.mEnd) ); + if (node != null && node.mStart > end) node = null; + } while (node != null && !(node.mStart <= end && start <= node.mEnd)); return node; } - static Node findByRank( Node node, int rank ) - { 
- while ( node != null ) - { + static Node findByRank(Node node, int rank) { + while (node != null) { final int nodeRank = node.getRank(); - if ( rank == nodeRank ) - break; + if (rank == nodeRank) break; - if ( rank < nodeRank ) - { + if (rank < nodeRank) { node = node.mLeft; - } - else - { + } else { node = node.mRight; rank -= nodeRank; } @@ -778,22 +653,16 @@ static Node findByRank( Node node, int rank ) return node; } - static int getRank( Node node, final int start, final int end ) - { + static int getRank(Node node, final int start, final int end) { int rank = 0; - while ( node != null ) - { - final int cmpVal = node.compare(start,end); - if ( cmpVal < 0 ) - { + while (node != null) { + final int cmpVal = node.compare(start, end); + if (cmpVal < 0) { node = node.mLeft; - } - else - { + } else { rank += node.getRank(); - if ( cmpVal == 0 ) - return rank; // EARLY RETURN!!! + if (cmpVal == 0) return rank; // EARLY RETURN!!! node = node.mRight; } @@ -802,56 +671,42 @@ static int getRank( Node node, final int start, final int end ) return 0; } - private int getRank() - { + private int getRank() { int result = 1; - if ( mLeft != null ) - result = mLeft.mSize + 1; + if (mLeft != null) result = mLeft.mSize + 1; return result; } - private Node spliceOut( final Node child, Node root ) - { - if ( (child.mParent = mParent) == null ) - { + private Node spliceOut(final Node child, Node root) { + if ((child.mParent = mParent) == null) { root = child; child.mIsBlack = true; - } - else - { - if ( mParent.mLeft == this ) - mParent.mLeft = child; - else - mParent.mRight = child; + } else { + if (mParent.mLeft == this) mParent.mLeft = child; + else mParent.mRight = child; fixup(mParent); - if ( mIsBlack ) - root = removeFixup(mParent,child,root); + if (mIsBlack) root = removeFixup(mParent, child, root); } return root; } - private Node rotateLeft( Node root ) - { + private Node rotateLeft(Node root) { final Node child = mRight; final int childSize = child.mSize; child.mSize = 
mSize; mSize -= childSize; - if ( (mRight = child.mLeft) != null ) - { + if ((mRight = child.mLeft) != null) { mRight.mParent = this; mSize += mRight.mSize; } - if ( (child.mParent = mParent) == null ) - root = child; - else if ( this == mParent.mLeft ) - mParent.mLeft = child; - else - mParent.mRight = child; + if ((child.mParent = mParent) == null) root = child; + else if (this == mParent.mLeft) mParent.mLeft = child; + else mParent.mRight = child; child.mLeft = this; mParent = child; @@ -862,26 +717,21 @@ else if ( this == mParent.mLeft ) return root; } - private Node rotateRight( Node root ) - { + private Node rotateRight(Node root) { final Node child = mLeft; final int childSize = child.mSize; child.mSize = mSize; mSize -= childSize; - if ( (mLeft = child.mRight) != null ) - { + if ((mLeft = child.mRight) != null) { mLeft.mParent = this; mSize += mLeft.mSize; } - if ( (child.mParent = mParent) == null ) - root = child; - else if ( this == mParent.mLeft ) - mParent.mLeft = child; - else - mParent.mRight = child; + if ((child.mParent = mParent) == null) root = child; + else if (this == mParent.mLeft) mParent.mLeft = child; + else mParent.mRight = child; child.mRight = this; mParent = child; @@ -892,58 +742,43 @@ else if ( this == mParent.mLeft ) return root; } - private void setMaxEnd() - { + private void setMaxEnd() { mMaxEnd = mEnd; - if ( mLeft != null ) - mMaxEnd = Math.max(mMaxEnd,mLeft.mMaxEnd); - if ( mRight != null ) - mMaxEnd = Math.max(mMaxEnd,mRight.mMaxEnd); + if (mLeft != null) mMaxEnd = Math.max(mMaxEnd, mLeft.mMaxEnd); + if (mRight != null) mMaxEnd = Math.max(mMaxEnd, mRight.mMaxEnd); } - private static void fixup( Node node ) - { - do - { + private static void fixup(Node node) { + do { node.mSize = 1; node.mMaxEnd = node.mEnd; - if ( node.mLeft != null ) - { + if (node.mLeft != null) { node.mSize += node.mLeft.mSize; - node.mMaxEnd = Math.max(node.mMaxEnd,node.mLeft.mMaxEnd); + node.mMaxEnd = Math.max(node.mMaxEnd, node.mLeft.mMaxEnd); } - if ( 
node.mRight != null ) - { + if (node.mRight != null) { node.mSize += node.mRight.mSize; - node.mMaxEnd = Math.max(node.mMaxEnd,node.mRight.mMaxEnd); + node.mMaxEnd = Math.max(node.mMaxEnd, node.mRight.mMaxEnd); } - } - while ( (node = node.mParent) != null ); + } while ((node = node.mParent) != null); } - private static Node insertFixup( Node daughter, Node root ) - { + private static Node insertFixup(Node daughter, Node root) { Node mom = daughter.mParent; fixup(mom); - while( mom != null && !mom.mIsBlack ) - { + while (mom != null && !mom.mIsBlack) { final Node gramma = mom.mParent; Node auntie = gramma.mLeft; - if ( auntie == mom ) - { + if (auntie == mom) { auntie = gramma.mRight; - if ( auntie != null && !auntie.mIsBlack ) - { + if (auntie != null && !auntie.mIsBlack) { mom.mIsBlack = true; auntie.mIsBlack = true; gramma.mIsBlack = false; daughter = gramma; - } - else - { - if ( daughter == mom.mRight ) - { + } else { + if (daughter == mom.mRight) { root = mom.rotateLeft(root); mom = daughter; } @@ -952,20 +787,14 @@ private static Node insertFixup( Node daughter, Node root ) root = gramma.rotateRight(root); break; } - } - else - { - if ( auntie != null && !auntie.mIsBlack ) - { + } else { + if (auntie != null && !auntie.mIsBlack) { mom.mIsBlack = true; auntie.mIsBlack = true; gramma.mIsBlack = false; daughter = gramma; - } - else - { - if ( daughter == mom.mLeft ) - { + } else { + if (daughter == mom.mLeft) { root = mom.rotateRight(root); mom = daughter; } @@ -981,29 +810,22 @@ private static Node insertFixup( Node daughter, Node root ) return root; } - private static Node removeFixup( Node parent, Node node, Node root ) - { - do - { - if ( node == parent.mLeft ) - { + private static Node removeFixup(Node parent, Node node, Node root) { + do { + if (node == parent.mLeft) { Node sister = parent.mRight; - if ( !sister.mIsBlack ) - { + if (!sister.mIsBlack) { sister.mIsBlack = true; parent.mIsBlack = false; root = parent.rotateLeft(root); sister = parent.mRight; 
} - if ( (sister.mLeft == null || sister.mLeft.mIsBlack) && (sister.mRight == null || sister.mRight.mIsBlack) ) - { + if ((sister.mLeft == null || sister.mLeft.mIsBlack) + && (sister.mRight == null || sister.mRight.mIsBlack)) { sister.mIsBlack = false; node = parent; - } - else - { - if ( sister.mRight == null || sister.mRight.mIsBlack ) - { + } else { + if (sister.mRight == null || sister.mRight.mIsBlack) { sister.mLeft.mIsBlack = true; sister.mIsBlack = false; root = sister.rotateRight(root); @@ -1015,26 +837,20 @@ private static Node removeFixup( Node parent, Node node, Node sister = parent.mLeft; - if ( !sister.mIsBlack ) - { + if (!sister.mIsBlack) { sister.mIsBlack = true; parent.mIsBlack = false; root = parent.rotateRight(root); sister = parent.mLeft; } - if ( (sister.mLeft == null || sister.mLeft.mIsBlack) && (sister.mRight == null || sister.mRight.mIsBlack) ) - { + if ((sister.mLeft == null || sister.mLeft.mIsBlack) + && (sister.mRight == null || sister.mRight.mIsBlack)) { sister.mIsBlack = false; node = parent; - } - else - { - if ( sister.mLeft == null || sister.mLeft.mIsBlack ) - { + } else { + if (sister.mLeft == null || sister.mLeft.mIsBlack) { sister.mRight.mIsBlack = true; sister.mIsBlack = false; root = sister.rotateLeft(root); @@ -1048,8 +864,7 @@ private static Node removeFixup( Node parent, Node node, Node> - { - public FwdIterator( final Node node ) - { + public class FwdIterator implements Iterator> { + public FwdIterator(final Node node) { mNext = node; } @Override - public boolean hasNext() - { + public boolean hasNext() { return mNext != null; } @Override - public Node next() - { - if ( mNext == null ) - { + public Node next() { + if (mNext == null) { throw new NoSuchElementException("No next element."); } - if ( mNext.wasRemoved() ) - { - mNext = min(mNext.getStart(),mNext.getEnd()); - if ( mNext == null ) - throw new ConcurrentModificationException("Current element was removed, and there are no more elements."); + if (mNext.wasRemoved()) 
{ + mNext = min(mNext.getStart(), mNext.getEnd()); + if (mNext == null) + throw new ConcurrentModificationException( + "Current element was removed, and there are no more elements."); } mLast = mNext; mNext = mNext.getNext(); @@ -1129,10 +938,8 @@ public Node next() } @Override - public void remove() - { - if ( mLast == null ) - { + public void remove() { + if (mLast == null) { throw new IllegalStateException("No entry to remove."); } @@ -1144,30 +951,24 @@ public void remove() private Node mLast; } - public class RevIterator - implements Iterator> - { - public RevIterator( final Node node ) - { + public class RevIterator implements Iterator> { + public RevIterator(final Node node) { mNext = node; } @Override - public boolean hasNext() - { + public boolean hasNext() { return mNext != null; } @Override - public Node next() - { - if ( mNext == null ) - throw new NoSuchElementException("No next element."); - if ( mNext.wasRemoved() ) - { - mNext = max(mNext.getStart(),mNext.getEnd()); - if ( mNext == null ) - throw new ConcurrentModificationException("Current element was removed, and there are no more elements."); + public Node next() { + if (mNext == null) throw new NoSuchElementException("No next element."); + if (mNext.wasRemoved()) { + mNext = max(mNext.getStart(), mNext.getEnd()); + if (mNext == null) + throw new ConcurrentModificationException( + "Current element was removed, and there are no more elements."); } mLast = mNext; mNext = mNext.getPrev(); @@ -1175,10 +976,8 @@ public Node next() } @Override - public void remove() - { - if ( mLast == null ) - { + public void remove() { + if (mLast == null) { throw new IllegalStateException("No entry to remove."); } @@ -1190,45 +989,36 @@ public void remove() private Node mLast; } - public class OverlapIterator - implements Iterator> - { - public OverlapIterator( final int start, final int end ) - { - mNext = minOverlapper(start,end); + public class OverlapIterator implements Iterator> { + public OverlapIterator(final 
int start, final int end) { + mNext = minOverlapper(start, end); mStart = start; mEnd = end; } @Override - public boolean hasNext() - { + public boolean hasNext() { return mNext != null; } @Override - public Node next() - { - if ( mNext == null ) - { + public Node next() { + if (mNext == null) { throw new NoSuchElementException("No next element."); } - if ( mNext.wasRemoved() ) - { + if (mNext.wasRemoved()) { throw new ConcurrentModificationException("Current element was removed."); } mLast = mNext; - mNext = Node.getNextOverlapper(mNext,mStart,mEnd); + mNext = Node.getNextOverlapper(mNext, mStart, mEnd); return mLast; } @Override - public void remove() - { - if ( mLast == null ) - { + public void remove() { + if (mLast == null) { throw new IllegalStateException("No entry to remove."); } @@ -1242,29 +1032,23 @@ public void remove() private final int mEnd; } - public static class ValuesIterator - implements Iterator - { - public ValuesIterator( final Iterator> itr ) - { + public static class ValuesIterator implements Iterator { + public ValuesIterator(final Iterator> itr) { mItr = itr; } @Override - public boolean hasNext() - { + public boolean hasNext() { return mItr.hasNext(); } @Override - public V1 next() - { + public V1 next() { return mItr.next().getValue(); } @Override - public void remove() - { + public void remove() { mItr.remove(); } diff --git a/src/main/java/htsjdk/samtools/util/IntervalTreeMap.java b/src/main/java/htsjdk/samtools/util/IntervalTreeMap.java index ebec2f484a..00f5d020d3 100644 --- a/src/main/java/htsjdk/samtools/util/IntervalTreeMap.java +++ b/src/main/java/htsjdk/samtools/util/IntervalTreeMap.java @@ -41,9 +41,7 @@ * * @author Bob Handsaker */ -public class IntervalTreeMap - extends AbstractMap -{ +public class IntervalTreeMap extends AbstractMap { private final Map> mSequenceMap = new HashMap>(); private final EntrySet mEntrySet = new EntrySet(); @@ -51,8 +49,7 @@ public IntervalTree debugGetTree(final String sequence) { return 
mSequenceMap.get(sequence); } - public IntervalTreeMap() { - } + public IntervalTreeMap() {} public IntervalTreeMap(final Map map) { for (final Map.Entry entry : map.entrySet()) { @@ -87,11 +84,11 @@ public Set> entrySet() { } @SuppressWarnings("rawtypes") - public boolean equals(final Object o) { + public boolean equals(final Object o) { if (!(o instanceof IntervalTreeMap)) { return false; } - return mSequenceMap.equals(((IntervalTreeMap)o).mSequenceMap); + return mSequenceMap.equals(((IntervalTreeMap) o).mSequenceMap); } public int hashCode() { @@ -143,7 +140,7 @@ public T remove(final Object object) { if (!(object instanceof Interval)) { return null; } - return remove((Interval)object); + return remove((Interval) object); } public T remove(final Interval key) { @@ -164,16 +161,15 @@ public int size() { return size; } /** - * Test overlapping interval + * Test overlapping interval * @param key the Locatable - * @return true if it contains an object overlapping the interval + * @return true if it contains an object overlapping the interval */ public boolean containsOverlapping(final Locatable key) { final IntervalTree tree = mSequenceMap.get(key.getContig()); - return tree!=null && tree.overlappers(key.getStart(), key.getEnd()).hasNext(); - } - - + return tree != null && tree.overlappers(key.getStart(), key.getEnd()).hasNext(); + } + public Collection getOverlapping(final Locatable key) { final List result = new ArrayList(); final IntervalTree tree = mSequenceMap.get(key.getContig()); @@ -192,18 +188,17 @@ public Collection getOverlapping(final Locatable key) { */ public boolean containsContained(final Locatable key) { final IntervalTree tree = mSequenceMap.get(key.getContig()); - if(tree==null) return false; - final Iterator> iterator = tree.overlappers(key.getStart(), key.getEnd()); - while (iterator.hasNext()) { - final IntervalTree.Node node = iterator.next(); - if (node.getStart() >= key.getStart() && node.getEnd() <= key.getEnd()) { - return true; - } + if 
(tree == null) return false; + final Iterator> iterator = tree.overlappers(key.getStart(), key.getEnd()); + while (iterator.hasNext()) { + final IntervalTree.Node node = iterator.next(); + if (node.getStart() >= key.getStart() && node.getEnd() <= key.getEnd()) { + return true; } + } return false; } - - + public Collection getContained(final Locatable key) { final List result = new ArrayList(); final IntervalTree tree = mSequenceMap.get(key.getContig()); @@ -219,15 +214,14 @@ public Collection getContained(final Locatable key) { return result; } - private class EntrySet - extends AbstractSet> { + private class EntrySet extends AbstractSet> { @Override public void clear() { - IntervalTreeMap.this.clear(); + IntervalTreeMap.this.clear(); } - public boolean contains(final Map.Entry entry) { + public boolean contains(final Map.Entry entry) { if (entry == null) { return false; } @@ -240,7 +234,7 @@ public boolean isEmpty() { } @Override - public Iterator> iterator() { + public Iterator> iterator() { return new EntryIterator(); } @@ -251,10 +245,10 @@ public boolean remove(final Object object) { if (!(object instanceof Map.Entry)) { return false; } - return remove((Map.Entry)object); + return remove((Map.Entry) object); } - public boolean remove(final Map.Entry entry) { + public boolean remove(final Map.Entry entry) { if (this.contains(entry)) { IntervalTreeMap.this.remove(entry.getKey()); return true; @@ -269,8 +263,7 @@ public int size() { } } - private class EntryIterator - implements Iterator> { + private class EntryIterator implements Iterator> { private String mSequence = null; private Iterator mSequenceIterator = null; @@ -287,7 +280,7 @@ public boolean hasNext() { } @Override - public Map.Entry next() { + public Map.Entry next() { if (!hasNext()) { throw new NoSuchElementException("Iterator exhausted"); } @@ -320,8 +313,7 @@ private void advanceSequence() { } } - private class MapEntry - implements Map.Entry { + private class MapEntry implements Map.Entry { 
private final Interval mKey; private T mValue; diff --git a/src/main/java/htsjdk/samtools/util/IntervalUtil.java b/src/main/java/htsjdk/samtools/util/IntervalUtil.java index e06b0d9c9d..0096f9417f 100644 --- a/src/main/java/htsjdk/samtools/util/IntervalUtil.java +++ b/src/main/java/htsjdk/samtools/util/IntervalUtil.java @@ -25,7 +25,6 @@ import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMSequenceDictionary; - import java.util.Iterator; /** @@ -35,14 +34,14 @@ public class IntervalUtil { /** Return true if the sequence/position lie in the provided interval. */ public static boolean contains(final Interval interval, final String sequenceName, final long position) { - return interval.getContig().equals(sequenceName) && (position >= interval.getStart() && position <= interval.getEnd()); + return interval.getContig().equals(sequenceName) + && (position >= interval.getStart() && position <= interval.getEnd()); } /** Return true if the sequence/position lie in the provided interval list. 
*/ public static boolean contains(final IntervalList intervalList, final String sequenceName, final long position) { for (final Interval interval : intervalList.uniqued().getIntervals()) { - if (contains(interval, sequenceName, position)) - return true; + if (contains(interval, sequenceName, position)) return true; } return false; } @@ -53,7 +52,8 @@ public static boolean contains(final IntervalList intervalList, final String seq * @param intervals * @param sequenceDictionary used to determine order of sequences */ - public static void assertOrderedNonOverlapping(final Iterator intervals, final SAMSequenceDictionary sequenceDictionary) { + public static void assertOrderedNonOverlapping( + final Iterator intervals, final SAMSequenceDictionary sequenceDictionary) { if (!intervals.hasNext()) { return; } @@ -65,8 +65,8 @@ public static void assertOrderedNonOverlapping(final Iterator interval throw new SAMException("Intervals should not overlap: " + prevInterval + "; " + interval); } final int thisSequenceIndex = sequenceDictionary.getSequenceIndex(interval.getContig()); - if (prevSequenceIndex > thisSequenceIndex || - (prevSequenceIndex == thisSequenceIndex && prevInterval.compareTo(interval) >= 0)) { + if (prevSequenceIndex > thisSequenceIndex + || (prevSequenceIndex == thisSequenceIndex && prevInterval.compareTo(interval) >= 0)) { throw new SAMException("Intervals not in order: " + prevInterval + "; " + interval); } prevInterval = interval; @@ -107,9 +107,10 @@ public IntervalCombiner setEnforceSameStrand(final boolean enforceSameStrand) { private boolean enforceSameStrand = false; - public IntervalList combine(final IntervalList intervalList){ + public IntervalList combine(final IntervalList intervalList) { IntervalList retVal = new IntervalList(intervalList.getHeader()); - retVal.addall(IntervalList.getUniqueIntervals(intervalList, combineAbutting, concatenateNames, enforceSameStrand)); + retVal.addall(IntervalList.getUniqueIntervals( + intervalList, 
combineAbutting, concatenateNames, enforceSameStrand)); return retVal; } } diff --git a/src/main/java/htsjdk/samtools/util/IterableOnceIterator.java b/src/main/java/htsjdk/samtools/util/IterableOnceIterator.java index 5ca4bc1620..09c253ca3d 100644 --- a/src/main/java/htsjdk/samtools/util/IterableOnceIterator.java +++ b/src/main/java/htsjdk/samtools/util/IterableOnceIterator.java @@ -22,8 +22,7 @@ public abstract class IterableOnceIterator implements Iterable, Iterator iterator() { if (iterated) { throw new IllegalStateException("May not call iterator() more than once on IterableOnceIterator."); - } - else { + } else { iterated = true; return this; } @@ -36,7 +35,8 @@ public void remove() { } /** Does nothing, intended to be overridden when needed. */ - @Override public void close() throws IOException { + @Override + public void close() throws IOException { // Default do nothing implementation } } diff --git a/src/main/java/htsjdk/samtools/util/Iterables.java b/src/main/java/htsjdk/samtools/util/Iterables.java index 46c8d5acf6..dd60fa5957 100644 --- a/src/main/java/htsjdk/samtools/util/Iterables.java +++ b/src/main/java/htsjdk/samtools/util/Iterables.java @@ -8,9 +8,7 @@ * @author mccowan */ public class Iterables { - private Iterables() { - - } + private Iterables() {} public static List slurp(final Iterator iterator) { final List ts = new ArrayList(); diff --git a/src/main/java/htsjdk/samtools/util/Lazy.java b/src/main/java/htsjdk/samtools/util/Lazy.java index fca53a6a27..3fc8f5a2e2 100644 --- a/src/main/java/htsjdk/samtools/util/Lazy.java +++ b/src/main/java/htsjdk/samtools/util/Lazy.java @@ -4,10 +4,10 @@ /** * Simple utility for building an on-demand (lazy) object-initializer. - * + * * Works by accepting an initializer describing how to build the on-demand object, which is only called once and only after the first * invocation of {@link #get()} (or it may not be called at all). 
- * + * * @author mccowan */ public class Lazy { @@ -38,7 +38,7 @@ public interface LazyInitializer extends Supplier { T make(); @Override - default T get(){ + default T get() { return make(); } } diff --git a/src/main/java/htsjdk/samtools/util/ListMap.java b/src/main/java/htsjdk/samtools/util/ListMap.java index 385c2e8a90..ab2fa22773 100755 --- a/src/main/java/htsjdk/samtools/util/ListMap.java +++ b/src/main/java/htsjdk/samtools/util/ListMap.java @@ -34,7 +34,7 @@ * * @author Tim Fennell */ -public class ListMap extends HashMap> { +public class ListMap extends HashMap> { /** Adds a single value to the list stored under a key. */ public void add(K key, V value) { List values = get(key); diff --git a/src/main/java/htsjdk/samtools/util/Locatable.java b/src/main/java/htsjdk/samtools/util/Locatable.java index 8dbf534b39..a855e17aa3 100644 --- a/src/main/java/htsjdk/samtools/util/Locatable.java +++ b/src/main/java/htsjdk/samtools/util/Locatable.java @@ -51,8 +51,8 @@ default boolean overlaps(Locatable other) { * @return true if this interval overlaps other, otherwise false */ default boolean withinDistanceOf(Locatable other, int distance) { - return contigsMatch(other) && - CoordMath.overlaps(getStart(), getEnd(), other.getStart()-distance, other.getEnd()+distance); + return contigsMatch(other) + && CoordMath.overlaps(getStart(), getEnd(), other.getStart() - distance, other.getEnd() + distance); } /** diff --git a/src/main/java/htsjdk/samtools/util/LocationAware.java b/src/main/java/htsjdk/samtools/util/LocationAware.java index 46e1d2ddd5..b040bdf76c 100644 --- a/src/main/java/htsjdk/samtools/util/LocationAware.java +++ b/src/main/java/htsjdk/samtools/util/LocationAware.java @@ -2,16 +2,16 @@ /** * Describes API for getting current position in a stream, writer, or underlying file. 
- * - * The expected functionality is simple: if you are a output stream / writer, and you've written 50 bytes to the stream, then - * {@link #getPosition()} should return 50; if you are an input stream or file reader, and you've read 25 bytes from the object, then it + * + * The expected functionality is simple: if you are a output stream / writer, and you've written 50 bytes to the stream, then + * {@link #getPosition()} should return 50; if you are an input stream or file reader, and you've read 25 bytes from the object, then it * should return 25. - * + * * In the context of an iterator or any producer-like object that doesn't map directly to a byte stream, {@link #getPosition()} should * return the position (in the underlying stream being read/written to) of the most-recently read/written element. For example, if you * are reading lines from a file with a {@link htsjdk.tribble.readers.AsciiLineReaderIterator}, calling {@link #getPosition()} should return the byte position * of the start of the most recent line returned by {@link htsjdk.tribble.readers.AsciiLineReaderIterator#next()}. - * + * * @author mccowan */ public interface LocationAware { diff --git a/src/main/java/htsjdk/samtools/util/Locus.java b/src/main/java/htsjdk/samtools/util/Locus.java index 8012263c85..ab4c0b5532 100644 --- a/src/main/java/htsjdk/samtools/util/Locus.java +++ b/src/main/java/htsjdk/samtools/util/Locus.java @@ -33,4 +33,4 @@ public interface Locus { /** @return 1-based position */ int getPosition(); -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/samtools/util/Log.java b/src/main/java/htsjdk/samtools/util/Log.java index 6410dc006c..b9f24b5054 100644 --- a/src/main/java/htsjdk/samtools/util/Log.java +++ b/src/main/java/htsjdk/samtools/util/Log.java @@ -44,7 +44,12 @@ public final class Log { /** * Enumeration for setting log levels. 
*/ - public enum LogLevel {ERROR, WARNING, INFO, DEBUG} + public enum LogLevel { + ERROR, + WARNING, + INFO, + DEBUG + } private static LogLevel globalLogLevel = LogLevel.INFO; private static PrintStream out = System.err; diff --git a/src/main/java/htsjdk/samtools/util/Md5CalculatingInputStream.java b/src/main/java/htsjdk/samtools/util/Md5CalculatingInputStream.java index 47ea9ff3b3..0179398e4d 100755 --- a/src/main/java/htsjdk/samtools/util/Md5CalculatingInputStream.java +++ b/src/main/java/htsjdk/samtools/util/Md5CalculatingInputStream.java @@ -24,7 +24,6 @@ package htsjdk.samtools.util; import htsjdk.samtools.SAMException; - import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; @@ -59,8 +58,7 @@ public Md5CalculatingInputStream(InputStream is, File digestFile) { try { md5 = MessageDigest.getInstance("MD5"); md5.reset(); - } - catch (NoSuchAlgorithmException e) { + } catch (NoSuchAlgorithmException e) { throw new RuntimeException("MD5 algorithm not found", e); } } @@ -68,7 +66,7 @@ public Md5CalculatingInputStream(InputStream is, File digestFile) { @Override public int read() throws IOException { int result = is.read(); - if (result != -1) md5.update((byte)result); + if (result != -1) md5.update((byte) result); return result; } @@ -79,7 +77,6 @@ public int read(byte[] b) throws IOException { return result; } - @Override public int read(byte[] b, int off, int len) throws IOException { int result = is.read(b, off, len); @@ -88,14 +85,15 @@ public int read(byte[] b, int off, int len) throws IOException { } public String md5() { - if(hash == null) { - throw new SAMException("Attempting to access md5 digest before the entire file is read! Call close first."); + if (hash == null) { + throw new SAMException( + "Attempting to access md5 digest before the entire file is read! 
Call close first."); } return hash; } private String makeHash() { - if(hash == null) { + if (hash == null) { hash = new BigInteger(1, md5.digest()).toString(16); if (hash.length() != 32) { final String zeros = "00000000000000000000000000000000"; @@ -112,7 +110,7 @@ public void close() throws IOException { is.close(); makeHash(); - if(digestFile != null) { + if (digestFile != null) { BufferedWriter writer = new BufferedWriter(new FileWriter(digestFile)); writer.write(hash); writer.close(); @@ -121,15 +119,20 @@ public void close() throws IOException { // Methods not supported or overridden because they would not result in a valid hash @Override - public boolean markSupported() { return false; } + public boolean markSupported() { + return false; + } + @Override - public void mark(int readlimit) { + public void mark(int readlimit) { throw new UnsupportedOperationException("mark() is not supported by the MD5CalculatingInputStream"); } + @Override - public void reset() throws IOException { + public void reset() throws IOException { throw new UnsupportedOperationException("reset() is not supported by the MD5CalculatingInputStream"); } + @Override public long skip(long n) throws IOException { throw new UnsupportedOperationException("skip() is not supported by the MD5CalculatingInputStream"); @@ -137,6 +140,7 @@ public long skip(long n) throws IOException { // Methods delegated to the wrapped InputStream @Override - public int available() throws IOException { return is.available(); } - + public int available() throws IOException { + return is.available(); + } } diff --git a/src/main/java/htsjdk/samtools/util/Md5CalculatingOutputStream.java b/src/main/java/htsjdk/samtools/util/Md5CalculatingOutputStream.java index c002b5bf01..7f7d647386 100755 --- a/src/main/java/htsjdk/samtools/util/Md5CalculatingOutputStream.java +++ b/src/main/java/htsjdk/samtools/util/Md5CalculatingOutputStream.java @@ -24,7 +24,6 @@ package htsjdk.samtools.util; import htsjdk.samtools.SAMException; - 
import java.io.BufferedWriter; import java.io.File; import java.io.IOException; @@ -60,8 +59,7 @@ public Md5CalculatingOutputStream(OutputStream os, Path digestFile) { try { md5 = MessageDigest.getInstance("MD5"); md5.reset(); - } - catch (NoSuchAlgorithmException e) { + } catch (NoSuchAlgorithmException e) { throw new RuntimeException("MD5 algorithm not found", e); } } @@ -72,7 +70,7 @@ public Md5CalculatingOutputStream(OutputStream os, File digestFile) { @Override public void write(int b) throws IOException { - md5.update((byte)b); + md5.update((byte) b); os.write(b); } @@ -82,7 +80,6 @@ public void write(byte[] b) throws IOException { os.write(b); } - @Override public void write(byte[] b, int off, int len) throws IOException { md5.update(b, off, len); @@ -90,15 +87,16 @@ public void write(byte[] b, int off, int len) throws IOException { } public String md5() { - if(hash == null) { - throw new SAMException("Attempting to access md5 digest before the entire file is written! Call close first."); + if (hash == null) { + throw new SAMException( + "Attempting to access md5 digest before the entire file is written! 
Call close first."); } return hash; } private String makeHash() { - if(hash == null) { + if (hash == null) { hash = new BigInteger(1, md5.digest()).toString(16); if (hash.length() != 32) { final String zeros = "00000000000000000000000000000000"; @@ -115,7 +113,7 @@ public void close() throws IOException { os.close(); makeHash(); - if(digestFile != null) { + if (digestFile != null) { BufferedWriter writer = Files.newBufferedWriter(digestFile); writer.write(hash); writer.close(); @@ -124,6 +122,7 @@ public void close() throws IOException { // Pass-through method @Override - public void flush() throws IOException { os.flush(); } - + public void flush() throws IOException { + os.flush(); + } } diff --git a/src/main/java/htsjdk/samtools/util/MergingIterator.java b/src/main/java/htsjdk/samtools/util/MergingIterator.java index 4f468b7577..040a87efd5 100644 --- a/src/main/java/htsjdk/samtools/util/MergingIterator.java +++ b/src/main/java/htsjdk/samtools/util/MergingIterator.java @@ -36,106 +36,106 @@ */ public class MergingIterator implements CloseableIterator { - /* - * An Iterator whose natural ordering is by the T that will be returned by the next call to - * next(). - */ - private class ComparableIterator extends PeekableIterator implements Comparable { - - public ComparableIterator(final Iterator iterator) { - super(iterator); - } - - @Override - public int compareTo(final ComparableIterator that) { - if (comparator.getClass() != comparator.getClass()) { - throw new IllegalStateException("Can't compare two ComparableIterators that have different orderings."); - } - - return comparator.compare(this.peek(), that.peek()); - } - } - - /* - * The general flow is to pull the "top" (according to the ComparableIterator's compareTo()) - * iterator off on calls to this.next(), get iterator.next() and then re-add the iterator to - * the queue. Readding reorders the queue so the next "top" iterator is ready. 
- */ - private final PriorityQueue queue; - - private final Comparator comparator; - - // This is the last T returned by the call to next(). It's used to make sure that the comparators - // always return correctly ordered Ts. - private T lastReturned; - - /** - * Creates a MergingIterator over the given Collection of iterators whose elements will be - * returned in the order defined by the given Comparator. - */ - public MergingIterator(final Comparator comparator, final Collection> iterators) { - if (iterators.isEmpty()) throw new IllegalArgumentException("One or more CloseableIterators must be provided."); - - this.comparator = comparator; - - this.queue = new PriorityQueue(); - for (final CloseableIterator iterator : iterators) { - this.addIfNotEmpty(new ComparableIterator(iterator)); - } - - // If there are no iterators to read from after adding them all to the prioqueue, - // should we throw? it's prob'ly an error. - } - - @Override - public boolean hasNext() { - return ! this.queue.isEmpty(); - } - - @Override - public T next() { - if ( ! this.hasNext()) throw new NoSuchElementException(); - - final ComparableIterator recordIterator = this.queue.poll(); - // Assumes the iterator is closed & removed from the queue before recordIterator.hasNext() == false - final T next = recordIterator.next(); - // I don't like having to test for null here -- it's really only null before the first call - // to next() -- but I don't see any other way - if (this.lastReturned != null && this.comparator.compare(lastReturned, next) > 0) { - throw new IllegalStateException( - "The elements of the input Iterators are not sorted according to the comparator " + - this.comparator.getClass().getName()); - } - - addIfNotEmpty(recordIterator); - this.lastReturned = next; - return next; - } - - /** - * Unsupported. - */ - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - - /** - * Closes every CloseableIterator in this MergingIterator. 
After calling, calls to - * hasNext() will always return false. - */ - @Override - public void close() { - final Iterator iterator = this.queue.iterator(); - while (iterator.hasNext()) { - final ComparableIterator subIterator = iterator.next(); - subIterator.close(); - iterator.remove(); - } - } - - private void addIfNotEmpty(final ComparableIterator iterator) { - if (iterator.hasNext()) queue.offer(iterator); - else iterator.close(); - } + /* + * An Iterator whose natural ordering is by the T that will be returned by the next call to + * next(). + */ + private class ComparableIterator extends PeekableIterator implements Comparable { + + public ComparableIterator(final Iterator iterator) { + super(iterator); + } + + @Override + public int compareTo(final ComparableIterator that) { + if (comparator.getClass() != comparator.getClass()) { + throw new IllegalStateException("Can't compare two ComparableIterators that have different orderings."); + } + + return comparator.compare(this.peek(), that.peek()); + } + } + + /* + * The general flow is to pull the "top" (according to the ComparableIterator's compareTo()) + * iterator off on calls to this.next(), get iterator.next() and then re-add the iterator to + * the queue. Readding reorders the queue so the next "top" iterator is ready. + */ + private final PriorityQueue queue; + + private final Comparator comparator; + + // This is the last T returned by the call to next(). It's used to make sure that the comparators + // always return correctly ordered Ts. + private T lastReturned; + + /** + * Creates a MergingIterator over the given Collection of iterators whose elements will be + * returned in the order defined by the given Comparator. 
+ */ + public MergingIterator(final Comparator comparator, final Collection> iterators) { + if (iterators.isEmpty()) throw new IllegalArgumentException("One or more CloseableIterators must be provided."); + + this.comparator = comparator; + + this.queue = new PriorityQueue(); + for (final CloseableIterator iterator : iterators) { + this.addIfNotEmpty(new ComparableIterator(iterator)); + } + + // If there are no iterators to read from after adding them all to the prioqueue, + // should we throw? it's prob'ly an error. + } + + @Override + public boolean hasNext() { + return !this.queue.isEmpty(); + } + + @Override + public T next() { + if (!this.hasNext()) throw new NoSuchElementException(); + + final ComparableIterator recordIterator = this.queue.poll(); + // Assumes the iterator is closed & removed from the queue before recordIterator.hasNext() == false + final T next = recordIterator.next(); + // I don't like having to test for null here -- it's really only null before the first call + // to next() -- but I don't see any other way + if (this.lastReturned != null && this.comparator.compare(lastReturned, next) > 0) { + throw new IllegalStateException( + "The elements of the input Iterators are not sorted according to the comparator " + + this.comparator.getClass().getName()); + } + + addIfNotEmpty(recordIterator); + this.lastReturned = next; + return next; + } + + /** + * Unsupported. + */ + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + /** + * Closes every CloseableIterator in this MergingIterator. After calling, calls to + * hasNext() will always return false. 
+ */ + @Override + public void close() { + final Iterator iterator = this.queue.iterator(); + while (iterator.hasNext()) { + final ComparableIterator subIterator = iterator.next(); + subIterator.close(); + iterator.remove(); + } + } + + private void addIfNotEmpty(final ComparableIterator iterator) { + if (iterator.hasNext()) queue.offer(iterator); + else iterator.close(); + } } diff --git a/src/main/java/htsjdk/samtools/util/Murmur3.java b/src/main/java/htsjdk/samtools/util/Murmur3.java index b0c4a3a158..56d1e89e32 100644 --- a/src/main/java/htsjdk/samtools/util/Murmur3.java +++ b/src/main/java/htsjdk/samtools/util/Murmur3.java @@ -36,10 +36,10 @@ * Provides an implementation of the Murmur3_32 hash algorithm that has desirable properties in terms of randomness * and uniformity of the distribution of output values that make it a useful hashing algorithm for downsampling. */ -public final class Murmur3 implements Serializable{ +public final class Murmur3 implements Serializable { private static final long serialVersionUID = 1L; - private final int seed ; + private final int seed; /** Constructs a Murmur3 hash with the given seed. */ public Murmur3(final int seed) { @@ -47,19 +47,19 @@ public Murmur3(final int seed) { } /** Hashes a character stream to an int using Murmur3. 
*/ - public int hashUnencodedChars(CharSequence input){ + public int hashUnencodedChars(CharSequence input) { int h1 = this.seed; // step through the CharSequence 2 chars at a time final int length = input.length(); - for(int i = 1; i < length; i += 2) { + for (int i = 1; i < length; i += 2) { int k1 = input.charAt(i - 1) | (input.charAt(i) << 16); k1 = mixK1(k1); h1 = mixH1(h1, k1); } // deal with any remaining characters - if((length & 1) == 1) { + if ((length & 1) == 1) { int k1 = input.charAt(length - 1); k1 = mixK1(k1); h1 ^= k1; @@ -68,16 +68,16 @@ public int hashUnencodedChars(CharSequence input){ return fmix(h1, 2 * length); } - private int hashInt(int input){ - if(input == 0) return 0; + private int hashInt(int input) { + if (input == 0) return 0; int k1 = mixK1(input); int h1 = mixH1(this.seed, k1); return fmix(h1, 4); } - private int hashLong(long input){ - if(input == 0) return 0; + private int hashLong(long input) { + if (input == 0) return 0; int low = (int) input; int high = (int) (input >>> 32); @@ -90,7 +90,7 @@ private int hashLong(long input){ return fmix(h1, 8); } - private static int mixK1(int k1){ + private static int mixK1(int k1) { final int c1 = 0xcc9e2d51; final int c2 = 0x1b873593; k1 *= c1; @@ -99,7 +99,7 @@ private static int mixK1(int k1){ return k1; } - private static int mixH1(int h1, int k1){ + private static int mixH1(int h1, int k1) { h1 ^= k1; h1 = Integer.rotateLeft(h1, 13); h1 = h1 * 5 + 0xe6546b64; @@ -107,7 +107,7 @@ private static int mixH1(int h1, int k1){ } // Finalization mix - force all bits of a hash block to avalanche - private static int fmix(int h1, int length){ + private static int fmix(int h1, int length) { h1 ^= length; h1 ^= h1 >>> 16; h1 *= 0x85ebca6b; @@ -116,4 +116,4 @@ private static int fmix(int h1, int length){ h1 ^= h1 >>> 16; return h1; } -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/samtools/util/OverlapDetector.java b/src/main/java/htsjdk/samtools/util/OverlapDetector.java index 
3d2f662500..f2b4714621 100644 --- a/src/main/java/htsjdk/samtools/util/OverlapDetector.java +++ b/src/main/java/htsjdk/samtools/util/OverlapDetector.java @@ -83,7 +83,7 @@ public void addLhs(final T object, final Locatable interval) { final int start = interval.getStart() + this.lhsBuffer; final int end = interval.getEnd() - this.lhsBuffer; - if (start <= end) { // Don't put in sequences that have no overlappable bases + if (start <= end) { // Don't put in sequences that have no overlappable bases tree.merge(start, end, Collections.singleton(object), mergeSetsAccountingForSingletons()); } } @@ -116,10 +116,11 @@ public void addAll(final List objects, final List interv throw new IllegalArgumentException("null intervals"); } if (objects.size() != intervals.size()) { - throw new IllegalArgumentException("Objects and intervals must be the same size but were " + objects.size() + " and " + intervals.size()); + throw new IllegalArgumentException("Objects and intervals must be the same size but were " + objects.size() + + " and " + intervals.size()); } - for (int i=0; i end) { return false; @@ -175,7 +176,7 @@ public boolean overlapsAny(final Locatable locatable) { * Gets the Set of objects that overlap the provided locatable. * The returned set may not be modifiable. 
*/ - public Set getOverlaps(final Locatable locatable) { + public Set getOverlaps(final Locatable locatable) { if (locatable == null) { throw new IllegalArgumentException("null locatable"); } @@ -185,7 +186,7 @@ public Set getOverlaps(final Locatable locatable) { return Collections.emptySet(); } final int start = locatable.getStart() + this.rhsBuffer; - final int end = locatable.getEnd() - this.rhsBuffer; + final int end = locatable.getEnd() - this.rhsBuffer; if (start > end) { return Collections.emptySet(); diff --git a/src/main/java/htsjdk/samtools/util/PeekIterator.java b/src/main/java/htsjdk/samtools/util/PeekIterator.java index 3a43ba54b1..89e3641fcd 100644 --- a/src/main/java/htsjdk/samtools/util/PeekIterator.java +++ b/src/main/java/htsjdk/samtools/util/PeekIterator.java @@ -38,12 +38,12 @@ public PeekIterator(final Iterator underlyingIterator) { } /** - * @return true if the iteration has more elements. (In other words, returns true if next would return an element + * @return true if the iteration has more elements. (In other words, returns true if next would return an element * rather than throwing an exception.) */ @Override public boolean hasNext() { - return peekedElement != null || underlyingIterator.hasNext(); + return peekedElement != null || underlyingIterator.hasNext(); } /** diff --git a/src/main/java/htsjdk/samtools/util/PeekableIterator.java b/src/main/java/htsjdk/samtools/util/PeekableIterator.java index 3df4c42ca1..cd4a707e1e 100644 --- a/src/main/java/htsjdk/samtools/util/PeekableIterator.java +++ b/src/main/java/htsjdk/samtools/util/PeekableIterator.java @@ -62,15 +62,14 @@ public Object next() { * Returns the next object but does not advance the iterator. Subsequent calls to peek() * and next() will return the same object. 
*/ - public Object peek(){ + public Object peek() { return this.nextObject; } - private void advance(){ + private void advance() { if (this.iterator.hasNext()) { this.nextObject = iterator.next(); - } - else { + } else { this.nextObject = null; } } diff --git a/src/main/java/htsjdk/samtools/util/PositionalOutputStream.java b/src/main/java/htsjdk/samtools/util/PositionalOutputStream.java index a4643db42a..1e042a53a6 100644 --- a/src/main/java/htsjdk/samtools/util/PositionalOutputStream.java +++ b/src/main/java/htsjdk/samtools/util/PositionalOutputStream.java @@ -1,27 +1,27 @@ /* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ package htsjdk.samtools.util; @@ -32,8 +32,7 @@ * Wraps output stream in a manner which keeps track of the position within the file and allowing writes * at arbitrary points */ -public final class PositionalOutputStream extends OutputStream implements LocationAware -{ +public final class PositionalOutputStream extends OutputStream implements LocationAware { private final OutputStream out; private long position = 0; @@ -53,13 +52,15 @@ public final void write(final byte[] bytes, final int startIndex, final int numB } @Override - public final void write(final int c) throws IOException { + public final void write(final int c) throws IOException { position++; out.write(c); } @Override - public final long getPosition() { return position; } + public final long getPosition() { + return position; + } @Override public void close() throws IOException { diff --git a/src/main/java/htsjdk/samtools/util/ProcessExecutor.java b/src/main/java/htsjdk/samtools/util/ProcessExecutor.java index 5ff667353c..ee4c1999c4 100644 --- a/src/main/java/htsjdk/samtools/util/ProcessExecutor.java +++ b/src/main/java/htsjdk/samtools/util/ProcessExecutor.java @@ -25,7 +25,6 @@ package htsjdk.samtools.util; import htsjdk.samtools.SAMException; - import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.IOException; @@ -41,7 +40,7 @@ /** * Utility class that will execute sub processes via Runtime.getRuntime().exec(...) and read * off the output from stderr and stdout of the sub process. This implementation uses a different - * thread to read each stream: the current thread for stdout and another, internal thread for + * thread to read each stream: the current thread for stdout and another, internal thread for * stderr. This utility is able to handle concurrent executions, spawning as many threads as * are required to handle the concurrent load. 
* @@ -55,13 +54,13 @@ public Thread newThread(final Runnable r) { return new Thread(r, "ProcessExecutor Thread"); } }); - + /** * Executes the command via Runtime.getRuntime().exec() then writes stderr to log.error * and stdout to log.info and blocks until the command is complete. - * + * * @see Runtime#exec(String) - * + * * @param command command string * @return return code of command */ @@ -70,16 +69,17 @@ public static int execute(final String command) { final Process process = Runtime.getRuntime().exec(command); return readStreamsAndWaitFor(process); } catch (Throwable t) { - throw new SAMException("Unexpected exception executing [" + htsjdk.samtools.util.StringUtil.join(" ", command) + "]", t); + throw new SAMException( + "Unexpected exception executing [" + htsjdk.samtools.util.StringUtil.join(" ", command) + "]", t); } } /** * Executes the command via Runtime.getRuntime().exec() then writes stderr to log.error * and stdout to log.info and blocks until the command is complete. - * + * * @see Runtime#exec(String[]) - * + * * @param commandParts command string * @return return code of command */ @@ -108,7 +108,9 @@ public static int execute(final String[] commandParts, String outputStreamString } return readStreamsAndWaitFor(process); } catch (Throwable t) { - throw new SAMException("Unexpected exception executing [" + htsjdk.samtools.util.StringUtil.join(" ", commandParts) + "]", t); + throw new SAMException( + "Unexpected exception executing [" + htsjdk.samtools.util.StringUtil.join(" ", commandParts) + "]", + t); } } @@ -117,7 +119,8 @@ public static String executeAndReturnResult(final String command) { final Process process = Runtime.getRuntime().exec(command); final StringBuilderProcessOutputReader err = new StringBuilderProcessOutputReader(process.getErrorStream()); final Future stderrReader = executorService.submit(err); - final StringBuilderProcessOutputReader stdout = new StringBuilderProcessOutputReader(process.getInputStream()); + final 
StringBuilderProcessOutputReader stdout = + new StringBuilderProcessOutputReader(process.getInputStream()); stdout.run(); // wait for stderr reader to be done stderrReader.get(); @@ -126,7 +129,6 @@ public static String executeAndReturnResult(final String command) { } catch (Throwable t) { throw new SAMException("Unexpected exception executing [" + command + "]", t); } - } public static class ExitStatusAndOutput { @@ -170,39 +172,38 @@ public static ExitStatusAndOutput executeAndReturnInterleavedOutput(final String } } - private static ExitStatusAndOutput interleaveProcessOutput(final Process process) throws InterruptedException, IOException { + private static ExitStatusAndOutput interleaveProcessOutput(final Process process) + throws InterruptedException, IOException { final BufferedReader stdoutReader = new BufferedReader(new InputStreamReader(process.getInputStream())); final BufferedReader stderrReader = new BufferedReader(new InputStreamReader(process.getErrorStream())); final StringBuilder sb = new StringBuilder(); String stdoutLine = null; String stderrLine = null; - while ((stderrLine = stderrReader.readLine()) != null || - (stdoutLine = stdoutReader.readLine()) != null) { - if (stderrLine!= null) sb.append(stderrLine).append('\n'); - if (stdoutLine!= null) sb.append(stdoutLine).append('\n'); + while ((stderrLine = stderrReader.readLine()) != null || (stdoutLine = stdoutReader.readLine()) != null) { + if (stderrLine != null) sb.append(stderrLine).append('\n'); + if (stdoutLine != null) sb.append(stdoutLine).append('\n'); stderrLine = null; stdoutLine = null; } return new ExitStatusAndOutput(process.waitFor(), sb.toString(), null); - } - private static int readStreamsAndWaitFor(final Process process) - throws InterruptedException, ExecutionException { - final Future stderrReader = executorService.submit(new LogErrorProcessOutputReader(process.getErrorStream())); + private static int readStreamsAndWaitFor(final Process process) throws InterruptedException, 
ExecutionException { + final Future stderrReader = + executorService.submit(new LogErrorProcessOutputReader(process.getErrorStream())); new LogInfoProcessOutputReader(process.getInputStream()).run(); // wait for stderr reader to be done stderrReader.get(); return process.waitFor(); } - /** * Runnable that reads off the given stream and logs it somewhere. */ - private static abstract class ProcessOutputReader implements Runnable { + private abstract static class ProcessOutputReader implements Runnable { private final BufferedReader reader; + public ProcessOutputReader(final InputStream stream) { reader = new BufferedReader(new InputStreamReader(stream)); } @@ -218,26 +219,46 @@ public void run() { throw new SAMException("Unexpected exception reading from process stream", e); } } - + protected abstract void write(String message); } - private static class LogErrorProcessOutputReader extends ProcessOutputReader { - public LogErrorProcessOutputReader(final InputStream stream) { super(stream); } - @Override protected void write(final String message) { log.error(message); } + public LogErrorProcessOutputReader(final InputStream stream) { + super(stream); + } + + @Override + protected void write(final String message) { + log.error(message); + } } private static class LogInfoProcessOutputReader extends ProcessOutputReader { - public LogInfoProcessOutputReader(final InputStream stream) { super(stream); } - @Override protected void write(final String message) { log.info(message); } + public LogInfoProcessOutputReader(final InputStream stream) { + super(stream); + } + + @Override + protected void write(final String message) { + log.info(message); + } } private static class StringBuilderProcessOutputReader extends ProcessOutputReader { private final StringBuilder sb = new StringBuilder(); - public StringBuilderProcessOutputReader(final InputStream stream) { super(stream); } - @Override protected void write(final String message) { sb.append(message).append('\n'); } - public 
String getOutput() { return sb.toString(); } - } + public StringBuilderProcessOutputReader(final InputStream stream) { + super(stream); + } + + @Override + protected void write(final String message) { + sb.append(message).append('\n'); + } + + public String getOutput() { + return sb.toString(); + } + } } diff --git a/src/main/java/htsjdk/samtools/util/ProgressLogger.java b/src/main/java/htsjdk/samtools/util/ProgressLogger.java index 6a293d6eee..5dc297f847 100644 --- a/src/main/java/htsjdk/samtools/util/ProgressLogger.java +++ b/src/main/java/htsjdk/samtools/util/ProgressLogger.java @@ -37,16 +37,20 @@ public ProgressLogger(final Log log, final int n, final String verb) { * @param log the Log object to write outputs to * @param n the frequency with which to output (i.e. every N records) */ - public ProgressLogger(final Log log, final int n) { this(log, n, "Processed"); } + public ProgressLogger(final Log log, final int n) { + this(log, n, "Processed"); + } /** * Construct a progress logger with the desired log, the verb "Processed" and a period of 1m records. * @param log the Log object to write outputs to */ - public ProgressLogger(final Log log) { this(log, 1000000); } + public ProgressLogger(final Log log) { + this(log, 1000000); + } @Override protected void log(final String... message) { - log.info((Object[])message); + log.info((Object[]) message); } } diff --git a/src/main/java/htsjdk/samtools/util/ProgressLoggerInterface.java b/src/main/java/htsjdk/samtools/util/ProgressLoggerInterface.java index becc802813..557812077c 100644 --- a/src/main/java/htsjdk/samtools/util/ProgressLoggerInterface.java +++ b/src/main/java/htsjdk/samtools/util/ProgressLoggerInterface.java @@ -23,7 +23,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/ - import htsjdk.samtools.SAMRecord; /** @@ -31,8 +30,12 @@ */ public interface ProgressLoggerInterface { - boolean record(final String chrom, final int pos); - boolean record(final SAMRecord rec); - boolean record(final SAMRecord... recs); - default void reset() {}; + boolean record(final String chrom, final int pos); + + boolean record(final SAMRecord rec); + + boolean record(final SAMRecord... recs); + + default void reset() {} + ; } diff --git a/src/main/java/htsjdk/samtools/util/QualityEncodingDetector.java b/src/main/java/htsjdk/samtools/util/QualityEncodingDetector.java index 0147daa354..59b16799b5 100644 --- a/src/main/java/htsjdk/samtools/util/QualityEncodingDetector.java +++ b/src/main/java/htsjdk/samtools/util/QualityEncodingDetector.java @@ -1,12 +1,13 @@ package htsjdk.samtools.util; +import static java.util.Arrays.asList; + import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMUtils; import htsjdk.samtools.SamReader; import htsjdk.samtools.fastq.FastqReader; import htsjdk.samtools.fastq.FastqRecord; - import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -20,8 +21,6 @@ import java.util.Queue; import java.util.Set; -import static java.util.Arrays.asList; - /** * Utility for determining the type of quality encoding/format (see {@link FastqQualityFormat}) used in a SAM/BAM or Fastq. *

    @@ -38,9 +37,13 @@ public class QualityEncodingDetector { * The maximum number of records over which the detector will iterate before making a determination, by default. */ public static final long DEFAULT_MAX_RECORDS_TO_ITERATE = 10000; + private static final Log log = Log.getInstance(QualityEncodingDetector.class); - public enum FileContext {FASTQ, SAM} + public enum FileContext { + FASTQ, + SAM + } static class Range { final int low, high; @@ -60,23 +63,13 @@ boolean contains(final int value) { */ enum QualityScheme { Phred( - new Range(0, 93), // Raw value range - new Range(33, 126), // ASCII value range - asList(new Range(33, 58)), // Ranges into which we expect at least one ASCII value to fall + new Range(0, 93), // Raw value range + new Range(33, 126), // ASCII value range + asList(new Range(33, 58)), // Ranges into which we expect at least one ASCII value to fall FastqQualityFormat.Standard // Associated quality format - ), - Solexa( - new Range(-5, 62), - new Range(59, 126), - new ArrayList(), - FastqQualityFormat.Solexa - ), - Illumina( - new Range(0, 62), - new Range(64, 126), - new ArrayList(), - FastqQualityFormat.Illumina - ); + ), + Solexa(new Range(-5, 62), new Range(59, 126), new ArrayList(), FastqQualityFormat.Solexa), + Illumina(new Range(0, 62), new Range(64, 126), new ArrayList(), FastqQualityFormat.Illumina); final Range rawRange, asciiRange; /** * Ranges into which we expect at least one value to fall if this formatting is being used. For example, for @@ -84,9 +77,14 @@ enum QualityScheme { * probably not Standard-encoded. 
*/ final List expectedAsciiRanges; + final FastqQualityFormat qualityFormat; - QualityScheme(final Range rawRange, final Range asciiRange, final List expectedAsciiRanges, final FastqQualityFormat qualityFormat) { + QualityScheme( + final Range rawRange, + final Range asciiRange, + final List expectedAsciiRanges, + final FastqQualityFormat qualityFormat) { this.rawRange = rawRange; this.asciiRange = asciiRange; this.expectedAsciiRanges = expectedAsciiRanges; @@ -122,9 +120,11 @@ public void add(final FastqRecord fastqRecord) { * transformation by asking {@link SAMRecord} to convert the quality back into the ASCII that was read in the file. */ public void add(final SAMRecord samRecord, final boolean useOriginalQualities) { - addAsciiQuality(useOriginalQualities && samRecord.getOriginalBaseQualities() != null - ? SAMUtils.phredToFastq(samRecord.getOriginalBaseQualities()).getBytes() - : samRecord.getBaseQualityString().getBytes()); + addAsciiQuality( + useOriginalQualities && samRecord.getOriginalBaseQualities() != null + ? SAMUtils.phredToFastq(samRecord.getOriginalBaseQualities()) + .getBytes() + : samRecord.getBaseQualityString().getBytes()); } public void add(final SAMRecord samRecord) { @@ -167,7 +167,8 @@ public long add(final long maxRecords, final SamReader reader) { * * @return The number of records read */ - public long add(final long maxRecords, final CloseableIterator iterator, final boolean useOriginalQualities) { + public long add( + final long maxRecords, final CloseableIterator iterator, final boolean useOriginalQualities) { long recordCount = 0; try { while (iterator.hasNext() && recordCount++ != maxRecords) { @@ -328,12 +329,12 @@ public static FastqQualityFormat detect(final FastqReader... 
readers) { * so more records is better) * @return The determined quality format */ - public static FastqQualityFormat detect(final long maxRecords, final CloseableIterator iterator, final boolean useOriginalQualities) { + public static FastqQualityFormat detect( + final long maxRecords, final CloseableIterator iterator, final boolean useOriginalQualities) { final QualityEncodingDetector detector = new QualityEncodingDetector(); final long recordCount = detector.add(maxRecords, iterator, useOriginalQualities); log.debug(String.format("Read %s records.", recordCount)); return detector.generateBestGuess(FileContext.SAM, null); - } public static FastqQualityFormat detect(final long maxRecords, final CloseableIterator iterator) { @@ -348,14 +349,13 @@ public static FastqQualityFormat detect(final SamReader reader) { return detect(DEFAULT_MAX_RECORDS_TO_ITERATE, reader); } - /** * Reads through the records in the provided SAM reader and uses their quality scores to sanity check the expected * quality passed in. If the expected quality format is sane we just hand this back otherwise we throw a * {@link SAMException}. 
*/ public static FastqQualityFormat detect(final SamReader reader, final FastqQualityFormat expectedQualityFormat) { - //sanity check expectedQuality + // sanity check expectedQuality final QualityEncodingDetector detector = new QualityEncodingDetector(); final long recordCount = detector.add(DEFAULT_MAX_RECORDS_TO_ITERATE, reader.iterator()); log.debug(String.format("Read %s records from %s.", recordCount, reader)); @@ -374,9 +374,9 @@ public FastqQualityFormat generateBestGuess(final FileContext context, final Fas if (possibleFormats.contains(expectedQuality)) { return expectedQuality; } else { - throw new SAMException( - String.format("The quality values do not fall in the range appropriate for the expected quality of %s.", - expectedQuality.name())); + throw new SAMException(String.format( + "The quality values do not fall in the range appropriate for the expected quality of %s.", + expectedQuality.name())); } } else { possibleFormats = this.generateCandidateQualities(true); @@ -386,14 +386,16 @@ public FastqQualityFormat generateBestGuess(final FileContext context, final Fas case 2: if (possibleFormats.equals(EnumSet.of(FastqQualityFormat.Illumina, FastqQualityFormat.Solexa))) { return FastqQualityFormat.Illumina; - } else if (possibleFormats.equals(EnumSet.of(FastqQualityFormat.Illumina, FastqQualityFormat.Standard))) { + } else if (possibleFormats.equals( + EnumSet.of(FastqQualityFormat.Illumina, FastqQualityFormat.Standard))) { switch (context) { case FASTQ: return FastqQualityFormat.Illumina; case SAM: return FastqQualityFormat.Standard; } - } else if (possibleFormats.equals(EnumSet.of(FastqQualityFormat.Standard, FastqQualityFormat.Solexa))) { + } else if (possibleFormats.equals( + EnumSet.of(FastqQualityFormat.Standard, FastqQualityFormat.Solexa))) { return FastqQualityFormat.Standard; } else throw new SAMException("Unreachable code."); case 3: diff --git a/src/main/java/htsjdk/samtools/util/QualityUtil.java 
b/src/main/java/htsjdk/samtools/util/QualityUtil.java index e9ad9ec0d0..366fca0ca0 100644 --- a/src/main/java/htsjdk/samtools/util/QualityUtil.java +++ b/src/main/java/htsjdk/samtools/util/QualityUtil.java @@ -36,8 +36,8 @@ public final class QualityUtil { static { errorProbabilityByPhredScore = new double[101]; - for (int i=0; i { - private static final float hashTableLoadFactor = 0.75f; + private static final float hashTableLoadFactor = 0.75f; - private final LinkedHashMap map; + private final LinkedHashMap map; private final int cacheSize; private final ResourceLimitedMapFunctor functor; @@ -50,17 +50,18 @@ public ResourceLimitedMap(final int cacheSize, final ResourceLimitedMapFunctor(hashTableCapacity, hashTableLoadFactor, true) { - @Override protected boolean removeEldestEntry (final Map.Entry eldest) { - if (size() > ResourceLimitedMap.this.cacheSize) { - ResourceLimitedMap.this.functor.finalizeValue(eldest.getKey(), eldest.getValue()); - return true; - } else { - return false; - } + map = new LinkedHashMap(hashTableCapacity, hashTableLoadFactor, true) { + @Override + protected boolean removeEldestEntry(final Map.Entry eldest) { + if (size() > ResourceLimitedMap.this.cacheSize) { + ResourceLimitedMap.this.functor.finalizeValue(eldest.getKey(), eldest.getValue()); + return true; + } else { + return false; + } } }; } diff --git a/src/main/java/htsjdk/samtools/util/RuntimeEOFException.java b/src/main/java/htsjdk/samtools/util/RuntimeEOFException.java index 2e80fa64c9..b41506a250 100644 --- a/src/main/java/htsjdk/samtools/util/RuntimeEOFException.java +++ b/src/main/java/htsjdk/samtools/util/RuntimeEOFException.java @@ -29,8 +29,7 @@ * Thrown by various codecs to indicate EOF without having to clutter the API with throws clauses */ public class RuntimeEOFException extends SAMException { - public RuntimeEOFException() { - } + public RuntimeEOFException() {} public RuntimeEOFException(final String s) { super(s); diff --git 
a/src/main/java/htsjdk/samtools/util/RuntimeIOException.java b/src/main/java/htsjdk/samtools/util/RuntimeIOException.java index 571571b39c..6014f69a83 100644 --- a/src/main/java/htsjdk/samtools/util/RuntimeIOException.java +++ b/src/main/java/htsjdk/samtools/util/RuntimeIOException.java @@ -29,8 +29,7 @@ * Thrown by various IO classes to indicate IOException without having to clutter the API with throws clauses */ public class RuntimeIOException extends SAMException { - public RuntimeIOException() { - } + public RuntimeIOException() {} public RuntimeIOException(final String s) { super(s); diff --git a/src/main/java/htsjdk/samtools/util/RuntimeScriptException.java b/src/main/java/htsjdk/samtools/util/RuntimeScriptException.java index 570a64e332..53e04685ce 100644 --- a/src/main/java/htsjdk/samtools/util/RuntimeScriptException.java +++ b/src/main/java/htsjdk/samtools/util/RuntimeScriptException.java @@ -23,13 +23,11 @@ */ package htsjdk.samtools.util; - /** * Thrown by classes handling script engines like the javascript-based filters for SAM/VCF */ public class RuntimeScriptException extends RuntimeException { - public RuntimeScriptException() { - } + public RuntimeScriptException() {} public RuntimeScriptException(final String s) { super(s); diff --git a/src/main/java/htsjdk/samtools/util/SAMRecordPrefetchingIterator.java b/src/main/java/htsjdk/samtools/util/SAMRecordPrefetchingIterator.java index c137d8e168..a0919d36a0 100644 --- a/src/main/java/htsjdk/samtools/util/SAMRecordPrefetchingIterator.java +++ b/src/main/java/htsjdk/samtools/util/SAMRecordPrefetchingIterator.java @@ -1,13 +1,11 @@ package htsjdk.samtools.util; import htsjdk.samtools.SAMRecord; - import java.util.NoSuchElementException; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.atomic.AtomicInteger; - /** * Iterator that uses a dedicated background thread to prefetch SAMRecords, * reading ahead by a set number of bases to improve 
throughput. @@ -35,7 +33,8 @@ public SAMRecordPrefetchingIterator(final CloseableIterator iterator, this.basePrefetchLimit = basePrefetchLimit; this.basesAllowed = new AtomicInteger(this.basePrefetchLimit); - this.backgroundThread = new Thread(this::prefetch, SAMRecordPrefetchingIterator.class.getSimpleName() + "Thread"); + this.backgroundThread = + new Thread(this::prefetch, SAMRecordPrefetchingIterator.class.getSimpleName() + "Thread"); this.backgroundThread.setDaemon(true); this.backgroundThread.start(); } @@ -67,7 +66,8 @@ private void prefetch() { // InterruptedException is expected if the iterator is being closed return; } catch (final Throwable t) { - // All other exceptions are placed onto the queue so they can be reported when accessed by the main thread + // All other exceptions are placed onto the queue so they can be reported when accessed by the main + // thread // Errors are immediately printed so their information is propagated to the user and not lost // in the case that the JVM dies before the Error is passed up through the queue if (t instanceof Error) { @@ -82,16 +82,16 @@ private void prefetch() { public void close() { if (this.backgroundThread == null) return; /* - If prefetch thread is interrupted while awake and before acquiring permits, it will either acquire the permits - and pass through to the next case, or check interruption status before sleeping then exit immediately - If prefetch thread is interrupted while awake and after acquiring permits, it will check interruption status - at the beginning of the next loop, the queue is unbounded so adding will never block - If prefetch thread is interrupted while asleep waiting for bases, it will catch InterruptedException and exit - - Prefetch thread cannot be interrupted while awake and acquiring permits, missing the interrupt, - because the interrupt occurs in a block synchronized on the same monitor as the acquire loop, - so the prefetch thread must be asleep for the closing thread to acquire 
the lock and issue the interrupt - */ + If prefetch thread is interrupted while awake and before acquiring permits, it will either acquire the permits + and pass through to the next case, or check interruption status before sleeping then exit immediately + If prefetch thread is interrupted while awake and after acquiring permits, it will check interruption status + at the beginning of the next loop, the queue is unbounded so adding will never block + If prefetch thread is interrupted while asleep waiting for bases, it will catch InterruptedException and exit + + Prefetch thread cannot be interrupted while awake and acquiring permits, missing the interrupt, + because the interrupt occurs in a block synchronized on the same monitor as the acquire loop, + so the prefetch thread must be asleep for the closing thread to acquire the lock and issue the interrupt + */ synchronized (this.basesAllowed) { this.backgroundThread.interrupt(); } @@ -167,4 +167,4 @@ public Either(final Throwable error) { this.error = error; } } -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/samtools/util/SamConstants.java b/src/main/java/htsjdk/samtools/util/SamConstants.java index ec5f6816fe..6739de443f 100644 --- a/src/main/java/htsjdk/samtools/util/SamConstants.java +++ b/src/main/java/htsjdk/samtools/util/SamConstants.java @@ -28,10 +28,11 @@ * A set of constants defined in the sam-spec (https://github.com/samtools/hts-specs) that need * to be referenced in code. */ - public final class SamConstants { +public final class SamConstants { - //No need to instantiate this class since all the constants should be static - private SamConstants(){}; + // No need to instantiate this class since all the constants should be static + private SamConstants() {} + ; /** * The recommended separator to use when specifying multiple barcodes together in the same tag. 
diff --git a/src/main/java/htsjdk/samtools/util/SamLocusIterator.java b/src/main/java/htsjdk/samtools/util/SamLocusIterator.java index b0f8dfc2d5..c095400e47 100644 --- a/src/main/java/htsjdk/samtools/util/SamLocusIterator.java +++ b/src/main/java/htsjdk/samtools/util/SamLocusIterator.java @@ -24,7 +24,6 @@ package htsjdk.samtools.util; import htsjdk.samtools.*; - import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -38,8 +37,8 @@ * * @author alecw@broadinstitute.org */ - -public class SamLocusIterator extends AbstractLocusIterator { +public class SamLocusIterator + extends AbstractLocusIterator { /** * Prepare to iterate through the given SAM records, skipping non-primary alignments. Do not use @@ -96,7 +95,8 @@ void accumulateSamRecord(final SAMRecord rec) { for (final AlignmentBlock alignmentBlock : rec.getAlignmentBlocks()) { final int readStart = alignmentBlock.getReadStart(); final int blockLength = alignmentBlock.getLength(); - final int blockStartAccIndex = alignmentBlock.getReferenceStart() - accumulator.get(0).getPosition(); + final int blockStartAccIndex = + alignmentBlock.getReferenceStart() - accumulator.get(0).getPosition(); for (int i = 0; i < blockLength; ++i) { // 0-based offset into the read of the current base @@ -104,7 +104,8 @@ void accumulateSamRecord(final SAMRecord rec) { // if the quality score cutoff is met, accumulate the base info if (dontCheckQualities || baseQualities.length == 0 || baseQualities[readOffset] >= minQuality) { - // 0-based offset from the aligned position of the first base in the read to the aligned position of the current base. + // 0-based offset from the aligned position of the first base in the read to the aligned position of + // the current base. 
final int accumulateIndex = blockStartAccIndex + i; accumulator.get(accumulateIndex).add(new RecordAndOffset(rec, readOffset)); } @@ -134,10 +135,12 @@ void accumulateIndels(SAMRecord rec) { final CigarOperator operator = e.getOperator(); if (operator.equals(CigarOperator.I)) { // insertions are included in the previous base - if (dontCheckQualities || baseQualities.length == 0 || baseQualities[readBase] >= minQuality){ + if (dontCheckQualities || baseQualities.length == 0 || baseQualities[readBase] >= minQuality) { accumulator.get(baseAccIndex - 1).addInserted(rec, readBase); - readBase += e.getLength(); } + // Always advance past inserted bases regardless of quality check, + // otherwise subsequent CIGAR positions will be misaligned. + readBase += e.getLength(); } else if (operator.equals(CigarOperator.D)) { // accumulate for each position that spans the deletion for (int i = 0; i < e.getLength(); i++) { @@ -159,12 +162,14 @@ private void prepareAccumulatorForRecord(SAMRecord rec) { final int alignmentLength = alignmentEnd - alignmentStart; // if there is an insertion in the first base and it is not tracked in the accumulator, add it - if (includeIndels && startWithInsertion(rec.getCigar()) && - (accumulator.isEmpty() || accumulator.get(0).getPosition() == alignmentStart)) { + if (includeIndels + && startWithInsertion(rec.getCigar()) + && (accumulator.isEmpty() || accumulator.get(0).getPosition() == alignmentStart)) { accumulator.add(0, new LocusInfo(ref, alignmentStart - 1)); } // Ensure there are LocusInfos up to and including this position - final int accIndexWhereReadStarts = accumulator.isEmpty() ? 0 : alignmentStart - accumulator.get(0).getPosition(); + final int accIndexWhereReadStarts = + accumulator.isEmpty() ? 
0 : alignmentStart - accumulator.get(0).getPosition(); final int newLocusesCount = accIndexWhereReadStarts + alignmentLength - accumulator.size(); for (int i = 0; i <= newLocusesCount; i++) { accumulator.add(new LocusInfo(ref, alignmentEnd - newLocusesCount + i)); @@ -249,20 +254,21 @@ public void addDeleted(final SAMRecord read, int previousPosition) { if (deletedInRecord == null) { deletedInRecord = new ArrayList<>(); } - deletedInRecord.add(new RecordAndOffset(read, previousPosition, AbstractRecordAndOffset.AlignmentType.Deletion)); + deletedInRecord.add( + new RecordAndOffset(read, previousPosition, AbstractRecordAndOffset.AlignmentType.Deletion)); } /** * Accumulate info for one read with an insertion. * For this locus, the reads in the insertion are included also in recordAndOffsets */ - public void addInserted(final SAMRecord read, int firstPosition) { if (insertedInRecord == null) { insertedInRecord = new ArrayList<>(); } - insertedInRecord.add(new RecordAndOffset(read, firstPosition, AbstractRecordAndOffset.AlignmentType.Insertion)); + insertedInRecord.add( + new RecordAndOffset(read, firstPosition, AbstractRecordAndOffset.AlignmentType.Insertion)); } public List getDeletedInRecord() { @@ -270,7 +276,9 @@ public List getDeletedInRecord() { } public List getInsertedInRecord() { - return (insertedInRecord == null) ? Collections.emptyList() : Collections.unmodifiableList(insertedInRecord); + return (insertedInRecord == null) + ? Collections.emptyList() + : Collections.unmodifiableList(insertedInRecord); } /** @@ -281,16 +289,15 @@ public int size() { return super.size() + ((deletedInRecord == null) ? 
0 : deletedInRecord.size()); } - /** * @return true if all the RecordAndOffset lists are empty; * false if at least one have records */ @Override public boolean isEmpty() { - return getRecordAndOffsets().isEmpty() && - (deletedInRecord == null || deletedInRecord.isEmpty()) && - (insertedInRecord == null || insertedInRecord.isEmpty()); + return getRecordAndOffsets().isEmpty() + && (deletedInRecord == null || deletedInRecord.isEmpty()) + && (insertedInRecord == null || insertedInRecord.isEmpty()); } } } diff --git a/src/main/java/htsjdk/samtools/util/SamRecordIntervalIteratorFactory.java b/src/main/java/htsjdk/samtools/util/SamRecordIntervalIteratorFactory.java index 5dd7589d70..9ee199cfa6 100644 --- a/src/main/java/htsjdk/samtools/util/SamRecordIntervalIteratorFactory.java +++ b/src/main/java/htsjdk/samtools/util/SamRecordIntervalIteratorFactory.java @@ -28,7 +28,6 @@ import htsjdk.samtools.SamReader; import htsjdk.samtools.filter.IntervalFilter; import htsjdk.samtools.filter.SamRecordFilter; - import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; @@ -50,9 +49,8 @@ public class SamRecordIntervalIteratorFactory { * Note however that if there are many intervals that cover a great deal of the genome, using the BAM * index may actually make performance worse. 
*/ - public CloseableIterator makeSamRecordIntervalIterator(final SamReader samReader, - final List uniqueIntervals, - final boolean useIndex) { + public CloseableIterator makeSamRecordIntervalIterator( + final SamReader samReader, final List uniqueIntervals, final boolean useIndex) { if (!samReader.hasIndex() || !useIndex) { final int stopAfterSequence; final int stopAfterPosition; @@ -65,13 +63,16 @@ public CloseableIterator makeSamRecordIntervalIterator(final SamReade stopAfterPosition = lastInterval.getEnd(); } final IntervalFilter intervalFilter = new IntervalFilter(uniqueIntervals, samReader.getFileHeader()); - return new StopAfterFilteringIterator(samReader.iterator(), intervalFilter, stopAfterSequence, stopAfterPosition); + return new StopAfterFilteringIterator( + samReader.iterator(), intervalFilter, stopAfterSequence, stopAfterPosition); } else { final QueryInterval[] queryIntervals = new QueryInterval[uniqueIntervals.size()]; for (int i = 0; i < queryIntervals.length; ++i) { final Interval inputInterval = uniqueIntervals.get(i); - queryIntervals[i] = new QueryInterval(samReader.getFileHeader().getSequenceIndex(inputInterval.getContig()), - inputInterval.getStart(), inputInterval.getEnd()); + queryIntervals[i] = new QueryInterval( + samReader.getFileHeader().getSequenceIndex(inputInterval.getContig()), + inputInterval.getStart(), + inputInterval.getEnd()); } return samReader.queryOverlapping(queryIntervals); } @@ -92,8 +93,8 @@ private class StopAfterFilteringIterator implements CloseableIterator private final SamRecordFilter filter; private SAMRecord next = null; - private StopAfterFilteringIterator(Iterator iterator, SamRecordFilter filter, - int stopAfterSequence, int stopAfterPosition) { + private StopAfterFilteringIterator( + Iterator iterator, SamRecordFilter filter, int stopAfterSequence, int stopAfterPosition) { this.stopAfterSequence = stopAfterSequence; this.stopAfterPosition = stopAfterPosition; this.iterator = iterator; @@ -101,7 +102,6 @@ 
private StopAfterFilteringIterator(Iterator iterator, SamRecordFilter next = getNextRecord(); } - /** * Returns true if the iteration has more elements. * @@ -148,7 +148,8 @@ protected SAMRecord getNextRecord() { SAMRecord record = iterator.next(); if (record.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) return null; else if (record.getReferenceIndex() > stopAfterSequence) return null; - else if (record.getReferenceIndex() == stopAfterSequence && record.getAlignmentStart() > stopAfterPosition) { + else if (record.getReferenceIndex() == stopAfterSequence + && record.getAlignmentStart() > stopAfterPosition) { return null; } if (!filter.filterOut(record)) { diff --git a/src/main/java/htsjdk/samtools/util/SamRecordTrackingBuffer.java b/src/main/java/htsjdk/samtools/util/SamRecordTrackingBuffer.java index 46cf8bff14..fda9a18844 100644 --- a/src/main/java/htsjdk/samtools/util/SamRecordTrackingBuffer.java +++ b/src/main/java/htsjdk/samtools/util/SamRecordTrackingBuffer.java @@ -28,7 +28,6 @@ import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; - import java.io.File; import java.util.ArrayDeque; import java.util.BitSet; @@ -72,7 +71,12 @@ public class SamRecordTrackingBuffer { * @param header the header * @param clazz the class that extends SamRecordWithOrdinal */ - public SamRecordTrackingBuffer(final int maxRecordsInRam, final int blockSize, final List tmpDirs, final SAMFileHeader header, final Class clazz) { + public SamRecordTrackingBuffer( + final int maxRecordsInRam, + final int blockSize, + final List tmpDirs, + final SAMFileHeader header, + final Class clazz) { this.availableRecordsInMemory = maxRecordsInRam; this.blockSize = blockSize; this.tmpDirs = tmpDirs; @@ -84,10 +88,14 @@ public SamRecordTrackingBuffer(final int maxRecordsInRam, final int blockSize, f } /** Returns true if we are tracking no records, false otherwise */ - public boolean isEmpty() { return (blocks.isEmpty() || 
this.blocks.getFirst().isEmpty()); } + public boolean isEmpty() { + return (blocks.isEmpty() || this.blocks.getFirst().isEmpty()); + } /** Returns true if we can return the next record (it has been examined). */ - public boolean canEmit() { return (!this.blocks.isEmpty() && this.blocks.getFirst().canEmit()); } + public boolean canEmit() { + return (!this.blocks.isEmpty() && this.blocks.getFirst().canEmit()); + } /** * Add the given SAMRecordIndex to the buffer. The records must be added in order. @@ -104,10 +112,12 @@ public void add(final SamRecordWithOrdinal samRecordWithOrdinal) { } // If necessary, create a new block, using as much ram as available up to its total size if (this.blocks.isEmpty() || !this.blocks.getLast().canAdd()) { - // once ram is given to a block, we can't give it to another block (until some is recovered from the head of the queue) + // once ram is given to a block, we can't give it to another block (until some is recovered from the head of + // the queue) final int blockRam = Math.min(this.blockSize, this.availableRecordsInMemory); this.availableRecordsInMemory = this.availableRecordsInMemory - blockRam; - final BufferBlock block = new BufferBlock(this.blockSize, blockRam, this.tmpDirs, this.header, samRecordWithOrdinal.getRecordOrdinal()); + final BufferBlock block = new BufferBlock( + this.blockSize, blockRam, this.tmpDirs, this.header, samRecordWithOrdinal.getRecordOrdinal()); this.blocks.addLast(block); } this.blocks.getLast().add(samRecordWithOrdinal); @@ -125,10 +135,12 @@ public SamRecordWithOrdinal next() { throw new NoSuchElementException("Attempting to remove an element from an empty SamRecordTrackingBuffer"); final BufferBlock headBlock = this.blocks.getFirst(); if (!headBlock.canEmit()) - throw new SAMException("Attempting to get a samRecordWithOrdinal from the SamRecordTrackingBuffer that has not been through " + - "marked as examined. 
canEmit() must return true in order to call next()"); + throw new SAMException( + "Attempting to get a samRecordWithOrdinal from the SamRecordTrackingBuffer that has not been through " + + "marked as examined. canEmit() must return true in order to call next()"); - // If the samRecordWithOrdinal was stored in memory, reclaim its ram for use in additional blocks at tail of queue + // If the samRecordWithOrdinal was stored in memory, reclaim its ram for use in additional blocks at tail of + // queue // NB: this must be checked before calling next(), as that method updates the block-head if (!headBlock.headRecordIsFromDisk()) { this.availableRecordsInMemory++; @@ -143,17 +155,22 @@ public SamRecordWithOrdinal next() { } /** Removes the next record from this buffer */ - public void remove() { this.next(); } + public void remove() { + this.next(); + } /** * Return the total number of elements in the queue, both in memory and on disk */ - public long size() { return this.queueTailRecordIndex - this.queueHeadRecordIndex + 1; } + public long size() { + return this.queueTailRecordIndex - this.queueHeadRecordIndex + 1; + } /** Returns the block that holds the sam record at the given index, null if no such block exists */ private BufferBlock getBlock(final SamRecordWithOrdinal samRecordWithOrdinal) { for (final BufferBlock block : this.blocks) { - if (block.getStartIndex() <= samRecordWithOrdinal.getRecordOrdinal() && block.getEndIndex() >= samRecordWithOrdinal.getRecordOrdinal()) { + if (block.getStartIndex() <= samRecordWithOrdinal.getRecordOrdinal() + && block.getEndIndex() >= samRecordWithOrdinal.getRecordOrdinal()) { return block; } } @@ -175,8 +192,10 @@ public boolean contains(final SamRecordWithOrdinal samRecordWithOrdinal) { public void setResultState(final SamRecordWithOrdinal samRecordWithOrdinal, final boolean resultState) { final BufferBlock block = getBlock(samRecordWithOrdinal); if (null == block) { - throw new SAMException("Attempted to set examined 
information on a samRecordWithOrdinal whose index is not found " + - "in the SamRecordTrackingBuffer. recordIndex: " + samRecordWithOrdinal.getRecordOrdinal()); + throw new SAMException( + "Attempted to set examined information on a samRecordWithOrdinal whose index is not found " + + "in the SamRecordTrackingBuffer. recordIndex: " + + samRecordWithOrdinal.getRecordOrdinal()); } block.setResultState(samRecordWithOrdinal, resultState); } @@ -205,10 +224,14 @@ private class BufferBlock { private final BitSet resultStateIndexes; /** Creates an empty block buffer, with an allowable # of records in RAM */ - public BufferBlock(final int maxBlockSize, final int maxBlockRecordsInMemory, final List tmpDirs, - final SAMFileHeader header, - final long originalStartIndex) { - this.recordsQueue = DiskBackedQueue.newInstance(new BAMRecordCodec(header), maxBlockRecordsInMemory, tmpDirs); + public BufferBlock( + final int maxBlockSize, + final int maxBlockRecordsInMemory, + final List tmpDirs, + final SAMFileHeader header, + final long originalStartIndex) { + this.recordsQueue = + DiskBackedQueue.newInstance(new BAMRecordCodec(header), maxBlockRecordsInMemory, tmpDirs); this.maxBlockSize = maxBlockSize; this.currentStartIndex = 0; this.endIndex = -1; @@ -222,10 +245,14 @@ public BufferBlock(final int maxBlockSize, final int maxBlockRecordsInMemory, fi * TODO - reimplement with a circular byte array buffer PROVIDED RECORDS ARE IN MEMORY * @return */ - public boolean canAdd() { return (this.endIndex - this.originalStartIndex + 1) < this.maxBlockSize && this.recordsQueue.canAdd(); } + public boolean canAdd() { + return (this.endIndex - this.originalStartIndex + 1) < this.maxBlockSize && this.recordsQueue.canAdd(); + } /** Returns true if the record at the front of the buffer is on disk */ - public boolean headRecordIsFromDisk() { return this.recordsQueue.headRecordIsFromDisk(); } + public boolean headRecordIsFromDisk() { + return this.recordsQueue.headRecordIsFromDisk(); + } /** * 
Check whether we have read all possible records from this block (and it is available to be destroyed) @@ -233,14 +260,18 @@ public BufferBlock(final int maxBlockSize, final int maxBlockRecordsInMemory, fi */ public boolean hasBeenDrained() { final long maximalIndex = (this.canAdd()) ? (this.originalStartIndex + this.maxBlockSize) : this.endIndex; - return this.currentStartIndex > maximalIndex; //NB: watch out for an off by one here + return this.currentStartIndex > maximalIndex; // NB: watch out for an off by one here } /** Gets the index of the first record in this block */ - public long getStartIndex() { return this.currentStartIndex; } + public long getStartIndex() { + return this.currentStartIndex; + } /** Gets the index of the last record in this block */ - public long getEndIndex() { return this.endIndex; } + public long getEndIndex() { + return this.endIndex; + } /** Add a record to this block */ public void add(final SamRecordWithOrdinal samRecordWithOrdinal) { @@ -257,8 +288,9 @@ public void add(final SamRecordWithOrdinal samRecordWithOrdinal) { } private int ensureIndexFitsInAnInt(final long value) { - if (value < Integer.MIN_VALUE || Integer.MAX_VALUE < value) throw new SAMException("Error: index out of range: " + value); - return (int)value; + if (value < Integer.MIN_VALUE || Integer.MAX_VALUE < value) + throw new SAMException("Error: index out of range: " + value); + return (int) value; } /** @@ -271,8 +303,11 @@ private int ensureIndexFitsInAnInt(final long value) { */ public void setResultState(final SamRecordWithOrdinal samRecordWithOrdinal, final boolean resultState) { // find the correct byte array index and update both metadata byte arrays - this.wasExaminedIndexes.set(ensureIndexFitsInAnInt(samRecordWithOrdinal.getRecordOrdinal() - this.originalStartIndex), true); - this.resultStateIndexes.set(ensureIndexFitsInAnInt(samRecordWithOrdinal.getRecordOrdinal() - this.originalStartIndex), resultState); + this.wasExaminedIndexes.set( + 
ensureIndexFitsInAnInt(samRecordWithOrdinal.getRecordOrdinal() - this.originalStartIndex), true); + this.resultStateIndexes.set( + ensureIndexFitsInAnInt(samRecordWithOrdinal.getRecordOrdinal() - this.originalStartIndex), + resultState); } public boolean isEmpty() { @@ -281,17 +316,20 @@ public boolean isEmpty() { public boolean canEmit() { // TODO: what if isEmpty() == true? - return this.wasExaminedIndexes.get(ensureIndexFitsInAnInt(this.currentStartIndex - this.originalStartIndex)); + return this.wasExaminedIndexes.get( + ensureIndexFitsInAnInt(this.currentStartIndex - this.originalStartIndex)); } public SamRecordWithOrdinal next() throws IllegalStateException { if (this.canEmit()) { try { - // create a wrapped record for the head of the queue, and set the underlying record's examined information appropriately + // create a wrapped record for the head of the queue, and set the underlying record's examined + // information appropriately final SamRecordWithOrdinal samRecordWithOrdinal = clazz.newInstance(); samRecordWithOrdinal.setRecord(this.recordsQueue.poll()); samRecordWithOrdinal.setRecordOrdinal(this.currentStartIndex); - samRecordWithOrdinal.setResultState(this.resultStateIndexes.get(ensureIndexFitsInAnInt(this.currentStartIndex - this.originalStartIndex))); + samRecordWithOrdinal.setResultState(this.resultStateIndexes.get( + ensureIndexFitsInAnInt(this.currentStartIndex - this.originalStartIndex))); this.currentStartIndex++; return samRecordWithOrdinal; } catch (final Exception e) { @@ -305,17 +343,23 @@ public SamRecordWithOrdinal next() throws IllegalStateException { /** * Remove, but do not return, the next samRecordWithOrdinal in the iterator */ - public void remove() { this.next(); } + public void remove() { + this.next(); + } /** * Return the total number of elements in the block, both in memory and on disk */ - public long size() { return this.endIndex - this.currentStartIndex + 1; } + public long size() { + return this.endIndex - 
this.currentStartIndex + 1; + } /** * Close disk IO resources associated with the underlying records queue. * This must be called when a block is no longer needed in order to prevent memory leaks. */ - public void clear() { this.recordsQueue.clear(); } + public void clear() { + this.recordsQueue.clear(); + } } } diff --git a/src/main/java/htsjdk/samtools/util/SamRecordWithOrdinal.java b/src/main/java/htsjdk/samtools/util/SamRecordWithOrdinal.java index 096c4163cb..c99a341cc6 100644 --- a/src/main/java/htsjdk/samtools/util/SamRecordWithOrdinal.java +++ b/src/main/java/htsjdk/samtools/util/SamRecordWithOrdinal.java @@ -46,11 +46,22 @@ public SamRecordWithOrdinal(final SAMRecord record, final long recordOrdinal) { this.recordOrdinal = recordOrdinal; } - public SAMRecord getRecord() { return this.record; } - public void setRecord(final SAMRecord record) { this.record = record; } - public long getRecordOrdinal() { return this.recordOrdinal; } - public void setRecordOrdinal(final long recordOrdinal) { this.recordOrdinal = recordOrdinal; } + public SAMRecord getRecord() { + return this.record; + } + + public void setRecord(final SAMRecord record) { + this.record = record; + } + + public long getRecordOrdinal() { + return this.recordOrdinal; + } + + public void setRecordOrdinal(final long recordOrdinal) { + this.recordOrdinal = recordOrdinal; + } /** Set the result state on this record. 
*/ - abstract public void setResultState(final boolean resultState); + public abstract void setResultState(final boolean resultState); } diff --git a/src/main/java/htsjdk/samtools/util/SequenceUtil.java b/src/main/java/htsjdk/samtools/util/SequenceUtil.java index 18e524ae35..e7c7c17bd8 100644 --- a/src/main/java/htsjdk/samtools/util/SequenceUtil.java +++ b/src/main/java/htsjdk/samtools/util/SequenceUtil.java @@ -34,7 +34,6 @@ import htsjdk.samtools.SAMTag; import htsjdk.samtools.fastq.FastqConstants; import htsjdk.utils.ValidationUtils; - import java.io.File; import java.math.BigInteger; import java.security.MessageDigest; @@ -51,9 +50,9 @@ public class SequenceUtil { /** Byte typed variables for all normal bases. */ public static final byte a = 'a', c = 'c', g = 'g', t = 't', n = 'n', A = 'A', C = 'C', G = 'G', T = 'T', N = 'N'; - public static final byte[] VALID_BASES_UPPER = new byte[]{A, C, G, T}; - public static final byte[] VALID_BASES_LOWER = new byte[]{a, c, g, t}; - private static final byte[] ACGTN_BASES = new byte[]{A, C, G, T, N}; + public static final byte[] VALID_BASES_UPPER = new byte[] {A, C, G, T}; + public static final byte[] VALID_BASES_LOWER = new byte[] {a, c, g, t}; + private static final byte[] ACGTN_BASES = new byte[] {A, C, G, T, N}; private static final String IUPAC_CODES_STRING = ".aAbBcCdDgGhHkKmMnNrRsStTvVwWyY"; /** * A set of bases supported by BAM in reads, see http://samtools.github.io/hts-specs/SAMv1.pdf chapter 4.2 on 'seq' field. @@ -63,18 +62,32 @@ public class SequenceUtil { private static final int BASES_ARRAY_LENGTH = 127; private static final int SHIFT_TO_LOWER_CASE = a - A; - /** - * A lookup table to find a corresponding BAM read base. - */ + /** Lookup table mapping any byte to its BAM-valid upper-case equivalent (or N if invalid). 
*/ private static final byte[] bamReadBaseLookup = new byte[BASES_ARRAY_LENGTH]; + static { Arrays.fill(bamReadBaseLookup, N); - for (final byte base: BAM_READ_BASE_SET) { + for (final byte base : BAM_READ_BASE_SET) { bamReadBaseLookup[base] = base; bamReadBaseLookup[base + SHIFT_TO_LOWER_CASE] = base; } } + /** + * Returns a defensive copy of the BAM read base lookup table. The table maps each byte + * value (indexed by {@code value & 0x7F}) to its BAM-valid upper-case base equivalent + * (one of A, C, G, T, N, M, R, W, S, Y, K, V, H, D, B), or 'N' if the input is not a + * recognized base. Both upper and lower case inputs map to the upper case base. + * + *

    Callers that need repeated lookups on a hot path should store the returned array + * in a local or static field rather than calling this method repeatedly. + * + * @return a new copy of the 127-element lookup table + */ + public static byte[] getBamReadBaseLookup() { + return bamReadBaseLookup.clone(); + } + private static final byte A_MASK = 1; private static final byte C_MASK = 2; private static final byte G_MASK = 4; @@ -108,8 +121,8 @@ public class SequenceUtil { bases[(byte) i + SHIFT_TO_LOWER_CASE] = bases[(byte) i]; } bases['.'] = A_MASK | C_MASK | G_MASK | T_MASK; - }; - + } + ; /** * Calculate the reverse complement of the specified sequence @@ -124,7 +137,6 @@ public static String reverseComplement(final String sequenceData) { return htsjdk.samtools.util.StringUtil.bytesToString(bases); } - /** * Efficiently compare two IUPAC base codes, simply returning true if they are equal (ignoring case), * without considering the set relationships between ambiguous codes. @@ -185,7 +197,6 @@ public static boolean isUpperACGTN(final byte base) { return isValidBase(base, ACGTN_BASES); } - /** Returns all IUPAC codes as a string */ public static String getIUPACCodesString() { return IUPAC_CODES_STRING; @@ -218,8 +229,7 @@ public static boolean isBamReadBase(final byte base) { /** Update and return the given array of bases by upper casing and then replacing all non-BAM read bases with N */ public static byte[] toBamReadBasesInPlace(final byte[] bases) { - for (int i = 0; i < bases.length; i++) - bases[i] = bamReadBaseLookup[bases[i]]; + for (int i = 0; i < bases.length; i++) bases[i] = bamReadBaseLookup[bases[i]]; return bases; } @@ -242,7 +252,8 @@ public static void assertSequenceListsEqual(final List s1, fi * records of the smaller dictionary are equal to the records of the beginning of the larger dictionary, which can be useful since * sometimes different pipelines choose to use only the first contigs of a standard reference. 
*/ - public static void assertSequenceListsEqual(final List s1, final List s2, final boolean checkPrefixOnly) { + public static void assertSequenceListsEqual( + final List s1, final List s2, final boolean checkPrefixOnly) { if (s1 != null && s2 != null) { final int sizeToTest; @@ -256,36 +267,35 @@ public static void assertSequenceListsEqual(final List s1, fi sizeToTest = s1.size(); if (s1.size() != s2.size()) { throw new SequenceListsDifferException( - "Sequence dictionaries are not the same size (" + s1.size() + ", " + s2.size() + - ")"); + "Sequence dictionaries are not the same size (" + s1.size() + ", " + s2.size() + ")"); } } for (int i = 0; i < sizeToTest; ++i) { if (!s1.get(i).isSameSequence(s2.get(i))) { StringBuilder s1Attrs = new StringBuilder(); - for (final java.util.Map.Entry entry : s1.get(i) - .getAttributes()) { + for (final java.util.Map.Entry entry : + s1.get(i).getAttributes()) { s1Attrs.append("/").append(entry.getKey()).append("=").append(entry.getValue()); } String s2Attrs = ""; - for (final java.util.Map.Entry entry : s2.get(i) - .getAttributes()) { + for (final java.util.Map.Entry entry : + s2.get(i).getAttributes()) { s2Attrs += "/" + entry.getKey() + "=" + entry.getValue(); } - throw new SequenceListsDifferException( - "Sequences at index " + i + " don't match: " + - s1.get(i).getSequenceIndex() + "/" + s1.get(i).getSequenceLength() + - "/" + s1.get(i).getSequenceName() + s1Attrs + " " + - s2.get(i).getSequenceIndex() + "/" + s2.get(i).getSequenceLength() + - "/" + s2.get(i).getSequenceName() + s2Attrs); + throw new SequenceListsDifferException("Sequences at index " + i + " don't match: " + + s1.get(i).getSequenceIndex() + + "/" + s1.get(i).getSequenceLength() + "/" + + s1.get(i).getSequenceName() + s1Attrs + " " + + s2.get(i).getSequenceIndex() + + "/" + s2.get(i).getSequenceLength() + "/" + + s2.get(i).getSequenceName() + s2Attrs); } } } } public static class SequenceListsDifferException extends SAMException { - public 
SequenceListsDifferException() { - } + public SequenceListsDifferException() {} public SequenceListsDifferException(final String s) { super(s); @@ -337,7 +347,8 @@ public static void assertSequenceDictionariesEqual(final SAMSequenceDictionary s * records of the smaller dictionary are equal to the records of the beginning of the larger dictionary, which can be useful since * sometimes different pipelines choose to use only the first contigs of a standard reference. */ - public static void assertSequenceDictionariesEqual(final SAMSequenceDictionary s1, final SAMSequenceDictionary s2, final boolean checkPrefixOnly) { + public static void assertSequenceDictionariesEqual( + final SAMSequenceDictionary s1, final SAMSequenceDictionary s2, final boolean checkPrefixOnly) { if (s1 == null || s2 == null) return; assertSequenceListsEqual(s1.getSequences(), s2.getSequences(), checkPrefixOnly); } @@ -345,12 +356,13 @@ public static void assertSequenceDictionariesEqual(final SAMSequenceDictionary s /** * Throws an exception if both parameters are non-null and unequal, including the filenames. 
*/ - public static void assertSequenceDictionariesEqual(final SAMSequenceDictionary s1, final SAMSequenceDictionary s2, - final File f1, final File f2) { + public static void assertSequenceDictionariesEqual( + final SAMSequenceDictionary s1, final SAMSequenceDictionary s2, final File f1, final File f2) { try { assertSequenceDictionariesEqual(s1, s2); } catch (final SequenceListsDifferException e) { - throw new SequenceListsDifferException("In files " + f1.getAbsolutePath() + " and " + f2.getAbsolutePath(), e); + throw new SequenceListsDifferException( + "In files " + f1.getAbsolutePath() + " and " + f2.getAbsolutePath(), e); } } @@ -360,7 +372,8 @@ public static void assertSequenceDictionariesEqual(final SAMSequenceDictionary s * @param alignmentStart raw aligment start, which may result in read hanging off beginning or end of read * @return cigar string that may have S operator at beginning or end, and has M operator for the rest of the read */ - public static String makeCigarStringWithPossibleClipping(final int alignmentStart, final int readLength, final int referenceSequenceLength) { + public static String makeCigarStringWithPossibleClipping( + final int alignmentStart, final int readLength, final int referenceSequenceLength) { int start = alignmentStart; int leftSoftClip = 0; if (start < 1) { @@ -389,11 +402,12 @@ public static String makeCigarStringWithPossibleClipping(final int alignmentStar * @param indelLength length of indel. Positive for insertion, negative for deletion. * @return cigar string that may have S operator at beginning or end, has one or two M operators, and an I or a D. 
*/ - public static String makeCigarStringWithIndelPossibleClipping(final int alignmentStart, - final int readLength, - final int referenceSequenceLength, - final int indelPosition, - final int indelLength) { + public static String makeCigarStringWithIndelPossibleClipping( + final int alignmentStart, + final int readLength, + final int referenceSequenceLength, + final int indelPosition, + final int indelLength) { int start = alignmentStart; int leftSoftClip = 0; if (start < 1) { @@ -406,8 +420,8 @@ public static String makeCigarStringWithIndelPossibleClipping(final int alignmen rightSoftClip = alignmentEnd - referenceSequenceLength - 1; } if (leftSoftClip >= indelPosition) { - throw new IllegalStateException("Soft clipping entire pre-indel match. leftSoftClip: " + leftSoftClip + - "; indelPosition: " + indelPosition); + throw new IllegalStateException("Soft clipping entire pre-indel match. leftSoftClip: " + leftSoftClip + + "; indelPosition: " + indelPosition); } // CIGAR is trivial because there are no indels or clipping in Gerald final int firstMatchLength = indelPosition - leftSoftClip; @@ -415,10 +429,9 @@ public static String makeCigarStringWithIndelPossibleClipping(final int alignmen if (secondMatchLength < 1) { throw new SAMException("Unexpected cigar string with no M op for read."); } - return makeSoftClipCigar(leftSoftClip) + Integer.toString(firstMatchLength) + "M" + - Math.abs(indelLength) + (indelLength > 0 ? "I" : "D") + - Integer.toString(secondMatchLength) + "M" + - makeSoftClipCigar(rightSoftClip); + return makeSoftClipCigar(leftSoftClip) + Integer.toString(firstMatchLength) + "M" + Math.abs(indelLength) + + (indelLength > 0 ? 
"I" : "D") + Integer.toString(secondMatchLength) + + "M" + makeSoftClipCigar(rightSoftClip); } public static String makeSoftClipCigar(final int clipLength) { @@ -438,8 +451,12 @@ public static String makeSoftClipCigar(final int clipLength) { * @param matchAmbiguousRef causes the match to return true when the read base is a subset of the possible IUPAC reference bases, but not the other way around * @return true if the bases match, false otherwise */ - private static boolean basesMatch(final byte readBase, final byte refBase, final boolean negativeStrand, - final boolean bisulfiteSequence, final boolean matchAmbiguousRef) { + private static boolean basesMatch( + final byte readBase, + final byte refBase, + final boolean negativeStrand, + final boolean bisulfiteSequence, + final boolean matchAmbiguousRef) { if (bisulfiteSequence) { if (matchAmbiguousRef) return bisulfiteBasesMatchWithAmbiguity(negativeStrand, readBase, refBase); else return bisulfiteBasesEqual(negativeStrand, readBase, refBase); @@ -470,12 +487,20 @@ public static int countMismatches(final SAMRecord read, final byte[] referenceBa * and C->T on the positive strand and G->A on the negative strand will not be counted * as mismatches. 
*/ - public static int countMismatches(final SAMRecord read, final byte[] referenceBases, final int referenceOffset, final boolean bisulfiteSequence) { + public static int countMismatches( + final SAMRecord read, + final byte[] referenceBases, + final int referenceOffset, + final boolean bisulfiteSequence) { return countMismatches(read, referenceBases, referenceOffset, bisulfiteSequence, false); } - public static int countMismatches(final SAMRecord read, final byte[] referenceBases, final int referenceOffset, - final boolean bisulfiteSequence, final boolean matchAmbiguousRef) { + public static int countMismatches( + final SAMRecord read, + final byte[] referenceBases, + final int referenceOffset, + final boolean bisulfiteSequence, + final boolean matchAmbiguousRef) { try { int mismatches = 0; @@ -487,8 +512,12 @@ public static int countMismatches(final SAMRecord read, final byte[] referenceBa final int length = block.getLength(); for (int i = 0; i < length; ++i) { - if (!basesMatch(readBases[readBlockStart + i], referenceBases[referenceBlockStart + i], - read.getReadNegativeStrandFlag(), bisulfiteSequence, matchAmbiguousRef)) { + if (!basesMatch( + readBases[readBlockStart + i], + referenceBases[referenceBlockStart + i], + read.getReadNegativeStrandFlag(), + bisulfiteSequence, + matchAmbiguousRef)) { ++mismatches; } } @@ -508,7 +537,8 @@ public static int countMismatches(final SAMRecord read, final byte[] referenceBa * and C->T on the positive strand and G->A on the negative strand will not be counted * as mismatches. 
*/ - public static int countMismatches(final SAMRecord read, final byte[] referenceBases, final boolean bisulfiteSequence) { + public static int countMismatches( + final SAMRecord read, final byte[] referenceBases, final boolean bisulfiteSequence) { return countMismatches(read, referenceBases, 0, bisulfiteSequence); } @@ -530,8 +560,8 @@ public static int sumQualitiesOfMismatches(final SAMRecord read, final byte[] re * @param referenceOffset 0-based offset of the first element of referenceBases relative to the start * of that reference sequence. */ - public static int sumQualitiesOfMismatches(final SAMRecord read, final byte[] referenceBases, - final int referenceOffset) { + public static int sumQualitiesOfMismatches( + final SAMRecord read, final byte[] referenceBases, final int referenceOffset) { return sumQualitiesOfMismatches(read, referenceBases, referenceOffset, false); } @@ -546,16 +576,19 @@ public static int sumQualitiesOfMismatches(final SAMRecord read, final byte[] re * and C->T on the positive strand and G->A on the negative strand will not be counted * as mismatches. 
*/ - public static int sumQualitiesOfMismatches(final SAMRecord read, final byte[] referenceBases, - final int referenceOffset, final boolean bisulfiteSequence) { + public static int sumQualitiesOfMismatches( + final SAMRecord read, + final byte[] referenceBases, + final int referenceOffset, + final boolean bisulfiteSequence) { int qualities = 0; final byte[] readBases = read.getReadBases(); final byte[] readQualities = read.getBaseQualities(); if (read.getAlignmentStart() <= referenceOffset) { - throw new IllegalArgumentException("read.getAlignmentStart(" + read.getAlignmentStart() + - ") <= referenceOffset(" + referenceOffset + ")"); + throw new IllegalArgumentException("read.getAlignmentStart(" + read.getAlignmentStart() + + ") <= referenceOffset(" + referenceOffset + ")"); } for (final AlignmentBlock block : read.getAlignmentBlocks()) { @@ -570,7 +603,9 @@ public static int sumQualitiesOfMismatches(final SAMRecord read, final byte[] re } } else { - if (!bisulfiteBasesEqual(read.getReadNegativeStrandFlag(), readBases[readBlockStart + i], + if (!bisulfiteBasesEqual( + read.getReadNegativeStrandFlag(), + readBases[readBlockStart + i], referenceBases[referenceBlockStart + i])) { qualities += readQualities[readBlockStart + i]; } @@ -622,8 +657,7 @@ public static int calculateSamNmTag(final SAMRecord read, final byte[] reference * @param referenceOffset 0-based offset of the first element of referenceBases relative to the start * of that reference sequence. */ - public static int calculateSamNmTag(final SAMRecord read, final byte[] referenceBases, - final int referenceOffset) { + public static int calculateSamNmTag(final SAMRecord read, final byte[] referenceBases, final int referenceOffset) { return calculateSamNmTag(read, referenceBases, referenceOffset, false); } @@ -638,8 +672,11 @@ public static int calculateSamNmTag(final SAMRecord read, final byte[] reference * and C->T on the positive strand and G->A on the negative strand will not be counted * as mismatches. 
*/ - public static int calculateSamNmTag(final SAMRecord read, final byte[] referenceBases, - final int referenceOffset, final boolean bisulfiteSequence) { + public static int calculateSamNmTag( + final SAMRecord read, + final byte[] referenceBases, + final int referenceOffset, + final boolean bisulfiteSequence) { int samNm = countMismatches(read, referenceBases, referenceOffset, bisulfiteSequence, false); for (final CigarElement el : read.getCigar().getCigarElements()) { if (el.getOperator() == CigarOperator.INSERTION || el.getOperator() == CigarOperator.DELETION) { @@ -658,9 +695,9 @@ public static int calculateSamNmTag(final SAMRecord read, final byte[] reference public static int calculateSamNmTagFromCigar(final SAMRecord record) { int samNm = 0; for (final CigarElement el : record.getCigar().getCigarElements()) { - if ( el.getOperator() == CigarOperator.X || - el.getOperator() == CigarOperator.INSERTION || - el.getOperator() == CigarOperator.DELETION) { + if (el.getOperator() == CigarOperator.X + || el.getOperator() == CigarOperator.INSERTION + || el.getOperator() == CigarOperator.DELETION) { samNm += el.getLength(); } } @@ -691,8 +728,6 @@ public static byte complement(final byte b) { } } - - /** * Returns true if the bases are equal OR if the mismatch can be accounted for by * bisulfite treatment. C->T on the positive strand and G->A on the negative strand @@ -711,8 +746,10 @@ public static boolean bisulfiteBasesEqual(final byte read, final byte reference) * Note that isBisulfiteConverted is not affected because it only applies when the * reference base is non-ambiguous. 
*/ - public static boolean bisulfiteBasesMatchWithAmbiguity(final boolean negativeStrand, final byte read, final byte reference) { - return (readBaseMatchesRefBaseWithAmbiguity(read, reference)) || (isBisulfiteConverted(read, reference, negativeStrand)); + public static boolean bisulfiteBasesMatchWithAmbiguity( + final boolean negativeStrand, final byte read, final byte reference) { + return (readBaseMatchesRefBaseWithAmbiguity(read, reference)) + || (isBisulfiteConverted(read, reference, negativeStrand)); } /** @@ -758,7 +795,8 @@ public static boolean isBisulfiteConverted(final byte read, final byte reference * '-'. If the read is soft-clipped, reference contains '0'. If there is a skipped region and * includeReferenceBasesForDeletions==true, reference will have Ns for the skipped region. */ - public static byte[] makeReferenceFromAlignment(final SAMRecord rec, final boolean includeReferenceBasesForDeletions) { + public static byte[] makeReferenceFromAlignment( + final SAMRecord rec, final boolean includeReferenceBasesForDeletions) { final String md = rec.getStringAttribute(SAMTag.MD); if (md == null) { throw new SAMException("Cannot create reference from SAMRecord with no MD tag, read: " + rec.getReadName()); @@ -784,7 +822,6 @@ public static byte[] makeReferenceFromAlignment(final SAMRecord rec, final boole final int cigElLen = cigEl.getLength(); final CigarOperator cigElOp = cigEl.getOperator(); - if (cigElOp == CigarOperator.SKIPPED_REGION) { // We've decided that MD tag will not contain bases for skipped regions, as they // could be megabases long, so just put N in there if caller wants reference bases, @@ -844,11 +881,13 @@ else if (cigElOp.consumesReferenceBases()) { // Check just to make sure. 
if (basesMatched != cigElLen) { - throw new SAMException("Got a deletion in CIGAR (" + cigar + ", deletion " + cigElLen + - " length) with an unequal ref insertion in MD (" + md + ", md " + basesMatched + " length"); + throw new SAMException("Got a deletion in CIGAR (" + cigar + ", deletion " + cigElLen + + " length) with an unequal ref insertion in MD (" + md + ", md " + basesMatched + + " length"); } if (cigElOp != CigarOperator.DELETION) { - throw new SAMException("Got an insertion in MD (" + md + ") without a corresponding deletion in cigar (" + cigar + ")"); + throw new SAMException("Got an insertion in MD (" + md + + ") without a corresponding deletion in cigar (" + cigar + ")"); } } else { @@ -857,8 +896,8 @@ else if (cigElOp.consumesReferenceBases()) { } if (!matched) { - throw new SAMException("Illegal MD pattern: " + md + " for read " + rec.getReadName() + - " with CIGAR " + rec.getCigarString()); + throw new SAMException("Illegal MD pattern: " + md + " for read " + rec.getReadName() + + " with CIGAR " + rec.getCigarString()); } } @@ -872,7 +911,6 @@ else if (cigElOp.consumesReferenceBases()) { } else { // It's an op that consumes neither read nor reference bases. Do we just ignore?? 
} - } if (outIndex < ret.length) { final byte[] shorter = new byte[outIndex]; @@ -938,60 +976,71 @@ public static String md5DigestToString(final byte[] digest) { return String.format(Locale.US, "%032x", new BigInteger(1, digest)); } - - public static byte[] calculateMD5(final byte[] data, final int offset, final int len) { - final MessageDigest md5_MessageDigest; + private static final ThreadLocal md5Digest = ThreadLocal.withInitial(() -> { try { - md5_MessageDigest = MessageDigest.getInstance("MD5"); - md5_MessageDigest.reset(); - - md5_MessageDigest.update(data, offset, len); - return md5_MessageDigest.digest(); + return MessageDigest.getInstance("MD5"); } catch (final NoSuchAlgorithmException e) { throw new RuntimeException(e); } + }); + + public static byte[] calculateMD5(final byte[] data, final int offset, final int len) { + final MessageDigest md = md5Digest.get(); + md.reset(); + md.update(data, offset, len); + return md.digest(); } /** - * Calculate MD and NM similarly to Samtools, except that N->N is a match. + * Compute MD string and NM count from a read's CIGAR, bases, and a reference sequence slice. + * This is the core implementation shared by {@link #calculateMdAndNmTags(SAMRecord, byte[], boolean, boolean)} + * and the CRAM decoder's NM/MD regeneration. * - * @param record Input record for which to calculate NM and MD. - * The appropriate tags will be added/updated in the record - * @param ref The reference bases for the sequence to which the record is mapped - * @param calcMD A flag indicating whether to update the MD tag in the record - * @param calcNM A flag indicating whether to update the NM tag in the record + *

    The reference bases are accessed starting at {@code refOffset} — i.e., {@code referenceBases[0]} + * corresponds to the genomic position {@code refOffset + 1} (1-based). The read's alignment start + * (1-based) determines where in the reference to begin comparing. + * + *

    Matches are determined by upper-casing both bases before comparison. N-to-N is treated as a match + * (matching samtools behavior). + * + * @param cigarElements the CIGAR elements for the read + * @param readBases the read's base sequence + * @param referenceBases the reference bases covering the read's alignment region + * @param refOffset the 0-based genomic offset of the first base in {@code referenceBases} + * @param alignmentStart the 1-based alignment start position of the read + * @return a Tuple of (MD string, NM count) */ - public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref, - final boolean calcMD, final boolean calcNM) { - if (!calcMD && !calcNM) - return; - - final Cigar cigar = record.getCigar(); - final List cigarElements = cigar.getCigarElements(); - final byte[] seq = record.getReadBases(); - final int alignmentStart = record.getAlignmentStart() - 1; - int cigarIndex, blockRefPos, blockReadStart, matchCount = 0; + public static Tuple calculateMdAndNm( + final List cigarElements, + final byte[] readBases, + final byte[] referenceBases, + final int refOffset, + final int alignmentStart) { + + // blockRefPos is the 0-based position in the reference array, adjusted for the offset + final int startInRef = alignmentStart - 1 - refOffset; + int blockRefPos = startInRef; + int blockReadStart = 0; + int matchCount = 0; int nmCount = 0; final StringBuilder mdString = new StringBuilder(); - final int nElements = cigarElements.size(); - for (cigarIndex = blockReadStart = 0, blockRefPos = alignmentStart; cigarIndex < nElements; ++cigarIndex) { - final CigarElement ce = cigarElements.get(cigarIndex); + for (final CigarElement ce : cigarElements) { int inBlockOffset; final int blockLength = ce.getLength(); final CigarOperator op = ce.getOperator(); - if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ - || op == CigarOperator.X) { + + if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == 
CigarOperator.X) { for (inBlockOffset = 0; inBlockOffset < blockLength; ++inBlockOffset) { final int readOffset = blockReadStart + inBlockOffset; + final int refIdx = blockRefPos + inBlockOffset; - if (ref.length <= blockRefPos + inBlockOffset) break; // out of boundary + if (refIdx >= referenceBases.length) break; // out of boundary - final byte readBase = seq[readOffset]; - final byte refBase = ref[blockRefPos + inBlockOffset]; + final byte readBase = readBases[readOffset]; + final byte refBase = referenceBases[refIdx]; if ((bases[readBase] == bases[refBase]) || readBase == 0) { - // a match ++matchCount; } else { mdString.append(matchCount); @@ -1007,15 +1056,15 @@ public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref mdString.append(matchCount); mdString.append('^'); for (inBlockOffset = 0; inBlockOffset < blockLength; ++inBlockOffset) { - if (ref[blockRefPos + inBlockOffset] == 0) break; - mdString.appendCodePoint(ref[blockRefPos + inBlockOffset]); + final int refIdx = blockRefPos + inBlockOffset; + if (refIdx >= referenceBases.length || referenceBases[refIdx] == 0) break; + mdString.appendCodePoint(referenceBases[refIdx]); } matchCount = 0; if (inBlockOffset < blockLength) break; blockRefPos += blockLength; nmCount += blockLength; - } else if (op == CigarOperator.INSERTION - || op == CigarOperator.SOFT_CLIP) { + } else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) { blockReadStart += blockLength; if (op == CigarOperator.INSERTION) nmCount += blockLength; } else if (op == CigarOperator.SKIPPED_REGION) { @@ -1024,8 +1073,32 @@ public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref } mdString.append(matchCount); - if (calcMD) record.setAttribute(SAMTag.MD, mdString.toString()); - if (calcNM) record.setAttribute(SAMTag.NM, nmCount); + return new Tuple<>(mdString.toString(), nmCount); + } + + /** + * Calculate MD and NM similarly to Samtools, except that N->N is a match. 
+ * + * @param record Input record for which to calculate NM and MD. + * The appropriate tags will be added/updated in the record + * @param ref The reference bases for the entire contig to which the record is mapped + * (index 0 = position 1 on the contig) + * @param calcMD A flag indicating whether to update the MD tag in the record + * @param calcNM A flag indicating whether to update the NM tag in the record + */ + public static void calculateMdAndNmTags( + final SAMRecord record, final byte[] ref, final boolean calcMD, final boolean calcNM) { + if (!calcMD && !calcNM) return; + + final Tuple result = calculateMdAndNm( + record.getCigar().getCigarElements(), + record.getReadBases(), + ref, + 0, // ref array starts at position 1 on the contig + record.getAlignmentStart()); + + if (calcMD) record.setAttribute(SAMTag.MD, result.a); + if (calcNM) record.setAttribute(SAMTag.NM, result.b); } public static byte upperCase(final byte base) { @@ -1033,8 +1106,7 @@ public static byte upperCase(final byte base) { } public static byte[] upperCase(final byte[] bases) { - for (int i = 0; i < bases.length; i++) - bases[i] = upperCase(bases[i]); + for (int i = 0; i < bases.length; i++) bases[i] = upperCase(bases[i]); return bases; } @@ -1081,7 +1153,7 @@ public static List generateAllKmers(final int length) { // Read names cannot contain blanks public static String getSamReadNameFromFastqHeader(final String fastqHeader) { final int idx = fastqHeader.indexOf(" "); - String readName = (idx == -1) ? fastqHeader : fastqHeader.substring(0,idx); + String readName = (idx == -1) ? fastqHeader : fastqHeader.substring(0, idx); // NOTE: the while loop isn't necessarily the most efficient way to handle this but we don't // expect this to ever happen more than once, just trapping pathological cases @@ -1101,7 +1173,7 @@ public static String getSamReadNameFromFastqHeader(final String fastqHeader) { * @param length How many bases to return. 
* @return an array of random DNA bases of the requested length. */ - static public byte[] getRandomBases(Random random, final int length) { + public static byte[] getRandomBases(Random random, final int length) { ValidationUtils.validateArg(length >= 0, "length must be non-negative"); final byte[] bases = new byte[length]; getRandomBases(random, length, bases); @@ -1118,7 +1190,7 @@ static public byte[] getRandomBases(Random random, final int length) { * @param length How many bases to return. * @param bases Array to use for bases (from index 0) */ - static public void getRandomBases(Random random, final int length, final byte[] bases) { + public static void getRandomBases(Random random, final int length, final byte[] bases) { ValidationUtils.validateArg(length >= 0, "length must be non-negative"); ValidationUtils.validateArg(length <= bases.length, "length must no larger than size of input array"); diff --git a/src/main/java/htsjdk/samtools/util/SnappyLoader.java b/src/main/java/htsjdk/samtools/util/SnappyLoader.java index 7c5111dd56..a2fce53895 100644 --- a/src/main/java/htsjdk/samtools/util/SnappyLoader.java +++ b/src/main/java/htsjdk/samtools/util/SnappyLoader.java @@ -25,7 +25,6 @@ import htsjdk.samtools.Defaults; import htsjdk.samtools.SAMException; - import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -52,20 +51,25 @@ public SnappyLoader() { } else { boolean tmpAvailable; try { - //This triggers trying to import Snappy code, which causes an exception if the library is missing. + // This triggers trying to import Snappy code, which causes an exception if the library is missing. tmpAvailable = SnappyLoaderInternal.tryToLoadSnappy(); - } catch (NoClassDefFoundError e){ + } catch (NoClassDefFoundError e) { tmpAvailable = false; - logger.error(e, "Snappy java library was requested but not found. 
If Snappy is " + - "intentionally missing, this message may be suppressed by setting " + - "-D"+ Defaults.SAMJDK_PREFIX + Defaults.DISABLE_SNAPPY_PROPERTY_NAME + "=true " ); + logger.error( + e, + "Snappy java library was requested but not found. If Snappy is " + + "intentionally missing, this message may be suppressed by setting " + + "-D" + + Defaults.SAMJDK_PREFIX + Defaults.DISABLE_SNAPPY_PROPERTY_NAME + "=true "); } snappyAvailable = tmpAvailable; } } /** Returns true if Snappy is available, false otherwise. */ - public boolean isSnappyAvailable() { return snappyAvailable; } + public boolean isSnappyAvailable() { + return snappyAvailable; + } /** * Wrap an InputStream in a SnappyInputStream. @@ -86,11 +90,11 @@ public OutputStream wrapOutputStream(final OutputStream outputStream) { /** * Function which can throw IOExceptions */ - interface IOFunction { + interface IOFunction { R apply(T input) throws IOException; } - private R wrapWithSnappyOrThrow(T stream, IOFunction wrapper){ + private R wrapWithSnappyOrThrow(T stream, IOFunction wrapper) { if (isSnappyAvailable()) { try { return wrapper.apply(stream); @@ -100,10 +104,9 @@ private R wrapWithSnappyOrThrow(T stream, IOFunction wrapper){ } else { final String errorMessage = Defaults.DISABLE_SNAPPY_COMPRESSOR ? "Cannot wrap stream with snappy compressor because snappy was disabled via the " - + Defaults.DISABLE_SNAPPY_PROPERTY_NAME + " system property." + + Defaults.DISABLE_SNAPPY_PROPERTY_NAME + " system property." 
: "Cannot wrap stream with snappy compressor because we could not load the snappy library."; throw new SAMException(errorMessage); } } - } diff --git a/src/main/java/htsjdk/samtools/util/SnappyLoaderInternal.java b/src/main/java/htsjdk/samtools/util/SnappyLoaderInternal.java index 776587791b..279878cd9f 100644 --- a/src/main/java/htsjdk/samtools/util/SnappyLoaderInternal.java +++ b/src/main/java/htsjdk/samtools/util/SnappyLoaderInternal.java @@ -1,14 +1,13 @@ package htsjdk.samtools.util; import htsjdk.annotations.InternalAPI; -import org.xerial.snappy.SnappyError; -import org.xerial.snappy.SnappyInputStream; -import org.xerial.snappy.SnappyOutputStream; - import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import org.xerial.snappy.SnappyError; +import org.xerial.snappy.SnappyInputStream; +import org.xerial.snappy.SnappyOutputStream; /** * This class is the only one which should actually import Snappy Classes. It is separated from SnappyLoader to allow @@ -21,7 +20,8 @@ @InternalAPI class SnappyLoaderInternal { private static final Log logger = Log.getInstance(SnappyLoaderInternal.class); - private static final int SNAPPY_BLOCK_SIZE = 32768; // keep this as small as can be without hurting compression ratio. + private static final int SNAPPY_BLOCK_SIZE = + 32768; // keep this as small as can be without hurting compression ratio. /** * Try to load Snappy's native library. 
@@ -33,7 +33,7 @@ class SnappyLoaderInternal { static boolean tryToLoadSnappy() { final boolean snappyAvailable; boolean tmpSnappyAvailable = false; - try (final OutputStream test = new SnappyOutputStream(new ByteArrayOutputStream(1000))){ + try (final OutputStream test = new SnappyOutputStream(new ByteArrayOutputStream(1000))) { test.write("Hello World!".getBytes()); tmpSnappyAvailable = true; logger.debug("Snappy successfully loaded."); @@ -44,26 +44,28 @@ static boolean tryToLoadSnappy() { * IOException: potentially thrown by the `test.write` and `test.close` calls. * SnappyError: potentially thrown for a variety of reasons by Snappy. */ - catch (final ExceptionInInitializerError | IllegalStateException | IOException | SnappyError e) { + catch (final ExceptionInInitializerError + | UnsatisfiedLinkError + | IllegalStateException + | IOException + | SnappyError e) { logger.warn(e, "Snappy native library failed to load."); } snappyAvailable = tmpSnappyAvailable; return snappyAvailable; } - /** * @return a function which wraps an InputStream in a new SnappyInputStream */ - static SnappyLoader.IOFunction getInputStreamWrapper(){ + static SnappyLoader.IOFunction getInputStreamWrapper() { return SnappyInputStream::new; } /** * @return a function which wraps an OutputStream in a new SnappyOutputStream with an appropriate block size */ - static SnappyLoader.IOFunction getOutputStreamWrapper(){ + static SnappyLoader.IOFunction getOutputStreamWrapper() { return (stream) -> new SnappyOutputStream(stream, SNAPPY_BLOCK_SIZE); } - } diff --git a/src/main/java/htsjdk/samtools/util/SolexaQualityConverter.java b/src/main/java/htsjdk/samtools/util/SolexaQualityConverter.java index ba12678824..579c794a29 100644 --- a/src/main/java/htsjdk/samtools/util/SolexaQualityConverter.java +++ b/src/main/java/htsjdk/samtools/util/SolexaQualityConverter.java @@ -42,11 +42,11 @@ public class SolexaQualityConverter { /** * This value is removed from an Illumina 1.8 quality score to make it a 
Phred score */ - public final static int ILLUMINA_TO_PHRED_SUBTRAHEND = SOLEXA_ADDEND - PHRED_ADDEND; + public static final int ILLUMINA_TO_PHRED_SUBTRAHEND = SOLEXA_ADDEND - PHRED_ADDEND; private static SolexaQualityConverter singleton = null; - public static synchronized SolexaQualityConverter getSingleton() { + public static synchronized SolexaQualityConverter getSingleton() { if (singleton == null) { singleton = new SolexaQualityConverter(); } @@ -67,10 +67,9 @@ private SolexaQualityConverter() { } } - /** Converts a solexa character quality into a phred numeric quality. */ private byte convertSolexaQualityCharToPhredBinary(final int solexaQuality) { - return (byte) Math.round(10d * Math.log10(1d+Math.pow(10d, (solexaQuality - SOLEXA_ADDEND)/10d))); + return (byte) Math.round(10d * Math.log10(1d + Math.pow(10d, (solexaQuality - SOLEXA_ADDEND) / 10d))); } /** @@ -94,7 +93,7 @@ public byte[] getSolexaToPhredConversionTable() { * Decode in place in order to avoid extra object allocation. */ public void convertSolexaQualityCharsToPhredBinary(final byte[] solexaQuals) { - for (int i=0; i extends Cloneable { * For sorting, both when spilling records to file, and merge sorting. */ private final Comparator comparator; + private final int maxRecordsInRam; private int numRecordsInRam = 0; private T[] ramRecords; @@ -142,9 +142,13 @@ public interface Codec extends Cloneable { * @param printRecordSizeSampling If true record size will be sampled and output at DEBUG log level * @param tmpDir Where to write files of records that will not fit in RAM */ - private SortingCollection(final Class componentType, final SortingCollection.Codec codec, - final Comparator comparator, final int maxRecordsInRam, - final boolean printRecordSizeSampling, final Path... tmpDir) { + private SortingCollection( + final Class componentType, + final SortingCollection.Codec codec, + final Comparator comparator, + final int maxRecordsInRam, + final boolean printRecordSizeSampling, + final Path... 
tmpDir) { if (maxRecordsInRam <= 0) { throw new IllegalArgumentException("maxRecordsInRam must be > 0"); } @@ -182,15 +186,16 @@ public void add(final T rec) { spillToDisk(); if (printRecordSizeSampling) { - //Garbage collect again and get free memory + // Garbage collect again and get free memory Runtime.getRuntime().gc(); long endMem = Runtime.getRuntime().freeMemory(); long usedBytes = endMem - startMem; - log.debug(String.format("%d records in ram required approximately %s memory or %s per record. ", maxRecordsInRam, + log.debug(String.format( + "%d records in ram required approximately %s memory or %s per record. ", + maxRecordsInRam, StringUtil.humanReadableByteCount(usedBytes), StringUtil.humanReadableByteCount(usedBytes / maxRecordsInRam))); - } } ramRecords[numRecordsInRam++] = rec; @@ -247,8 +252,8 @@ public void spillToDisk() { Arrays.parallelSort(this.ramRecords, 0, this.numRecordsInRam, this.comparator); final Path f = newTempFile(); - try (OutputStream os - = tempStreamFactory.wrapTempOutputStream(Files.newOutputStream(f), Defaults.BUFFER_SIZE)) { + try (OutputStream os = + tempStreamFactory.wrapTempOutputStream(Files.newOutputStream(f), Defaults.BUFFER_SIZE)) { this.codec.setOutputStream(os); for (int i = 0; i < this.numRecordsInRam; ++i) { this.codec.encode(ramRecords[i]); @@ -257,8 +262,10 @@ public void spillToDisk() { } os.flush(); } catch (RuntimeIOException ex) { - throw new RuntimeIOException("Problem writing temporary file " + f.toUri() + - ". Try setting TMP_DIR to a file system with lots of space.", ex); + throw new RuntimeIOException( + "Problem writing temporary file " + f.toUri() + + ". Try setting TMP_DIR to a file system with lots of space.", + ex); } this.numRecordsInRam = 0; @@ -268,7 +275,6 @@ public void spillToDisk() { } } - /** * Creates a new tmp file on one of the available temp filesystems, registers it for deletion * on JVM exit and then returns it. @@ -318,13 +324,19 @@ public void cleanup() { * @deprecated since 2017-09. 
Use {@link #newInstance(Class, Codec, Comparator, int, Path...)} instead */ @Deprecated - public static SortingCollection newInstance(final Class componentType, - final SortingCollection.Codec codec, - final Comparator comparator, - final int maxRecordsInRAM, - final File... tmpDir) { - return new SortingCollection<>(componentType, codec, comparator, maxRecordsInRAM, false, Arrays.stream(tmpDir).map(File::toPath).toArray(Path[]::new)); - + public static SortingCollection newInstance( + final Class componentType, + final SortingCollection.Codec codec, + final Comparator comparator, + final int maxRecordsInRAM, + final File... tmpDir) { + return new SortingCollection<>( + componentType, + codec, + comparator, + maxRecordsInRAM, + false, + Arrays.stream(tmpDir).map(File::toPath).toArray(Path[]::new)); } /** @@ -338,18 +350,19 @@ public static SortingCollection newInstance(final Class componentType, * @deprecated since 2017-09. Use {@link #newInstanceFromPaths(Class, Codec, Comparator, int, Collection)} instead */ @Deprecated - public static SortingCollection newInstance(final Class componentType, - final SortingCollection.Codec codec, - final Comparator comparator, - final int maxRecordsInRAM, - final Collection tmpDirs) { - return new SortingCollection<>(componentType, + public static SortingCollection newInstance( + final Class componentType, + final SortingCollection.Codec codec, + final Comparator comparator, + final int maxRecordsInRAM, + final Collection tmpDirs) { + return new SortingCollection<>( + componentType, codec, comparator, maxRecordsInRAM, false, tmpDirs.stream().map(File::toPath).toArray(Path[]::new)); - } /** @@ -361,13 +374,15 @@ public static SortingCollection newInstance(final Class componentType, * @param maxRecordsInRAM how many records to accumulate in memory before spilling to disk * @param printRecordSizeSampling If true record size will be sampled and output at DEBUG log level */ - public static SortingCollection newInstance(final Class 
componentType, - final SortingCollection.Codec codec, - final Comparator comparator, - final int maxRecordsInRAM, - final boolean printRecordSizeSampling) { + public static SortingCollection newInstance( + final Class componentType, + final SortingCollection.Codec codec, + final Comparator comparator, + final int maxRecordsInRAM, + final boolean printRecordSizeSampling) { final Path tmpDir = Paths.get(System.getProperty("java.io.tmpdir")); - return new SortingCollection<>(componentType, codec, comparator, maxRecordsInRAM, printRecordSizeSampling, tmpDir); + return new SortingCollection<>( + componentType, codec, comparator, maxRecordsInRAM, printRecordSizeSampling, tmpDir); } /** @@ -380,13 +395,15 @@ public static SortingCollection newInstance(final Class componentType, * @param printRecordSizeSampling If true record size will be sampled and output at DEBUG log level * @param tmpDir Where to write files of records that will not fit in RAM */ - public static SortingCollection newInstance(final Class componentType, - final SortingCollection.Codec codec, - final Comparator comparator, - final int maxRecordsInRAM, - final boolean printRecordSizeSampling, - final Path... tmpDir) { - return new SortingCollection<>(componentType, codec, comparator, maxRecordsInRAM, printRecordSizeSampling, tmpDir); + public static SortingCollection newInstance( + final Class componentType, + final SortingCollection.Codec codec, + final Comparator comparator, + final int maxRecordsInRAM, + final boolean printRecordSizeSampling, + final Path... 
tmpDir) { + return new SortingCollection<>( + componentType, codec, comparator, maxRecordsInRAM, printRecordSizeSampling, tmpDir); } /** @@ -397,10 +414,11 @@ public static SortingCollection newInstance(final Class componentType, * @param comparator Defines output sort order * @param maxRecordsInRAM how many records to accumulate in memory before spilling to disk */ - public static SortingCollection newInstance(final Class componentType, - final SortingCollection.Codec codec, - final Comparator comparator, - final int maxRecordsInRAM) { + public static SortingCollection newInstance( + final Class componentType, + final SortingCollection.Codec codec, + final Comparator comparator, + final int maxRecordsInRAM) { final Path tmpDir = Paths.get(System.getProperty("java.io.tmpdir")); return new SortingCollection<>(componentType, codec, comparator, maxRecordsInRAM, false, tmpDir); } @@ -414,11 +432,12 @@ public static SortingCollection newInstance(final Class componentType, * @param maxRecordsInRAM how many records to accumulate in memory before spilling to disk * @param tmpDir Where to write files of records that will not fit in RAM */ - public static SortingCollection newInstance(final Class componentType, - final SortingCollection.Codec codec, - final Comparator comparator, - final int maxRecordsInRAM, - final Path... tmpDir) { + public static SortingCollection newInstance( + final Class componentType, + final SortingCollection.Codec codec, + final Comparator comparator, + final int maxRecordsInRAM, + final Path... 
tmpDir) { return new SortingCollection<>(componentType, codec, comparator, maxRecordsInRAM, false, tmpDir); } @@ -431,17 +450,14 @@ public static SortingCollection newInstance(final Class componentType, * @param maxRecordsInRAM how many records to accumulate in memory before spilling to disk * @param tmpDirs Where to write files of records that will not fit in RAM */ - public static SortingCollection newInstanceFromPaths(final Class componentType, - final SortingCollection.Codec codec, - final Comparator comparator, - final int maxRecordsInRAM, - final Collection tmpDirs) { - return new SortingCollection<>(componentType, - codec, - comparator, - maxRecordsInRAM, - false, - tmpDirs.toArray(new Path[tmpDirs.size()])); + public static SortingCollection newInstanceFromPaths( + final Class componentType, + final SortingCollection.Codec codec, + final Comparator comparator, + final int maxRecordsInRAM, + final Collection tmpDirs) { + return new SortingCollection<>( + componentType, codec, comparator, maxRecordsInRAM, false, tmpDirs.toArray(new Path[tmpDirs.size()])); } /** @@ -451,7 +467,8 @@ class InMemoryIterator implements CloseableIterator { private int iterationIndex = 0; InMemoryIterator() { - Arrays.parallelSort(SortingCollection.this.ramRecords, + Arrays.parallelSort( + SortingCollection.this.ramRecords, 0, SortingCollection.this.numRecordsInRam, SortingCollection.this.comparator); @@ -541,7 +558,8 @@ private int checkMemoryAndAdjustBuffer(int numFiles) { log.warn("There is not enough memory per file for buffering. 
Reading will be unbuffered."); bufferSize = 0; } else if (bufferSize > memoryPerFile) { - log.warn(String.format("Default io buffer size of %s is larger than available memory per file of %s.", + log.warn(String.format( + "Default io buffer size of %s is larger than available memory per file of %s.", StringUtil.humanReadableByteCount(bufferSize), StringUtil.humanReadableByteCount(memoryPerFile))); bufferSize = memoryPerFile; @@ -636,7 +654,6 @@ public void close() { } } - /** * Just a typedef */ diff --git a/src/main/java/htsjdk/samtools/util/SortingLongCollection.java b/src/main/java/htsjdk/samtools/util/SortingLongCollection.java index 232b5ecaa9..75218ca386 100644 --- a/src/main/java/htsjdk/samtools/util/SortingLongCollection.java +++ b/src/main/java/htsjdk/samtools/util/SortingLongCollection.java @@ -27,9 +27,6 @@ import java.io.DataOutputStream; import java.io.EOFException; import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; import java.io.Serializable; import java.nio.file.Files; @@ -77,7 +74,6 @@ public class SortingLongCollection { private int numValuesInRam = 0; private long[] ramValues; - /** * Set to true when done adding and ready to iterate */ @@ -157,8 +153,8 @@ public void doneAddingStartIteration() { spillToDisk(); } - this.priorityQueue = new PriorityQueue(files.size(), - new PeekFileValueIteratorComparator()); + this.priorityQueue = + new PriorityQueue(files.size(), new PeekFileValueIteratorComparator()); for (final Path f : files) { final FileValueIterator it = new FileValueIterator(f); if (it.hasNext()) { @@ -212,7 +208,6 @@ public void cleanup() { IOUtil.deletePaths(this.files); } - /** * Call only after doneAddingStartIteration() has been called. 
* @@ -299,7 +294,6 @@ void close() { } } - /** * Add peek() functionality to FileValueIterator */ diff --git a/src/main/java/htsjdk/samtools/util/StopWatch.java b/src/main/java/htsjdk/samtools/util/StopWatch.java index 8e129165ab..1d3a0501a0 100644 --- a/src/main/java/htsjdk/samtools/util/StopWatch.java +++ b/src/main/java/htsjdk/samtools/util/StopWatch.java @@ -32,13 +32,11 @@ public class StopWatch { private long elapsedTime = 0; private boolean running = false; - public void start() { this.startTime = System.currentTimeMillis(); this.running = true; } - public void stop() { long stopTime = System.currentTimeMillis(); elapsedTime += stopTime - startTime; @@ -59,14 +57,13 @@ public void reset() { public long getElapsedTime() { final long currentElapsed; if (running) { - currentElapsed = (System.currentTimeMillis() - startTime); + currentElapsed = (System.currentTimeMillis() - startTime); } else { currentElapsed = 0; } return currentElapsed + elapsedTime; } - /** * @return same as getElapsedTime(), but truncated to seconds. */ diff --git a/src/main/java/htsjdk/samtools/util/StringUtil.java b/src/main/java/htsjdk/samtools/util/StringUtil.java index acdf869f60..d92b77ed44 100644 --- a/src/main/java/htsjdk/samtools/util/StringUtil.java +++ b/src/main/java/htsjdk/samtools/util/StringUtil.java @@ -48,7 +48,7 @@ public static String join(final String separator, final Collection objs) final StringBuilder ret = new StringBuilder(); for (final Object obj : objs) { - if(notFirst) { + if (notFirst) { ret.append(separator); } ret.append(obj.toString()); @@ -62,7 +62,6 @@ public static String join(final String separator, final T... objs) { return join(separator, values); } - /** * Split the string into tokens separated by the given delimiter. 
Profiling has * revealed that the standard string.split() method typically takes > 1/2 @@ -80,23 +79,19 @@ public static int split(final String aString, final String[] tokens, final char int nTokens = 0; int start = 0; int end = aString.indexOf(delim); - if(end < 0) { + if (end < 0) { tokens[nTokens++] = aString; return nTokens; } - while ((end >= 0) && (nTokens < maxTokens)) - { + while ((end >= 0) && (nTokens < maxTokens)) { tokens[nTokens++] = aString.substring(start, end); start = end + 1; end = aString.indexOf(delim, start); - } // Add the trailing string, if there is room and if it is not empty. - if (nTokens < maxTokens) - { + if (nTokens < maxTokens) { final String trailingString = aString.substring(start); - if (!trailingString.isEmpty()) - { + if (!trailingString.isEmpty()) { tokens[nTokens++] = trailingString; } } @@ -121,21 +116,18 @@ public static int splitConcatenateExcessTokens(final String aString, final Strin int nTokens = 0; int start = 0; int end = aString.indexOf(delim); - if(end < 0) { + if (end < 0) { tokens[nTokens++] = aString; return nTokens; } - while ((end >= 0) && (nTokens < maxTokens - 1)) - { + while ((end >= 0) && (nTokens < maxTokens - 1)) { tokens[nTokens++] = aString.substring(start, end); start = end + 1; end = aString.indexOf(delim, start); - } // Add the trailing string, if it is not empty. 
final String trailingString = aString.substring(start); - if (!trailingString.isEmpty()) - { + if (!trailingString.isEmpty()) { tokens[nTokens++] = trailingString; } return nTokens; @@ -149,7 +141,7 @@ public static byte toLowerCase(final byte b) { if (b < 'A' || b > 'Z') { return b; } - return (byte)(b - UPPER_CASE_OFFSET); + return (byte) (b - UPPER_CASE_OFFSET); } /** @@ -160,7 +152,7 @@ public static byte toUpperCase(final byte b) { if (b < 'a' || b > 'z') { return b; } - return (byte)(b + UPPER_CASE_OFFSET); + return (byte) (b + UPPER_CASE_OFFSET); } /** @@ -168,14 +160,13 @@ public static byte toUpperCase(final byte b) { */ public static void toUpperCase(final byte[] bytes) { final int length = bytes.length; - for (int i=0; i= 'a' && bytes[i] <= 'z') { bytes[i] = (byte) (bytes[i] + UPPER_CASE_OFFSET); } } } - /** * Checks that a String doesn't contain one or more characters of interest. * @@ -186,9 +177,10 @@ public static void toUpperCase(final byte[] bytes) { */ public static String assertCharactersNotInString(final String illegalChars, final char... chars) { for (final char illegalChar : illegalChars.toCharArray()) { - for (final char ch: chars) { + for (final char ch : chars) { if (illegalChar == ch) { - throw new IllegalArgumentException("Supplied String contains illegal character '" + illegalChar + "'."); + throw new IllegalArgumentException( + "Supplied String contains illegal character '" + illegalChar + "'."); } } } @@ -198,13 +190,13 @@ public static String assertCharactersNotInString(final String illegalChars, fina /** * Return input string with newlines inserted to ensure that all lines - * have length <= maxLineLength. if a word is too long, it is simply broken + * have length {@code <=} maxLineLength. if a word is too long, it is simply broken * at maxLineLength. Does not handle tabs intelligently (due to implementer laziness). 
*/ public static String wordWrap(final String s, final int maxLineLength) { final String[] lines = s.split("\n"); final StringBuilder sb = new StringBuilder(); - for (final String line: lines) { + for (final String line : lines) { if (sb.length() > 0) { sb.append('\n'); } @@ -249,12 +241,11 @@ public static String wordWrapSingleLine(final String s, final int maxLineLength) return sb.toString(); } - public static String intValuesToString(final int[] intVals) { final StringBuilder sb = new StringBuilder(intVals.length); - if(intVals.length > 0) { + if (intVals.length > 0) { sb.append(String.valueOf(intVals[0])); - for(int i = 1; i < intVals.length; i++) { + for (int i = 1; i < intVals.length; i++) { sb.append(", "); sb.append(String.valueOf(intVals[i])); } @@ -265,9 +256,9 @@ public static String intValuesToString(final int[] intVals) { public static String intValuesToString(final short[] shortVals) { final StringBuilder sb = new StringBuilder(shortVals.length); - if(shortVals.length > 0) { + if (shortVals.length > 0) { sb.append(String.valueOf(shortVals[0])); - for(int i = 1; i < shortVals.length; i++) { + for (int i = 1; i < shortVals.length; i++) { sb.append(", "); sb.append(String.valueOf(shortVals[i])); } @@ -290,28 +281,28 @@ public static String bytesToString(final byte[] data) { @SuppressWarnings("deprecation") public static String bytesToString(final byte[] buffer, final int offset, final int length) { -/* - The non-deprecated way, that requires allocating char[] - final char[] charBuffer = new char[length]; - for (int i = 0; i < length; ++i) { - charBuffer[i] = (char)buffer[i+offset]; - } - return new String(charBuffer); -*/ + /* + The non-deprecated way, that requires allocating char[] + final char[] charBuffer = new char[length]; + for (int i = 0; i < length; ++i) { + charBuffer[i] = (char)buffer[i+offset]; + } + return new String(charBuffer); + */ return new String(buffer, 0, offset, length); } @SuppressWarnings("deprecation") public static byte[] 
stringToBytes(final String s) { -/* - The non-deprecated way, that requires allocating char[] - final byte[] byteBuffer = new byte[s.length()]; - final char[] charBuffer = s.toCharArray(); - for (int i = 0; i < charBuffer.length; ++i) { - byteBuffer[i] = (byte)(charBuffer[i] & 0xff); - } - return byteBuffer; -*/ + /* + The non-deprecated way, that requires allocating char[] + final byte[] byteBuffer = new byte[s.length()]; + final char[] charBuffer = s.toCharArray(); + for (int i = 0; i < charBuffer.length; ++i) { + byteBuffer[i] = (byte)(charBuffer[i] & 0xff); + } + return byteBuffer; + */ if (s == null) { return null; } @@ -335,7 +326,7 @@ public static byte[] stringToBytes(final String s, final int offset, final int l public static String readNullTerminatedString(final BinaryCodec binaryCodec) { final StringBuilder ret = new StringBuilder(); for (byte b = binaryCodec.readByte(); b != 0; b = binaryCodec.readByte()) { - ret.append((char)(b & 0xff)); + ret.append((char) (b & 0xff)); } return ret.toString(); } @@ -348,10 +339,10 @@ public static String readNullTerminatedString(final BinaryCodec binaryCodec) { * @param bytes where to put the converted output * @param byteOffset where to start writing the converted output. */ - public static void charsToBytes(final char[] chars, final int charOffset, final int length, - final byte[] bytes, final int byteOffset) { + public static void charsToBytes( + final char[] chars, final int charOffset, final int length, final byte[] bytes, final int byteOffset) { for (int i = 0; i < length; ++i) { - bytes[byteOffset + i] = (byte)chars[charOffset + i]; + bytes[byteOffset + i] = (byte) chars[charOffset + i]; } } @@ -359,14 +350,14 @@ public static void charsToBytes(final char[] chars, final int charOffset, final * Convert ASCII char to byte. */ public static byte charToByte(final char c) { - return (byte)c; + return (byte) c; } /** * Convert ASCII byte to ASCII char. 
*/ public static char byteToChar(final byte b) { - return (char)(b & 0xff); + return (char) (b & 0xff); } /** @@ -378,8 +369,8 @@ public static String bytesToHexString(final byte[] data) { final char[] chars = new char[2 * data.length]; for (int i = 0; i < data.length; i++) { final byte b = data[i]; - chars[2*i] = toHexDigit((b >> 4) & 0xF); - chars[2*i+1] = toHexDigit(b & 0xF); + chars[2 * i] = toHexDigit((b >> 4) & 0xF); + chars[2 * i + 1] = toHexDigit(b & 0xF); } return new String(chars); } @@ -391,9 +382,10 @@ public static String bytesToHexString(final byte[] data) { * @return byte array with binary representation of hex string. * @throws NumberFormatException */ - public static byte[] hexStringToBytes(final String s) throws NumberFormatException { + public static byte[] hexStringToBytes(final String s) throws NumberFormatException { if (s.length() % 2 != 0) { - throw new NumberFormatException("Hex representation of byte string does not have even number of hex chars: " + s); + throw new NumberFormatException( + "Hex representation of byte string does not have even number of hex chars: " + s); } final byte[] ret = new byte[s.length() / 2]; for (int i = 0; i < ret.length; ++i) { @@ -445,14 +437,14 @@ public static boolean isBlank(String str) { return true; } for (int i = 0; i < strLen; i++) { - if (!Character.isWhitespace(str.charAt(i)) ) { + if (!Character.isWhitespace(str.charAt(i))) { return false; } } return true; } - /*

    Generates a string of one character to a specified length

    + /*

    Generates a string of one character to a specified length

    * * @param c the Character to repeat * @param repeatNumber the number of times to repeat the character @@ -466,44 +458,46 @@ public static String repeatCharNTimes(char c, int repeatNumber) { /** Returns {@link Object#toString()} of the provided value if it isn't null; "" otherwise. */ public static final String EMPTY_STRING = ""; + public static String asEmptyIfNull(final Object string) { return string == null ? EMPTY_STRING : string.toString(); } /* - * This is from GIT! - * This function implements the Damerau-Levenshtein algorithm to - * calculate a distance between strings. - * - * Basically, it says how many letters need to be swapped, substituted, - * deleted from, or added to string1, at least, to get string2. - * - * The idea is to build a distance matrix for the substrings of both - * strings. To avoid a large space complexity, only the last three rows - * are kept in memory (if swaps had the same or higher cost as one deletion - * plus one insertion, only two rows would be needed). - * - * At any stage, "i + 1" denotes the length of the current substring of - * string1 that the distance is calculated for. - * - * row2 holds the current row, row1 the previous row (i.e. for the substring - * of string1 of length "i"), and row0 the row before that. - * - * In other words, at the start of the big loop, row2[j + 1] contains the - * Damerau-Levenshtein distance between the substring of string1 of length - * "i" and the substring of string2 of length "j + 1". - * - * All the big loop does is determine the partial minimum-cost paths. - * - * It does so by calculating the costs of the path ending in characters - * i (in string1) and j (in string2), respectively, given that the last - * operation is a substitution, a swap, a deletion, or an insertion. - * - * This implementation allows the costs to be weighted: - * - * Note that this algorithm calculates a distance _iff_ d == a. 
- */ - public static int levenshteinDistance(final String string1, final String string2, int swap, int substitution, int insertion, int deletion) { + * This is from GIT! + * This function implements the Damerau-Levenshtein algorithm to + * calculate a distance between strings. + * + * Basically, it says how many letters need to be swapped, substituted, + * deleted from, or added to string1, at least, to get string2. + * + * The idea is to build a distance matrix for the substrings of both + * strings. To avoid a large space complexity, only the last three rows + * are kept in memory (if swaps had the same or higher cost as one deletion + * plus one insertion, only two rows would be needed). + * + * At any stage, "i + 1" denotes the length of the current substring of + * string1 that the distance is calculated for. + * + * row2 holds the current row, row1 the previous row (i.e. for the substring + * of string1 of length "i"), and row0 the row before that. + * + * In other words, at the start of the big loop, row2[j + 1] contains the + * Damerau-Levenshtein distance between the substring of string1 of length + * "i" and the substring of string2 of length "j + 1". + * + * All the big loop does is determine the partial minimum-cost paths. + * + * It does so by calculating the costs of the path ending in characters + * i (in string1) and j (in string2), respectively, given that the last + * operation is a substitution, a swap, a deletion, or an insertion. + * + * This implementation allows the costs to be weighted: + * + * Note that this algorithm calculates a distance _iff_ d == a. 
+ */ + public static int levenshteinDistance( + final String string1, final String string2, int swap, int substitution, int insertion, int deletion) { int i, j; int[] row0 = new int[(string2.length() + 1)]; @@ -526,9 +520,11 @@ public static int levenshteinDistance(final String string1, final String string2 row2[j + 1] += substitution; } /* swap */ - if (i > 0 && j > 0 && str1[i - 1] == str2[j] && - str1[i] == str2[j - 1] && - row2[j + 1] > row0[j - 1] + swap) { + if (i > 0 + && j > 0 + && str1[i - 1] == str2[j] + && str1[i] == str2[j - 1] + && row2[j + 1] > row0[j - 1] + swap) { row2[j + 1] = row0[j - 1] + swap; } /* deletion */ @@ -565,11 +561,13 @@ public static int levenshteinDistance(final String string1, final String string2 */ public static int hammingDistance(final String s1, final String s2) { if (s1.length() != s2.length()) { - throw new IllegalArgumentException("Attempted to determine Hamming distance of strings with differing lengths. " + - "The first string has length " + s1.length() + " and the second string has length " + s2.length() + "."); + throw new IllegalArgumentException( + "Attempted to determine Hamming distance of strings with differing lengths. 
" + + "The first string has length " + s1.length() + " and the second string has length " + + s2.length() + "."); } int measuredDistance = 0; - for (int i = 0;i < s1.length();i++) { + for (int i = 0; i < s1.length(); i++) { if (s1.charAt(i) != s2.charAt(i)) { measuredDistance++; } @@ -591,10 +589,11 @@ public static int hammingDistance(final String s1, final String s2) { */ public static boolean isWithinHammingDistance(final String s1, final String s2, final int maxHammingDistance) { if (s1.length() != s2.length()) { - throw new IllegalArgumentException("Attempted to determine if two strings of different length were within a specified edit distance."); + throw new IllegalArgumentException( + "Attempted to determine if two strings of different length were within a specified edit distance."); } int measuredDistance = 0; - for (int i = 0;i < s1.length();i++) { + for (int i = 0; i < s1.length(); i++) { if (s1.charAt(i) != s2.charAt(i)) { measuredDistance++; // If the measuredDistance is larger than the maxHammingDistance we can short circuit and return diff --git a/src/main/java/htsjdk/samtools/util/TempStreamFactory.java b/src/main/java/htsjdk/samtools/util/TempStreamFactory.java index d807d551c1..d1af61bcfb 100644 --- a/src/main/java/htsjdk/samtools/util/TempStreamFactory.java +++ b/src/main/java/htsjdk/samtools/util/TempStreamFactory.java @@ -24,7 +24,6 @@ package htsjdk.samtools.util; import htsjdk.samtools.SAMException; - import java.io.BufferedOutputStream; import java.io.InputStream; import java.io.OutputStream; diff --git a/src/main/java/htsjdk/samtools/util/TestUtil.java b/src/main/java/htsjdk/samtools/util/TestUtil.java index eb324239fd..0567dd0cf1 100644 --- a/src/main/java/htsjdk/samtools/util/TestUtil.java +++ b/src/main/java/htsjdk/samtools/util/TestUtil.java @@ -24,19 +24,13 @@ package htsjdk.samtools.util; import htsjdk.samtools.SAMException; - import java.io.*; -import java.nio.file.FileVisitResult; -import java.nio.file.Files; import 
java.nio.file.Path; -import java.nio.file.SimpleFileVisitor; -import java.nio.file.attribute.BasicFileAttributes; public class TestUtil { public static final int RANDOM_SEED = 42; - /** * Base url where all test files for http tests are found */ @@ -49,10 +43,8 @@ public static File getTempDirectory(final String prefix, final String suffix) { } catch (IOException e) { throw new SAMException("Failed to create temporary file.", e); } - if (!tempDirectory.delete()) - throw new SAMException("Failed to delete file: " + tempDirectory); - if (!tempDirectory.mkdir()) - throw new SAMException("Failed to make directory: " + tempDirectory); + if (!tempDirectory.delete()) throw new SAMException("Failed to delete file: " + tempDirectory); + if (!tempDirectory.mkdir()) throw new SAMException("Failed to make directory: " + tempDirectory); tempDirectory.deleteOnExit(); return tempDirectory; } @@ -74,7 +66,8 @@ public static File getTempDirecory(final String prefix, final String suffix) { * @throws IOException * @throws ClassNotFoundException */ - public static T serializeAndDeserialize(T input) throws IOException, ClassNotFoundException { + public static T serializeAndDeserialize(T input) + throws IOException, ClassNotFoundException { final ByteArrayOutputStream byteArrayStream = new ByteArrayOutputStream(); final ObjectOutputStream out = new ObjectOutputStream(byteArrayStream); diff --git a/src/main/java/htsjdk/samtools/util/TrimmingUtil.java b/src/main/java/htsjdk/samtools/util/TrimmingUtil.java index 06667d3e9f..5cc8fcb16a 100644 --- a/src/main/java/htsjdk/samtools/util/TrimmingUtil.java +++ b/src/main/java/htsjdk/samtools/util/TrimmingUtil.java @@ -53,7 +53,7 @@ public static int findQualityTrimPoint(final byte[] quals, final int trimQual) { int score = 0, maxScore = 0, trimPoint = length; if (trimQual < 1 || length == 0) return 0; - for (int i=length-1; i>=0; --i) { + for (int i = length - 1; i >= 0; --i) { score += trimQual - (quals[i]); if (score < 0) break; if (score > 
maxScore) { diff --git a/src/main/java/htsjdk/samtools/util/ftp/FTPClient.java b/src/main/java/htsjdk/samtools/util/ftp/FTPClient.java index dc15915813..e28c347687 100644 --- a/src/main/java/htsjdk/samtools/util/ftp/FTPClient.java +++ b/src/main/java/htsjdk/samtools/util/ftp/FTPClient.java @@ -1,242 +1,232 @@ - -/* - * Copyright (c) 2007-2011 by The Broad Institute of MIT and Harvard. All Rights Reserved. - * - * This software is licensed under the terms of the GNU Lesser General Public License (LGPL), - * Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php. - * - * THE SOFTWARE IS PROVIDED "AS IS." THE BROAD AND MIT MAKE NO REPRESENTATIONS OR - * WARRANTIES OF ANY KIND CONCERNING THE SOFTWARE, EXPRESS OR IMPLIED, INCLUDING, - * WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR - * PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER - * OR NOT DISCOVERABLE. IN NO EVENT SHALL THE BROAD OR MIT, OR THEIR RESPECTIVE - * TRUSTEES, DIRECTORS, OFFICERS, EMPLOYEES, AND AFFILIATES BE LIABLE FOR ANY DAMAGES - * OF ANY KIND, INCLUDING, WITHOUT LIMITATION, INCIDENTAL OR CONSEQUENTIAL DAMAGES, - * ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER - * THE BROAD OR MIT SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT - * SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. - */ - -package htsjdk.samtools.util.ftp; - -import htsjdk.samtools.SAMException; - -import java.io.BufferedReader; -import java.io.FilterInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.PrintStream; -import java.net.Socket; -import java.util.NoSuchElementException; -import java.util.StringTokenizer; - -/** - * @author jrobinso - * @date Oct 30, 2010 - */ -public class FTPClient { - - private Socket commandSocket = null; - - public static final int READ_TIMEOUT = 5 * 60 * 1000; - - /** - * Stream to write commands. 
- * NOTE -- a PrintStream is used no purpose (as opposed to PrintWriter). PrintWriter will not work! - */ - private PrintStream commandStream = null; - private BufferedReader responseReader = null; - private InputStream dataStream; - private String passiveHost; - private int passivePort; - long restPosition = -1; - String host; - - /** - * Connects to the given FTP host on the default port. - */ - public FTPReply connect(String host) throws IOException { - this.host = host; - commandSocket = new Socket(host, 21); - commandSocket.setSoTimeout(READ_TIMEOUT); - commandStream = new PrintStream(commandSocket.getOutputStream()); - responseReader = new BufferedReader(new InputStreamReader(commandSocket.getInputStream())); - - FTPReply reply = new FTPReply(responseReader); - - if (!reply.isPositiveCompletion()) { - disconnect(); - } - - return reply; - } - - - /** - * Executes the given FTP command on our current connection, returning the - * three digit response code from the server. This method only works for - * commands that do not require an additional data port. - */ - public FTPReply executeCommand(String command) throws IOException { - commandStream.println(command); - return new FTPReply(responseReader); - } - - - /** - * Wrapper for the commands user [username] and pass - * [password]. 
- */ - public FTPReply login(String username, String password) throws IOException { - FTPReply response = executeCommand("user " + username); - if (!response.isPositiveIntermediate()) return response; - response = executeCommand("pass " + password); - return response; - } - - public FTPReply quit() throws IOException { - return executeCommand("QUIT"); - } - - public FTPReply binary() throws IOException { - return executeCommand("TYPE I"); - } - - - public FTPReply pasv() throws IOException { - - FTPReply reply = executeCommand("PASV"); - - if (reply.getCode() == 226 || reply.getCode() == 426) { - reply = getReply(); - } - - String response = reply.getReplyString(); - - - int code = reply.getCode(); - - int opening = response.indexOf('('); - int closing = response.indexOf(')', opening + 1); - if (closing > 0) { - String dataLink = response.substring(opening + 1, closing); - StringTokenizer tokenizer = new StringTokenizer(dataLink, ","); - try { - passiveHost = tokenizer.nextToken() + "." + tokenizer.nextToken() + "." - + tokenizer.nextToken() + "." 
+ tokenizer.nextToken(); - passivePort = Integer.parseInt(tokenizer.nextToken()) * 256 - + Integer.parseInt(tokenizer.nextToken()); - } catch (NumberFormatException e) { - throw new IOException("SimpleFTP received bad data link information: " + response); - } catch (NoSuchElementException e){ - throw new IOException("SimpleFTP received bad data link information: " + response); - } - } - - if (reply.isPositiveCompletion()) { - if (dataStream == null) { - Socket dataSocket = new Socket(passiveHost, passivePort); - dataSocket.setSoTimeout(READ_TIMEOUT); - dataStream = new SocketInputStream(dataSocket, dataSocket.getInputStream()); - } - } - return reply; - } - - public void setRestPosition(long position) { - this.restPosition = position; - } - - public FTPReply retr(String file) throws IOException { - - if (restPosition >= 0) { - FTPReply restReply = executeCommand("REST " + restPosition); - if (!restReply.isSuccess()) { - return restReply; - } - } - - return executeCommand("RETR " + file); - } - - public FTPReply getReply() throws IOException { - return new FTPReply(responseReader); - } - - /** - * Return the size of the remote file - * - * @param file - * @return - * @throws IOException - */ - public FTPReply size(String file) throws IOException { - - return executeCommand("SIZE " + file); - - } - - - public InputStream getDataStream() throws IOException { - return dataStream; - } - - public void closeDataStream() throws IOException { - // NOTE -- some ftp servers seem to need a pause before closing the data stream - // if (dataStream != null) { - // try { - // // - // Thread.sleep(3000); - // } catch (InterruptedException e) { - // - // } - if( dataStream != null) { - dataStream.close(); - dataStream = null; - } - } - - - /** - * Disconnects from the host to which we are currently connected. 
- */ - public void disconnect() { - try { - //quit(); - if (commandStream != null) { - commandStream.close(); - responseReader.close(); - commandSocket.close(); - - if (dataStream != null) { - dataStream.close(); - } - } - } catch (IOException e) { - throw new SAMException("Error disconnecting", e); - } - - commandStream = null; - responseReader = null; - commandSocket = null; - } - - class SocketInputStream extends FilterInputStream { - - Socket socket; - - SocketInputStream(Socket socket, InputStream inputStream) { - super(inputStream); - this.socket = socket; - } - - @Override - public void close() throws IOException { - super.close(); - socket.close(); - FTPClient.this.dataStream = null; - } - } - -} +/* + * Copyright (c) 2007-2011 by The Broad Institute of MIT and Harvard. All Rights Reserved. + * + * This software is licensed under the terms of the GNU Lesser General Public License (LGPL), + * Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php. + * + * THE SOFTWARE IS PROVIDED "AS IS." THE BROAD AND MIT MAKE NO REPRESENTATIONS OR + * WARRANTIES OF ANY KIND CONCERNING THE SOFTWARE, EXPRESS OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR + * PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER + * OR NOT DISCOVERABLE. IN NO EVENT SHALL THE BROAD OR MIT, OR THEIR RESPECTIVE + * TRUSTEES, DIRECTORS, OFFICERS, EMPLOYEES, AND AFFILIATES BE LIABLE FOR ANY DAMAGES + * OF ANY KIND, INCLUDING, WITHOUT LIMITATION, INCIDENTAL OR CONSEQUENTIAL DAMAGES, + * ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER + * THE BROAD OR MIT SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT + * SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. 
+ */ + +package htsjdk.samtools.util.ftp; + +import htsjdk.samtools.SAMException; +import java.io.BufferedReader; +import java.io.FilterInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.PrintStream; +import java.net.Socket; +import java.util.NoSuchElementException; +import java.util.StringTokenizer; + +/** + * @author jrobinso + * @date Oct 30, 2010 + */ +public class FTPClient { + + private Socket commandSocket = null; + + public static final int READ_TIMEOUT = 5 * 60 * 1000; + + /** + * Stream to write commands. + * NOTE -- a PrintStream is used no purpose (as opposed to PrintWriter). PrintWriter will not work! + */ + private PrintStream commandStream = null; + + private BufferedReader responseReader = null; + private InputStream dataStream; + private String passiveHost; + private int passivePort; + long restPosition = -1; + String host; + + /** + * Connects to the given FTP host on the default port. + */ + public FTPReply connect(String host) throws IOException { + this.host = host; + commandSocket = new Socket(host, 21); + commandSocket.setSoTimeout(READ_TIMEOUT); + commandStream = new PrintStream(commandSocket.getOutputStream()); + responseReader = new BufferedReader(new InputStreamReader(commandSocket.getInputStream())); + + FTPReply reply = new FTPReply(responseReader); + + if (!reply.isPositiveCompletion()) { + disconnect(); + } + + return reply; + } + + /** + * Executes the given FTP command on our current connection, returning the + * three digit response code from the server. This method only works for + * commands that do not require an additional data port. + */ + public FTPReply executeCommand(String command) throws IOException { + commandStream.println(command); + return new FTPReply(responseReader); + } + + /** + * Wrapper for the commands user [username] and pass + * [password]. 
+ */ + public FTPReply login(String username, String password) throws IOException { + FTPReply response = executeCommand("user " + username); + if (!response.isPositiveIntermediate()) return response; + response = executeCommand("pass " + password); + return response; + } + + public FTPReply quit() throws IOException { + return executeCommand("QUIT"); + } + + public FTPReply binary() throws IOException { + return executeCommand("TYPE I"); + } + + public FTPReply pasv() throws IOException { + + FTPReply reply = executeCommand("PASV"); + + if (reply.getCode() == 226 || reply.getCode() == 426) { + reply = getReply(); + } + + String response = reply.getReplyString(); + + int code = reply.getCode(); + + int opening = response.indexOf('('); + int closing = response.indexOf(')', opening + 1); + if (closing > 0) { + String dataLink = response.substring(opening + 1, closing); + StringTokenizer tokenizer = new StringTokenizer(dataLink, ","); + try { + passiveHost = tokenizer.nextToken() + "." + tokenizer.nextToken() + "." + tokenizer.nextToken() + "." 
+ + tokenizer.nextToken(); + passivePort = Integer.parseInt(tokenizer.nextToken()) * 256 + Integer.parseInt(tokenizer.nextToken()); + } catch (NumberFormatException e) { + throw new IOException("SimpleFTP received bad data link information: " + response); + } catch (NoSuchElementException e) { + throw new IOException("SimpleFTP received bad data link information: " + response); + } + } + + if (reply.isPositiveCompletion()) { + if (dataStream == null) { + Socket dataSocket = new Socket(passiveHost, passivePort); + dataSocket.setSoTimeout(READ_TIMEOUT); + dataStream = new SocketInputStream(dataSocket, dataSocket.getInputStream()); + } + } + return reply; + } + + public void setRestPosition(long position) { + this.restPosition = position; + } + + public FTPReply retr(String file) throws IOException { + + if (restPosition >= 0) { + FTPReply restReply = executeCommand("REST " + restPosition); + if (!restReply.isSuccess()) { + return restReply; + } + } + + return executeCommand("RETR " + file); + } + + public FTPReply getReply() throws IOException { + return new FTPReply(responseReader); + } + + /** + * Return the size of the remote file + * + * @param file + * @return + * @throws IOException + */ + public FTPReply size(String file) throws IOException { + + return executeCommand("SIZE " + file); + } + + public InputStream getDataStream() throws IOException { + return dataStream; + } + + public void closeDataStream() throws IOException { + // NOTE -- some ftp servers seem to need a pause before closing the data stream + // if (dataStream != null) { + // try { + // // + // Thread.sleep(3000); + // } catch (InterruptedException e) { + // + // } + if (dataStream != null) { + dataStream.close(); + dataStream = null; + } + } + + /** + * Disconnects from the host to which we are currently connected. 
+ */ + public void disconnect() { + try { + // quit(); + if (commandStream != null) { + commandStream.close(); + responseReader.close(); + commandSocket.close(); + + if (dataStream != null) { + dataStream.close(); + } + } + } catch (IOException e) { + throw new SAMException("Error disconnecting", e); + } + + commandStream = null; + responseReader = null; + commandSocket = null; + } + + class SocketInputStream extends FilterInputStream { + + Socket socket; + + SocketInputStream(Socket socket, InputStream inputStream) { + super(inputStream); + this.socket = socket; + } + + @Override + public void close() throws IOException { + super.close(); + socket.close(); + FTPClient.this.dataStream = null; + } + } +} diff --git a/src/main/java/htsjdk/samtools/util/ftp/FTPReply.java b/src/main/java/htsjdk/samtools/util/ftp/FTPReply.java index 4648d9d880..8756c53e27 100644 --- a/src/main/java/htsjdk/samtools/util/ftp/FTPReply.java +++ b/src/main/java/htsjdk/samtools/util/ftp/FTPReply.java @@ -1,113 +1,107 @@ -/* - * Copyright (c) 2007-2011 by The Broad Institute of MIT and Harvard. All Rights Reserved. - * - * This software is licensed under the terms of the GNU Lesser General Public License (LGPL), - * Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php. - * - * THE SOFTWARE IS PROVIDED "AS IS." THE BROAD AND MIT MAKE NO REPRESENTATIONS OR - * WARRANTES OF ANY KIND CONCERNING THE SOFTWARE, EXPRESS OR IMPLIED, INCLUDING, - * WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR - * PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER - * OR NOT DISCOVERABLE. 
IN NO EVENT SHALL THE BROAD OR MIT, OR THEIR RESPECTIVE - * TRUSTEES, DIRECTORS, OFFICERS, EMPLOYEES, AND AFFILIATES BE LIABLE FOR ANY DAMAGES - * OF ANY KIND, INCLUDING, WITHOUT LIMITATION, INCIDENTAL OR CONSEQUENTIAL DAMAGES, - * ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER - * THE BROAD OR MIT SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT - * SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. - */ - -package htsjdk.samtools.util.ftp; - -import java.io.BufferedReader; -import java.io.IOException; - -/** - * @author jrobinso - * @date Oct 30, 2010 - */ -public class FTPReply { - - String reply; - int code; - - public FTPReply(BufferedReader inputStream) throws IOException { - - String response = null; - do { - response = inputStream.readLine(); - } while (response != null && - !(Character.isDigit(response.charAt(0)) && - Character.isDigit(response.charAt(1)) && - Character.isDigit(response.charAt(2)) && - response.charAt(3) == ' ')); - if (response == null || response.length() < 3) { - code = -1; - } else { - code = Integer.parseInt(response.substring(0, 3)); - reply = response.substring(3).trim(); - - } - } - - /** - * Gets server reply code from the control port after an ftp command has - * been executed. It knows the last line of the response because it begins - * with a 3 digit number and a space, (a dash instead of a space would be a - * continuation). - */ - - public int getCode() throws IOException { - return code; - } - - - /** - * Gets server reply string from the control port after an ftp command has - * been executed. This consists only of the last line of the response, - * and only the part after the response code. - */ - public String getReplyString() - throws IOException { - - return reply; - } - - - public boolean isSuccess() { - return isPositiveCompletion() || isPositiveIntermediate(); - } - - /** - * Determine if a reply code is a positive completion response. 
All - * codes beginning with a 2 are positive completion responses. - * The FTP server will send a positive completion response on the final - * successful completion of a command. - *

    - * - * @return True if a reply code is a postive completion response, false - * if not. - * * - */ - public boolean isPositiveCompletion() { - return (code >= 200 && code < 300); - } - - - /** - * Determine if a reply code is a positive intermediate response. All - * codes beginning with a 3 are positive intermediate responses. - * The FTP server will send a positive intermediate response on the - * successful completion of one part of a multi-part sequence of - * commands. For example, after a successful USER command, a positive - * intermediate response will be sent to indicate that the server is - * ready for the PASS command. - *

    - * - * @return True if a reply code is a postive intermediate response, false - * if not. - * * - */ - public boolean isPositiveIntermediate() { - return (code >= 300 && code < 400); - } -} +/* + * Copyright (c) 2007-2011 by The Broad Institute of MIT and Harvard. All Rights Reserved. + * + * This software is licensed under the terms of the GNU Lesser General Public License (LGPL), + * Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php. + * + * THE SOFTWARE IS PROVIDED "AS IS." THE BROAD AND MIT MAKE NO REPRESENTATIONS OR + * WARRANTES OF ANY KIND CONCERNING THE SOFTWARE, EXPRESS OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR + * PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER + * OR NOT DISCOVERABLE. IN NO EVENT SHALL THE BROAD OR MIT, OR THEIR RESPECTIVE + * TRUSTEES, DIRECTORS, OFFICERS, EMPLOYEES, AND AFFILIATES BE LIABLE FOR ANY DAMAGES + * OF ANY KIND, INCLUDING, WITHOUT LIMITATION, INCIDENTAL OR CONSEQUENTIAL DAMAGES, + * ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER + * THE BROAD OR MIT SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT + * SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. 
+ */ + +package htsjdk.samtools.util.ftp; + +import java.io.BufferedReader; +import java.io.IOException; + +/** + * @author jrobinso + * @date Oct 30, 2010 + */ +public class FTPReply { + + String reply; + int code; + + public FTPReply(BufferedReader inputStream) throws IOException { + + String response = null; + do { + response = inputStream.readLine(); + } while (response != null + && !(Character.isDigit(response.charAt(0)) + && Character.isDigit(response.charAt(1)) + && Character.isDigit(response.charAt(2)) + && response.charAt(3) == ' ')); + if (response == null || response.length() < 3) { + code = -1; + } else { + code = Integer.parseInt(response.substring(0, 3)); + reply = response.substring(3).trim(); + } + } + + /** + * Gets server reply code from the control port after an ftp command has + * been executed. It knows the last line of the response because it begins + * with a 3 digit number and a space, (a dash instead of a space would be a + * continuation). + */ + public int getCode() throws IOException { + return code; + } + + /** + * Gets server reply string from the control port after an ftp command has + * been executed. This consists only of the last line of the response, + * and only the part after the response code. + */ + public String getReplyString() throws IOException { + + return reply; + } + + public boolean isSuccess() { + return isPositiveCompletion() || isPositiveIntermediate(); + } + + /** + * Determine if a reply code is a positive completion response. All + * codes beginning with a 2 are positive completion responses. + * The FTP server will send a positive completion response on the final + * successful completion of a command. + *

    + * + * @return True if a reply code is a postive completion response, false + * if not. + * * + */ + public boolean isPositiveCompletion() { + return (code >= 200 && code < 300); + } + + /** + * Determine if a reply code is a positive intermediate response. All + * codes beginning with a 3 are positive intermediate responses. + * The FTP server will send a positive intermediate response on the + * successful completion of one part of a multi-part sequence of + * commands. For example, after a successful USER command, a positive + * intermediate response will be sent to indicate that the server is + * ready for the PASS command. + *

    + * + * @return True if a reply code is a postive intermediate response, false + * if not. + * * + */ + public boolean isPositiveIntermediate() { + return (code >= 300 && code < 400); + } +} diff --git a/src/main/java/htsjdk/samtools/util/ftp/FTPStream.java b/src/main/java/htsjdk/samtools/util/ftp/FTPStream.java index 11a531cad6..71266d758a 100644 --- a/src/main/java/htsjdk/samtools/util/ftp/FTPStream.java +++ b/src/main/java/htsjdk/samtools/util/ftp/FTPStream.java @@ -1,53 +1,52 @@ -/* - * Copyright (c) 2007-2011 by The Broad Institute of MIT and Harvard. All Rights Reserved. - * - * This software is licensed under the terms of the GNU Lesser General Public License (LGPL), - * Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php. - * - * THE SOFTWARE IS PROVIDED "AS IS." THE BROAD AND MIT MAKE NO REPRESENTATIONS OR - * WARRANTES OF ANY KIND CONCERNING THE SOFTWARE, EXPRESS OR IMPLIED, INCLUDING, - * WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR - * PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER - * OR NOT DISCOVERABLE. IN NO EVENT SHALL THE BROAD OR MIT, OR THEIR RESPECTIVE - * TRUSTEES, DIRECTORS, OFFICERS, EMPLOYEES, AND AFFILIATES BE LIABLE FOR ANY DAMAGES - * OF ANY KIND, INCLUDING, WITHOUT LIMITATION, INCIDENTAL OR CONSEQUENTIAL DAMAGES, - * ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER - * THE BROAD OR MIT SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT - * SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. - */ - -package htsjdk.samtools.util.ftp; - -import java.io.FilterInputStream; -import java.io.IOException; - -/** - * A "non-seekable" ftp stream. This one doesn't support random access. - * - * It is assumed that the ftp client has been connected, put in passive mode, - * set to binary, and otherwise prepped for reading before creating this stream. 
- * - * @author jrobinso - * @date Oct 31, 2010 - */ -public class FTPStream extends FilterInputStream { - - FTPClient ftp; - - public FTPStream(FTPClient ftp) throws IOException { - super(ftp.getDataStream()); - this.ftp = ftp; - } - - - @Override - public int read(byte[] bytes, int i, int i1) throws IOException { - return super.read(bytes, i, i1); //To change body of overridden methods use File | Settings | File Templates. - } - - @Override - public void close() throws IOException { - super.close(); - ftp.disconnect(); - } -} +/* + * Copyright (c) 2007-2011 by The Broad Institute of MIT and Harvard. All Rights Reserved. + * + * This software is licensed under the terms of the GNU Lesser General Public License (LGPL), + * Version 2.1 which is available at http://www.opensource.org/licenses/lgpl-2.1.php. + * + * THE SOFTWARE IS PROVIDED "AS IS." THE BROAD AND MIT MAKE NO REPRESENTATIONS OR + * WARRANTES OF ANY KIND CONCERNING THE SOFTWARE, EXPRESS OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR + * PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER + * OR NOT DISCOVERABLE. IN NO EVENT SHALL THE BROAD OR MIT, OR THEIR RESPECTIVE + * TRUSTEES, DIRECTORS, OFFICERS, EMPLOYEES, AND AFFILIATES BE LIABLE FOR ANY DAMAGES + * OF ANY KIND, INCLUDING, WITHOUT LIMITATION, INCIDENTAL OR CONSEQUENTIAL DAMAGES, + * ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER + * THE BROAD OR MIT SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT + * SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. + */ + +package htsjdk.samtools.util.ftp; + +import java.io.FilterInputStream; +import java.io.IOException; + +/** + * A "non-seekable" ftp stream. This one doesn't support random access. + * + * It is assumed that the ftp client has been connected, put in passive mode, + * set to binary, and otherwise prepped for reading before creating this stream. 
+ * + * @author jrobinso + * @date Oct 31, 2010 + */ +public class FTPStream extends FilterInputStream { + + FTPClient ftp; + + public FTPStream(FTPClient ftp) throws IOException { + super(ftp.getDataStream()); + this.ftp = ftp; + } + + @Override + public int read(byte[] bytes, int i, int i1) throws IOException { + return super.read(bytes, i, i1); // To change body of overridden methods use File | Settings | File Templates. + } + + @Override + public void close() throws IOException { + super.close(); + ftp.disconnect(); + } +} diff --git a/src/main/java/htsjdk/samtools/util/ftp/FTPUtils.java b/src/main/java/htsjdk/samtools/util/ftp/FTPUtils.java index a9f8cded75..4f33d43d27 100644 --- a/src/main/java/htsjdk/samtools/util/ftp/FTPUtils.java +++ b/src/main/java/htsjdk/samtools/util/ftp/FTPUtils.java @@ -18,11 +18,9 @@ package htsjdk.samtools.util.ftp; - import htsjdk.samtools.SAMException; import htsjdk.samtools.seekablestream.UserPasswordInput; import htsjdk.samtools.util.RuntimeIOException; - import java.io.IOException; import java.io.InputStream; import java.net.URL; @@ -30,7 +28,6 @@ import java.util.HashMap; import java.util.Map; - /** * @author jrobinso * @date Aug 31, 2010 @@ -52,8 +49,7 @@ public static boolean resourceAvailable(URL url) { } catch (IOException e) { return false; - } - finally { + } finally { if (is != null) { try { is.close(); @@ -71,16 +67,14 @@ public static long getContentLength(URL url) throws IOException { String sizeString = ftp.executeCommand("size " + url.getPath()).getReplyString(); return Integer.parseInt(sizeString); } catch (Exception e) { - return -1 ; - } - finally { - if(ftp != null) { + return -1; + } finally { + if (ftp != null) { ftp.disconnect(); } } } - /** * Connect to an FTP server * @@ -90,7 +84,8 @@ public static long getContentLength(URL url) throws IOException { * @return * @throws IOException */ - public static synchronized FTPClient connect(String host, String userInfo, UserPasswordInput userPasswordInput) throws 
IOException { + public static synchronized FTPClient connect(String host, String userInfo, UserPasswordInput userPasswordInput) + throws IOException { FTPClient ftp = new FTPClient(); FTPReply reply = ftp.connect(host); @@ -114,31 +109,29 @@ public static synchronized FTPClient connect(String host, String userInfo, UserP reply = ftp.login(user, password); if (!reply.isSuccess()) { - if (userPasswordInput == null) { + if (userPasswordInput == null) { throw new RuntimeIOException("Login failure for host: " + host); - } - else { - userPasswordInput.setHost(host); - boolean success = false; - while (!success) { - if (userPasswordInput.showDialog()) { - user = userPasswordInput.getUser(); - password = userPasswordInput.getPassword(); - reply = ftp.login(user, password); - success = reply.isSuccess(); - } else { - // canceled - break; - } - - } - if (success) { - userInfo = user + ":" + password; - userCredentials.put(host, userInfo); - } else { - throw new RuntimeIOException("Login failure for host: " + host); - } - } + } else { + userPasswordInput.setHost(host); + boolean success = false; + while (!success) { + if (userPasswordInput.showDialog()) { + user = userPasswordInput.getUser(); + password = userPasswordInput.getPassword(); + reply = ftp.login(user, password); + success = reply.isSuccess(); + } else { + // canceled + break; + } + } + if (success) { + userInfo = user + ":" + password; + userCredentials.put(host, userInfo); + } else { + throw new RuntimeIOException("Login failure for host: " + host); + } + } } reply = ftp.binary(); @@ -147,8 +140,5 @@ public static synchronized FTPClient connect(String host, String userInfo, UserP } return ftp; - } - } - diff --git a/src/main/java/htsjdk/samtools/util/htsget/HtsgetErrorResponse.java b/src/main/java/htsjdk/samtools/util/htsget/HtsgetErrorResponse.java index 9cf10bc4ad..24dc5b4c21 100644 --- a/src/main/java/htsjdk/samtools/util/htsget/HtsgetErrorResponse.java +++ 
b/src/main/java/htsjdk/samtools/util/htsget/HtsgetErrorResponse.java @@ -1,6 +1,5 @@ package htsjdk.samtools.util.htsget; - import org.json.JSONObject; /** diff --git a/src/main/java/htsjdk/samtools/util/htsget/HtsgetMalformedResponseException.java b/src/main/java/htsjdk/samtools/util/htsget/HtsgetMalformedResponseException.java index de746fcc9c..811abe07ba 100644 --- a/src/main/java/htsjdk/samtools/util/htsget/HtsgetMalformedResponseException.java +++ b/src/main/java/htsjdk/samtools/util/htsget/HtsgetMalformedResponseException.java @@ -14,4 +14,4 @@ public HtsgetMalformedResponseException(final String s, final Throwable throwabl public HtsgetMalformedResponseException(final Throwable throwable) { super(throwable); } -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/samtools/util/htsget/HtsgetPOSTRequest.java b/src/main/java/htsjdk/samtools/util/htsget/HtsgetPOSTRequest.java index 7457491419..788f91d56f 100644 --- a/src/main/java/htsjdk/samtools/util/htsget/HtsgetPOSTRequest.java +++ b/src/main/java/htsjdk/samtools/util/htsget/HtsgetPOSTRequest.java @@ -3,15 +3,14 @@ import htsjdk.samtools.SAMRecord; import htsjdk.samtools.util.Locatable; import htsjdk.samtools.util.RuntimeIOException; -import org.json.JSONArray; -import org.json.JSONObject; - import java.io.IOException; import java.net.HttpURLConnection; import java.net.URI; import java.util.ArrayList; import java.util.Collection; import java.util.List; +import org.json.JSONArray; +import org.json.JSONObject; /** * Builder for an htsget POST request that allows opening a connection @@ -30,7 +29,6 @@ public class HtsgetPOSTRequest extends HtsgetRequest { * @param endpoint the full URI including both server path and the ID of the htsget resource, * without the filtering parameters defined in the htsget spec such as start or referenceName */ - public HtsgetPOSTRequest(final URI endpoint) { super(endpoint); this.intervals = new ArrayList<>(); @@ -145,11 +143,10 @@ public JSONObject queryBody() { } if 
(!this.fields.isEmpty()) { postBody.put( - "fields", - new JSONArray(this.getFields().stream() - .map(HtsgetRequestField::toString) - .toArray()) - ); + "fields", + new JSONArray(this.getFields().stream() + .map(HtsgetRequestField::toString) + .toArray())); } if (!this.tags.isEmpty()) { postBody.put("tags", new JSONArray(this.getTags().toArray())); @@ -158,25 +155,27 @@ public JSONObject queryBody() { postBody.put("notags", new JSONArray(this.getNoTags().toArray())); } if (!this.intervals.isEmpty()) { - postBody.put("regions", new JSONArray( - this.intervals.stream() - .map(interval -> { - final JSONObject intervalJson = new JSONObject(); - if (interval != null && interval.getContig() != null) { - intervalJson.put("referenceName", interval.getContig()); - // Do not insert start and end for unmapped reads or if we are requesting the entire contig - if (!interval.getContig().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) { - // getStart() - 1 is necessary as GA4GH standards use 0-based coordinates while Locatables are 1-based - intervalJson.put("start", interval.getStart() - 1); - if (interval.getEnd() != Integer.MAX_VALUE && interval.getEnd() != -1) { - intervalJson.put("end", interval.getEnd()); + postBody.put( + "regions", + new JSONArray(this.intervals.stream() + .map(interval -> { + final JSONObject intervalJson = new JSONObject(); + if (interval != null && interval.getContig() != null) { + intervalJson.put("referenceName", interval.getContig()); + // Do not insert start and end for unmapped reads or if we are requesting the entire + // contig + if (!interval.getContig().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) { + // getStart() - 1 is necessary as GA4GH standards use 0-based coordinates while + // Locatables are 1-based + intervalJson.put("start", interval.getStart() - 1); + if (interval.getEnd() != Integer.MAX_VALUE && interval.getEnd() != -1) { + intervalJson.put("end", interval.getEnd()); + } + } } - } - } - return intervalJson; - }) - .toArray() - )); 
+ return intervalJson; + }) + .toArray())); } return postBody; } diff --git a/src/main/java/htsjdk/samtools/util/htsget/HtsgetRequest.java b/src/main/java/htsjdk/samtools/util/htsget/HtsgetRequest.java index 1a46195e8f..f27ac622a4 100644 --- a/src/main/java/htsjdk/samtools/util/htsget/HtsgetRequest.java +++ b/src/main/java/htsjdk/samtools/util/htsget/HtsgetRequest.java @@ -2,7 +2,6 @@ import htsjdk.samtools.SAMRecord; import htsjdk.samtools.util.*; - import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; @@ -13,7 +12,6 @@ import java.util.*; import java.util.stream.Collectors; - /** * Builder for an htsget GET request that allows opening a connection * using the request after validating that it is properly formed. @@ -21,7 +19,7 @@ * This class currently supports version 1.2.0 of the spec as defined in https://samtools.github.io/hts-specs/htsget.html */ public class HtsgetRequest { - private final static Log log = Log.getInstance(HtsgetRequest.class); + private static final Log log = Log.getInstance(HtsgetRequest.class); public static final Interval UNMAPPED_UNPLACED_INTERVAL = new Interval("*", 1, Integer.MAX_VALUE); protected static final String PROTOCOL_VERSION = "vnd.ga4gh.htsget.v1.2.0"; protected static final String ACCEPT_TYPE = "application/" + PROTOCOL_VERSION + "+json"; @@ -162,31 +160,35 @@ public HtsgetRequest withNotags(final Collection notags) { * Validates that the user query obeys htsget spec */ public void validateRequest() { - if (this.dataClass != null && this.dataClass == HtsgetClass.header && ( - this.interval != null || - !this.fields.isEmpty() || - !this.tags.isEmpty() || - !this.notags.isEmpty())) { - throw new IllegalArgumentException("Invalid request: no query parameters except `format` may be specified when class=header"); + if (this.dataClass != null + && this.dataClass == HtsgetClass.header + && (this.interval != null + || !this.fields.isEmpty() + || !this.tags.isEmpty() + || !this.notags.isEmpty())) { + 
throw new IllegalArgumentException( + "Invalid request: no query parameters except `format` may be specified when class=header"); } if (this.format != null) { final String path = this.endpoint.getPath(); - if ((path.endsWith(FileExtensions.BAM) || path.endsWith(FileExtensions.CRAM)) && ( - this.format != HtsgetFormat.BAM && this.format != HtsgetFormat.CRAM)) { - throw new IllegalArgumentException("Specified reads format: " + this.format + " is incompatible with id's file extension " + path); + if ((path.endsWith(FileExtensions.BAM) || path.endsWith(FileExtensions.CRAM)) + && (this.format != HtsgetFormat.BAM && this.format != HtsgetFormat.CRAM)) { + throw new IllegalArgumentException( + "Specified reads format: " + this.format + " is incompatible with id's file extension " + path); } - if (FileExtensions.VCF_LIST.stream().anyMatch(path::endsWith) && ( - this.format != HtsgetFormat.VCF && this.format != HtsgetFormat.BCF)) { - throw new IllegalArgumentException("Specified variant format: " + this.format + " is incompatible with id's file extension " + path); + if (FileExtensions.VCF_LIST.stream().anyMatch(path::endsWith) + && (this.format != HtsgetFormat.VCF && this.format != HtsgetFormat.BCF)) { + throw new IllegalArgumentException("Specified variant format: " + this.format + + " is incompatible with id's file extension " + path); } } - final String intersections = this.tags.stream() - .filter(getNoTags()::contains) - .collect(Collectors.joining(", ")); + final String intersections = + this.tags.stream().filter(getNoTags()::contains).collect(Collectors.joining(", ")); if (!intersections.isEmpty()) { - throw new IllegalArgumentException("Invalid request: tags and notags overlap in the following fields: " + intersections); + throw new IllegalArgumentException( + "Invalid request: tags and notags overlap in the following fields: " + intersections); } } @@ -216,8 +218,8 @@ public URI toURI() { } if (!this.fields.isEmpty()) { queryParams.put( - "fields", - 
this.fields.stream().map(HtsgetRequestField::toString).collect(Collectors.joining(","))); + "fields", + this.fields.stream().map(HtsgetRequestField::toString).collect(Collectors.joining(","))); } if (!this.tags.isEmpty()) { queryParams.put("tags", String.join(",", this.tags)); @@ -227,17 +229,21 @@ public URI toURI() { } try { final String queryString = queryParams.entrySet().stream() - .map(e -> e.getKey() + "=" + e.getValue()) - .collect(Collectors.joining("&")); + .map(e -> e.getKey() + "=" + e.getValue()) + .collect(Collectors.joining("&")); final String updatedQuery = this.endpoint.getQuery() == null - ? (queryString.isEmpty() ? null : queryString) - : this.endpoint.getQuery() + "&" + queryString; - - return new URI(this.endpoint.getScheme(), - this.endpoint.getUserInfo(), this.endpoint.getHost(), this.endpoint.getPort(), - this.endpoint.getPath(), updatedQuery, - this.endpoint.getFragment()); + ? (queryString.isEmpty() ? null : queryString) + : this.endpoint.getQuery() + "&" + queryString; + + return new URI( + this.endpoint.getScheme(), + this.endpoint.getUserInfo(), + this.endpoint.getHost(), + this.endpoint.getPort(), + this.endpoint.getPath(), + updatedQuery, + this.endpoint.getFragment()); } catch (final URISyntaxException e) { throw new IllegalArgumentException("Could not create URI for request", e); } @@ -268,11 +274,11 @@ public HtsgetResponse getResponse() { final InputStream is = conn.getInputStream(); final int statusCode = conn.getResponseCode(); final String respContentType = conn.getContentType(); - if (respContentType != null && - !respContentType.isEmpty() && - !respContentType.contains(HtsgetRequest.PROTOCOL_VERSION)) { - log.warn("Supported htsget protocol version: " + HtsgetRequest.PROTOCOL_VERSION + - "may not be compatible with received content type: " + respContentType); + if (respContentType != null + && !respContentType.isEmpty() + && !respContentType.contains(HtsgetRequest.PROTOCOL_VERSION)) { + log.warn("Supported htsget protocol 
version: " + HtsgetRequest.PROTOCOL_VERSION + + "may not be compatible with received content type: " + respContentType); } final BufferedReader reader = new BufferedReader(new InputStreamReader(is)); @@ -286,9 +292,9 @@ public HtsgetResponse getResponse() { if (400 <= statusCode && statusCode < 500) { final HtsgetErrorResponse err = HtsgetErrorResponse.parse(json); throw new IllegalArgumentException( - "Invalid request, received error code: " + statusCode + - ", error type: " + err.getError() + - ", message: " + err.getMessage()); + "Invalid request, received error code: " + statusCode + ", error type: " + + err.getError() + ", message: " + + err.getMessage()); } else if (statusCode == 200) { return HtsgetResponse.parse(json); } else { diff --git a/src/main/java/htsjdk/samtools/util/htsget/HtsgetResponse.java b/src/main/java/htsjdk/samtools/util/htsget/HtsgetResponse.java index e73de907b3..0303990893 100644 --- a/src/main/java/htsjdk/samtools/util/htsget/HtsgetResponse.java +++ b/src/main/java/htsjdk/samtools/util/htsget/HtsgetResponse.java @@ -1,9 +1,6 @@ package htsjdk.samtools.util.htsget; import htsjdk.samtools.util.RuntimeIOException; -import org.json.JSONArray; -import org.json.JSONObject; - import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; @@ -14,6 +11,8 @@ import java.util.*; import java.util.stream.Collectors; import java.util.stream.IntStream; +import org.json.JSONArray; +import org.json.JSONObject; /** * Class allowing deserialization from json htsget response, as defined in https://samtools.github.io/hts-specs/htsget.html @@ -78,7 +77,8 @@ public InputStream getData() { case "http": case "https": try { - final HttpURLConnection conn = (HttpURLConnection) this.uri.toURL().openConnection(); + final HttpURLConnection conn = + (HttpURLConnection) this.uri.toURL().openConnection(); conn.setRequestMethod("GET"); this.headers.forEach(conn::setRequestProperty); conn.connect(); @@ -92,9 +92,10 @@ public InputStream 
getData() { throw new HtsgetMalformedResponseException("data URI must be base64 encoded: " + dataUri); } return new ByteArrayInputStream( - Base64.getDecoder().decode(dataUri.replaceFirst("^data:.*;base64,", ""))); + Base64.getDecoder().decode(dataUri.replaceFirst("^data:.*;base64,", ""))); default: - throw new HtsgetMalformedResponseException("Unrecognized URI scheme in data block: " + this.uri.getScheme()); + throw new HtsgetMalformedResponseException( + "Unrecognized URI scheme in data block: " + this.uri.getScheme()); } } @@ -107,8 +108,10 @@ public InputStream getData() { public static Block parse(final JSONObject blockJson) { final String uriJson = blockJson.optString("url", null); if (uriJson == null) { - throw new HtsgetMalformedResponseException("No URI found in Htsget data block: " + - blockJson.toString().substring(0, Math.min(100, blockJson.toString().length()))); + throw new HtsgetMalformedResponseException("No URI found in Htsget data block: " + + blockJson + .toString() + .substring(0, Math.min(100, blockJson.toString().length()))); } final URI uri; try { @@ -118,19 +121,15 @@ public static Block parse(final JSONObject blockJson) { } final String dataClassJson = blockJson.optString("class", null); - final HtsgetClass dataClass = dataClassJson == null - ? null - : HtsgetClass.valueOf(dataClassJson.toLowerCase()); - + final HtsgetClass dataClass = + dataClassJson == null ? null : HtsgetClass.valueOf(dataClassJson.toLowerCase()); final JSONObject headersJson = blockJson.optJSONObject("headers"); final Map headers = headersJson == null - ? null - : headersJson.toMap().entrySet().stream() - .collect(Collectors.toMap( - Map.Entry::getKey, - e -> e.getValue().toString() - )); + ? 
null + : headersJson.toMap().entrySet().stream() + .collect(Collectors.toMap( + Map.Entry::getKey, e -> e.getValue().toString())); return new Block(uri, headers, dataClass); } @@ -182,15 +181,12 @@ public static HtsgetResponse parse(final String s) { } final List blocks = IntStream.range(0, blocksJson.length()) - .mapToObj(blocksJson::getJSONObject) - .map(Block::parse) - .collect(Collectors.toList()); + .mapToObj(blocksJson::getJSONObject) + .map(Block::parse) + .collect(Collectors.toList()); return new HtsgetResponse( - formatJson == null ? null : HtsgetFormat.valueOf(formatJson.toUpperCase()), - blocks, - md5Json - ); + formatJson == null ? null : HtsgetFormat.valueOf(formatJson.toUpperCase()), blocks, md5Json); } /** diff --git a/src/main/java/htsjdk/samtools/util/nio/DeleteOnExitPathHook.java b/src/main/java/htsjdk/samtools/util/nio/DeleteOnExitPathHook.java index 8197c35d69..cf77ba133e 100644 --- a/src/main/java/htsjdk/samtools/util/nio/DeleteOnExitPathHook.java +++ b/src/main/java/htsjdk/samtools/util/nio/DeleteOnExitPathHook.java @@ -1,8 +1,6 @@ package htsjdk.samtools.util.nio; import htsjdk.samtools.util.Log; - -import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; @@ -20,6 +18,7 @@ public class DeleteOnExitPathHook { private static final Log LOG = Log.getInstance(DeleteOnExitPathHook.class); private static LinkedHashSet paths = new LinkedHashSet<>(); + static { Runtime.getRuntime().addShutdownHook(new Thread(DeleteOnExitPathHook::runHooks)); } @@ -34,7 +33,7 @@ private DeleteOnExitPathHook() {} * @throws IllegalStateException if the shutdown hook is in progress. */ public static synchronized void add(Path path) { - if(paths == null) { + if (paths == null) { // DeleteOnExitHook is running. 
Too late to add a file throw new IllegalStateException("Shutdown in progress"); } diff --git a/src/main/java/htsjdk/samtools/util/zip/DeflaterFactory.java b/src/main/java/htsjdk/samtools/util/zip/DeflaterFactory.java index fb3ac9a18c..79e04be4e2 100644 --- a/src/main/java/htsjdk/samtools/util/zip/DeflaterFactory.java +++ b/src/main/java/htsjdk/samtools/util/zip/DeflaterFactory.java @@ -23,17 +23,27 @@ */ package htsjdk.samtools.util.zip; +import htsjdk.samtools.Defaults; import htsjdk.samtools.util.BlockCompressedOutputStream; +import htsjdk.samtools.util.Log; import java.util.zip.Deflater; /** * Factory for {@link Deflater} objects used by {@link BlockCompressedOutputStream}. * This class may be extended to provide alternative deflaters (e.g., for improved performance). + * + *

    By default, if {@link Defaults#USE_LIBDEFLATE} is true, this factory will attempt to + * create a {@link LibdeflateDeflater} backed by the libdeflate native library. If the native + * library is not available, it falls back to the JDK {@link Deflater}.

    */ public class DeflaterFactory { + private static final Log log = Log.getInstance(DeflaterFactory.class); + + /** Cached result of whether libdeflate is available; null means not yet tested. */ + private static volatile Boolean libdeflateAvailable; public DeflaterFactory() { - //Note: made explicit constructor to make searching for references easier + // Note: made explicit constructor to make searching for references easier } /** @@ -43,6 +53,39 @@ public DeflaterFactory() { * @param gzipCompatible if true then use GZIP compatible compression */ public Deflater makeDeflater(final int compressionLevel, final boolean gzipCompatible) { + if (Defaults.USE_LIBDEFLATE && isLibdeflateAvailable()) { + return new LibdeflateDeflater(compressionLevel, gzipCompatible); + } return new Deflater(compressionLevel, gzipCompatible); } + + /** Returns true if the libdeflate native library can be loaded. */ + static boolean isLibdeflateAvailable() { + if (libdeflateAvailable == null) { + synchronized (DeflaterFactory.class) { + if (libdeflateAvailable == null) { + libdeflateAvailable = testLibdeflate(); + } + } + } + return libdeflateAvailable; + } + + private static boolean testLibdeflate() { + try { + final LibdeflateDeflater deflater = new LibdeflateDeflater(1, true); + try { + deflater.setInput(new byte[] {0}, 0, 1); + deflater.finish(); + deflater.deflate(new byte[16], 0, 16); + } finally { + deflater.end(); + } + log.info("libdeflate is available; using libdeflate for DEFLATE compression."); + return true; + } catch (final Throwable t) { + log.info(t, "libdeflate is not available; falling back to JDK deflater."); + return false; + } + } } diff --git a/src/main/java/htsjdk/samtools/util/zip/InflaterFactory.java b/src/main/java/htsjdk/samtools/util/zip/InflaterFactory.java index c03dc9a0f1..08c7a10e1a 100644 --- a/src/main/java/htsjdk/samtools/util/zip/InflaterFactory.java +++ b/src/main/java/htsjdk/samtools/util/zip/InflaterFactory.java @@ -23,27 +23,33 @@ */ package 
htsjdk.samtools.util.zip; +import htsjdk.samtools.Defaults; import htsjdk.samtools.util.BlockGunzipper; import java.util.zip.Inflater; /** * Factory for {@link Inflater} objects used by {@link BlockGunzipper}. * This class may be extended to provide alternative inflaters (e.g., for improved performance). - * The default implementation returns a JDK {@link Inflater} + * + *

    By default, if {@link Defaults#USE_LIBDEFLATE} is true and the native library is available, + * this factory will create a {@link LibdeflateInflater}. Otherwise it falls back to the + * JDK {@link Inflater}.

    */ public class InflaterFactory { public InflaterFactory() { - //Note: made explicit constructor to make searching for references easier + // Note: made explicit constructor to make searching for references easier } /** * Returns an inflater object that will be used when reading DEFLATE compressed files. * Subclasses may override to provide their own inflater implementation. - * The default implementation returns a JDK {@link Inflater} * @param gzipCompatible if true then use GZIP compatible compression */ public Inflater makeInflater(final boolean gzipCompatible) { + if (Defaults.USE_LIBDEFLATE && DeflaterFactory.isLibdeflateAvailable()) { + return new LibdeflateInflater(gzipCompatible); + } return new Inflater(gzipCompatible); } } diff --git a/src/main/java/htsjdk/samtools/util/zip/LibdeflateDeflater.java b/src/main/java/htsjdk/samtools/util/zip/LibdeflateDeflater.java new file mode 100644 index 0000000000..20340bdadb --- /dev/null +++ b/src/main/java/htsjdk/samtools/util/zip/LibdeflateDeflater.java @@ -0,0 +1,144 @@ +/* + * The MIT License + * + * Copyright (c) 2026 Tim Fennell + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools.util.zip; + +import com.fulcrumgenomics.jlibdeflate.LibdeflateCompressor; +import java.nio.ByteBuffer; +import java.util.zip.Deflater; + +/** + * A {@link Deflater} implementation backed by libdeflate via the jlibdeflate library. + * Provides significantly faster DEFLATE compression than the JDK's built-in zlib. + * + *

    This class supports the subset of the Deflater API used by BGZF block compression: + * {@link #reset()}, {@link #setInput(byte[], int, int)}, {@link #finish()}, + * {@link #deflate(byte[], int, int)}, and {@link #finished()}.

    + */ +class LibdeflateDeflater extends Deflater { + + private final LibdeflateCompressor compressor; + private final boolean nowrap; + + private byte[] inputBuf; + private int inputOff; + private int inputLen; + private boolean finishing; + private boolean done; + + /** + * Creates a new LibdeflateDeflater at the specified compression level. + * + * @param level compression level (0-12 for libdeflate, 0-9 for standard compatibility) + * @param nowrap if true, produce raw DEFLATE (no zlib/gzip header); if false, produce zlib format + */ + LibdeflateDeflater(final int level, final boolean nowrap) { + // The super constructor allocates a native zlib stream we won't use. + // We immediately free it since all compression goes through libdeflate. + super(level, nowrap); + super.end(); + + this.nowrap = nowrap; + this.compressor = new LibdeflateCompressor(level); + } + + @Override + public void setInput(final byte[] input, final int off, final int len) { + this.inputBuf = input; + this.inputOff = off; + this.inputLen = len; + this.done = false; + } + + @Override + public void setInput(final ByteBuffer input) { + final int len = input.remaining(); + if (input.hasArray()) { + setInput(input.array(), input.arrayOffset() + input.position(), len); + input.position(input.limit()); + } else { + final byte[] bytes = new byte[len]; + input.get(bytes); + setInput(bytes, 0, len); + } + } + + @Override + public void finish() { + this.finishing = true; + } + + @Override + public int deflate(final byte[] output, final int off, final int len) { + if (inputBuf == null || inputLen == 0) { + done = true; + return 0; + } + + final int compressed = nowrap + ? compressor.deflateCompress(inputBuf, inputOff, inputLen, output, off, len) + : compressor.zlibCompress(inputBuf, inputOff, inputLen, output, off, len); + if (compressed == -1) { + // Output buffer too small — caller will handle this (e.g. 
fall back to no-compression) + done = false; + return 0; + } + + done = true; + return compressed; + } + + @Override + public int deflate(final ByteBuffer output) { + return deflate(output, Deflater.NO_FLUSH); + } + + @Override + public int deflate(final ByteBuffer output, final int flush) { + if (!output.hasArray()) { + throw new UnsupportedOperationException("LibdeflateDeflater requires a heap-backed ByteBuffer for output"); + } + final int n = deflate(output.array(), output.arrayOffset() + output.position(), output.remaining()); + output.position(output.position() + n); + return n; + } + + @Override + public boolean finished() { + return finishing && done; + } + + @Override + public void reset() { + inputBuf = null; + inputOff = 0; + inputLen = 0; + finishing = false; + done = false; + } + + @Override + public void end() { + compressor.close(); + } +} diff --git a/src/main/java/htsjdk/samtools/util/zip/LibdeflateInflater.java b/src/main/java/htsjdk/samtools/util/zip/LibdeflateInflater.java new file mode 100644 index 0000000000..a806cc1558 --- /dev/null +++ b/src/main/java/htsjdk/samtools/util/zip/LibdeflateInflater.java @@ -0,0 +1,121 @@ +/* + * The MIT License + * + * Copyright (c) 2026 Tim Fennell + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +package htsjdk.samtools.util.zip; + +import com.fulcrumgenomics.jlibdeflate.LibdeflateDecompressor; +import java.nio.ByteBuffer; +import java.util.zip.DataFormatException; +import java.util.zip.Inflater; + +/** + * An {@link Inflater} implementation backed by libdeflate via the jlibdeflate library. + * Provides significantly faster DEFLATE decompression than the JDK's built-in zlib. + * + *

    This class supports the subset of the Inflater API used by BGZF block decompression: + * {@link #reset()}, {@link #setInput(byte[], int, int)}, and + * {@link #inflate(byte[], int, int)}.

    + * + *

    The libdeflate decompressor requires the exact uncompressed size to be known. In BGZF, + * this is always the case since the uncompressed size is stored in the block footer and + * passed as the {@code len} parameter to {@link #inflate(byte[], int, int)}.

    + */ +class LibdeflateInflater extends Inflater { + + private final LibdeflateDecompressor decompressor; + private final boolean nowrap; + + private byte[] inputBuf; + private int inputOff; + private int inputLen; + + LibdeflateInflater(final boolean nowrap) { + // The super constructor allocates a native zlib stream we won't use. + // We immediately free it since all decompression goes through libdeflate. + super(nowrap); + super.end(); + + this.nowrap = nowrap; + this.decompressor = new LibdeflateDecompressor(); + } + + @Override + public void setInput(final byte[] input, final int off, final int len) { + this.inputBuf = input; + this.inputOff = off; + this.inputLen = len; + } + + @Override + public void setInput(final ByteBuffer input) { + final int len = input.remaining(); + if (input.hasArray()) { + setInput(input.array(), input.arrayOffset() + input.position(), len); + input.position(input.limit()); + } else { + final byte[] bytes = new byte[len]; + input.get(bytes); + setInput(bytes, 0, len); + } + } + + @Override + public int inflate(final ByteBuffer output) throws DataFormatException { + if (!output.hasArray()) { + throw new UnsupportedOperationException("LibdeflateInflater requires a heap-backed ByteBuffer for output"); + } + final int n = inflate(output.array(), output.arrayOffset() + output.position(), output.remaining()); + output.position(output.position() + n); + return n; + } + + @Override + public int inflate(final byte[] output, final int off, final int len) throws DataFormatException { + if (inputBuf == null || inputLen == 0 || len == 0) { + return 0; + } + + try { + if (nowrap) { + decompressor.deflateDecompress(inputBuf, inputOff, inputLen, output, off, len); + } else { + decompressor.zlibDecompress(inputBuf, inputOff, inputLen, output, off, len); + } + return len; + } catch (final Exception e) { + throw new DataFormatException(e.getMessage()); + } + } + + @Override + public void reset() { + inputBuf = null; + inputOff = 0; + inputLen = 0; 
+ } + + @Override + public void end() { + decompressor.close(); + } +} diff --git a/src/main/java/htsjdk/tribble/AbstractFeatureCodec.java b/src/main/java/htsjdk/tribble/AbstractFeatureCodec.java index a1e2771f7d..2447eb7e32 100644 --- a/src/main/java/htsjdk/tribble/AbstractFeatureCodec.java +++ b/src/main/java/htsjdk/tribble/AbstractFeatureCodec.java @@ -31,13 +31,14 @@ * Note that that method is the only way that the right codec for a file is identified and that only one codec * is allowed to identify itself as being able to decode any given file. */ -public abstract class AbstractFeatureCodec implements FeatureCodec { +public abstract class AbstractFeatureCodec + implements FeatureCodec { private final Class myClass; protected AbstractFeatureCodec(final Class myClass) { this.myClass = myClass; } - + @Override public Feature decodeLoc(final SOURCE source) throws IOException { return decode(source); diff --git a/src/main/java/htsjdk/tribble/AbstractFeatureReader.java b/src/main/java/htsjdk/tribble/AbstractFeatureReader.java index 3a57ca44e7..24c6f1af04 100755 --- a/src/main/java/htsjdk/tribble/AbstractFeatureReader.java +++ b/src/main/java/htsjdk/tribble/AbstractFeatureReader.java @@ -22,15 +22,10 @@ import htsjdk.samtools.util.IOUtil; import htsjdk.tribble.index.Index; import htsjdk.tribble.util.ParsingUtils; -import htsjdk.tribble.util.TabixUtils; - import java.io.File; import java.io.IOException; import java.net.URI; import java.nio.channels.SeekableByteChannel; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; import java.util.Iterator; import java.util.Set; import java.util.function.Function; @@ -42,15 +37,15 @@ */ public abstract class AbstractFeatureReader implements FeatureReader { // the logging destination for this source - //private final static Logger log = Logger.getLogger("BasicFeatureSource"); + // private final static Logger log = Logger.getLogger("BasicFeatureSource"); /** * The path to underlying data file, this 
must be the input path converted with {@link FeatureCodec#getPathToDataFile(String)} */ String path; - - // a wrapper to apply to the raw stream of the Feature file to allow features like prefetching and caching to be injected + // a wrapper to apply to the raw stream of the Feature file to allow features like prefetching and caching to be + // injected final Function wrapper; // a wrapper to apply to the raw stream of the index file final Function indexWrapper; @@ -69,7 +64,8 @@ public abstract class AbstractFeatureReader implement /** * Calls {@link #getFeatureReader(String, FeatureCodec, boolean)} with {@code requireIndex} = true */ - public static AbstractFeatureReader getFeatureReader(final String featureFile, final FeatureCodec codec) throws TribbleException { + public static AbstractFeatureReader getFeatureReader( + final String featureFile, final FeatureCodec codec) throws TribbleException { return getFeatureReader(featureFile, codec, true); } @@ -77,16 +73,22 @@ public static AbstractFeatureReader AbstractFeatureReader getFeatureReader(final String featureResource, final FeatureCodec codec, final boolean requireIndex) throws TribbleException { + public static AbstractFeatureReader getFeatureReader( + final String featureResource, final FeatureCodec codec, final boolean requireIndex) + throws TribbleException { return getFeatureReader(featureResource, null, codec, requireIndex, null, null); } - /** * {@link #getFeatureReader(String, String, FeatureCodec, boolean, Function, Function)} with {@code null} for wrapper, and indexWrapper * @throws TribbleException */ - public static AbstractFeatureReader getFeatureReader(final String featureResource, String indexResource, final FeatureCodec codec, final boolean requireIndex) throws TribbleException { + public static AbstractFeatureReader getFeatureReader( + final String featureResource, + String indexResource, + final FeatureCodec codec, + final boolean requireIndex) + throws TribbleException { return 
getFeatureReader(featureResource, indexResource, codec, requireIndex, null, null); } @@ -104,24 +106,37 @@ public static AbstractFeatureReader AbstractFeatureReader getFeatureReader(final String featureResource, String indexResource, final FeatureCodec codec, final boolean requireIndex, Function wrapper, Function indexWrapper) throws TribbleException { + public static AbstractFeatureReader getFeatureReader( + final String featureResource, + String indexResource, + final FeatureCodec codec, + final boolean requireIndex, + Function wrapper, + Function indexWrapper) + throws TribbleException { try { // Test for tabix index. - // Note that we use pathToDataFile here when determining the file type, but featureResource when constructing the readers. + // Note that we use pathToDataFile here when determining the file type, but featureResource when + // constructing the readers. // This is because the reader's constructor will convert the path and it needs to be converted exactly once. final String pathToDataFile = codec.getPathToDataFile(featureResource); if (methods.isTabix(pathToDataFile, indexResource)) { - if ( ! 
(codec instanceof AsciiFeatureCodec) ) - throw new TribbleException("Tabix indexed files only work with ASCII codecs, but received non-Ascii codec " + codec.getClass().getSimpleName()); - return new TabixFeatureReader<>(featureResource, indexResource, (AsciiFeatureCodec) codec, wrapper, indexWrapper); + if (!(codec instanceof AsciiFeatureCodec)) + throw new TribbleException( + "Tabix indexed files only work with ASCII codecs, but received non-Ascii codec " + + codec.getClass().getSimpleName()); + return new TabixFeatureReader<>( + featureResource, indexResource, (AsciiFeatureCodec) codec, wrapper, indexWrapper); } // Not tabix => tribble index file (might be gzipped, but not block gzipped) else { - return new TribbleIndexedFeatureReader<>(featureResource, indexResource, codec, requireIndex, wrapper, indexWrapper); + return new TribbleIndexedFeatureReader<>( + featureResource, indexResource, codec, requireIndex, wrapper, indexWrapper); } } catch (final IOException e) { - throw new TribbleException.MalformedFeatureFile("Unable to create BasicFeatureReader using feature file ", featureResource, e); + throw new TribbleException.MalformedFeatureFile( + "Unable to create BasicFeatureReader using feature file ", featureResource, e); } catch (final TribbleException e) { e.setSource(featureResource); throw e; @@ -137,22 +152,26 @@ public static AbstractFeatureReader AbstractFeatureReader getFeatureReader(final String featureResource, final FeatureCodec codec, final Index index) throws TribbleException { + public static AbstractFeatureReader getFeatureReader( + final String featureResource, final FeatureCodec codec, final Index index) + throws TribbleException { try { return new TribbleIndexedFeatureReader<>(featureResource, codec, index); } catch (final IOException e) { - throw new TribbleException.MalformedFeatureFile("Unable to create AbstractFeatureReader using feature file ", featureResource, e); + throw new TribbleException.MalformedFeatureFile( + "Unable to create 
AbstractFeatureReader using feature file ", featureResource, e); } - } protected AbstractFeatureReader(final String path, final FeatureCodec codec) { this(path, codec, null, null); } - protected AbstractFeatureReader(final String path, final FeatureCodec codec, - final Function wrapper, - final Function indexWrapper) { + protected AbstractFeatureReader( + final String path, + final FeatureCodec codec, + final Function wrapper, + final Function indexWrapper) { this.path = codec.getPathToDataFile(path); this.codec = codec; this.wrapper = wrapper; @@ -172,11 +191,11 @@ public boolean hasIndex() { * @return true if the reader has an index, which means that it can be queried. */ @Override - public boolean isQueryable(){ + public boolean isQueryable() { return hasIndex(); } - public static void setComponentMethods(ComponentMethods methods){ + public static void setComponentMethods(ComponentMethods methods) { AbstractFeatureReader.methods = methods; } @@ -184,7 +203,7 @@ public static void setComponentMethods(ComponentMethods methods){ * @deprecated use {@link IOUtil#hasBlockCompressedExtension(String)}. */ @Deprecated - public static boolean hasBlockCompressedExtension (final String fileName) { + public static boolean hasBlockCompressedExtension(final String fileName) { return IOUtil.hasBlockCompressedExtension(fileName); } @@ -192,7 +211,7 @@ public static boolean hasBlockCompressedExtension (final String fileName) { * @deprecated use {@link IOUtil#hasBlockCompressedExtension(File)}. */ @Deprecated - public static boolean hasBlockCompressedExtension (final File file) { + public static boolean hasBlockCompressedExtension(final File file) { return IOUtil.hasBlockCompressedExtension(file.getName()); } @@ -200,7 +219,7 @@ public static boolean hasBlockCompressedExtension (final File file) { * @deprecated use {@link IOUtil#hasBlockCompressedExtension(URI)}. 
*/ @Deprecated - public static boolean hasBlockCompressedExtension (final URI uri) { + public static boolean hasBlockCompressedExtension(final URI uri) { String path = uri.getPath(); return IOUtil.hasBlockCompressedExtension(path); } @@ -216,23 +235,38 @@ public Object getHeader() { } static class EmptyIterator implements CloseableTribbleIterator { - @Override public Iterator iterator() { return this; } - @Override public boolean hasNext() { return false; } - @Override public T next() { return null; } - @Override public void remove() { } - @Override public void close() { } + @Override + public Iterator iterator() { + return this; + } + + @Override + public boolean hasNext() { + return false; + } + + @Override + public T next() { + return null; + } + + @Override + public void remove() {} + + @Override + public void close() {} } public static boolean isTabix(String resourcePath, String indexPath) throws IOException { - if(indexPath == null){ + if (indexPath == null) { indexPath = ParsingUtils.appendToPath(resourcePath, FileExtensions.TABIX_INDEX); } return IOUtil.hasBlockCompressedExtension(resourcePath) && ParsingUtils.resourceExists(indexPath); } - public static class ComponentMethods{ + public static class ComponentMethods { - public boolean isTabix(String resourcePath, String indexPath) throws IOException{ + public boolean isTabix(String resourcePath, String indexPath) throws IOException { return AbstractFeatureReader.isTabix(resourcePath, indexPath); } } diff --git a/src/main/java/htsjdk/tribble/AsciiFeatureCodec.java b/src/main/java/htsjdk/tribble/AsciiFeatureCodec.java index 02c4635112..1fc4a9cd99 100644 --- a/src/main/java/htsjdk/tribble/AsciiFeatureCodec.java +++ b/src/main/java/htsjdk/tribble/AsciiFeatureCodec.java @@ -22,7 +22,6 @@ import htsjdk.samtools.util.LocationAware; import htsjdk.samtools.util.Log; import htsjdk.tribble.readers.*; - import java.io.IOException; import java.io.InputStream; @@ -36,10 +35,11 @@ */ public abstract class 
AsciiFeatureCodec extends AbstractFeatureCodec { private static final Log log = Log.getInstance(AsciiFeatureCodec.class); + protected AsciiFeatureCodec(final Class myClass) { super(myClass); } - + @Override public void close(final LineIterator lineIterator) { CloserUtil.close(lineIterator); @@ -60,8 +60,8 @@ public LineIterator makeSourceFromStream(final InputStream bufferedInputStream) return new LineIteratorImpl(new SynchronousLineReader(bufferedInputStream)); } - /** - * Convenience method. Decoding in ASCII files operates line-by-line, so obviate the need to call + /** + * Convenience method. Decoding in ASCII files operates line-by-line, so obviate the need to call * {@link htsjdk.tribble.readers.LineIterator#next()} in implementing classes and, instead, have them implement * {@link AsciiFeatureCodec#decode(String)}. */ @@ -84,5 +84,5 @@ public FeatureCodecHeader readHeader(final LineIterator lineIterator) throws IOE * * @return the actual header data in the file, or null if none is available */ - abstract public Object readActualHeader(final LineIterator reader); + public abstract Object readActualHeader(final LineIterator reader); } diff --git a/src/main/java/htsjdk/tribble/BinaryFeatureCodec.java b/src/main/java/htsjdk/tribble/BinaryFeatureCodec.java index dbd0afc47c..018ed4be0e 100644 --- a/src/main/java/htsjdk/tribble/BinaryFeatureCodec.java +++ b/src/main/java/htsjdk/tribble/BinaryFeatureCodec.java @@ -5,7 +5,6 @@ import htsjdk.samtools.util.RuntimeIOException; import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.readers.PositionalBufferedStream; - import java.io.IOException; import java.io.InputStream; @@ -13,13 +12,12 @@ * Implements common methods of {@link FeatureCodec}s that read from {@link htsjdk.tribble.readers.PositionalBufferedStream}s. 
* @author mccowan */ -abstract public class BinaryFeatureCodec implements FeatureCodec { +public abstract class BinaryFeatureCodec implements FeatureCodec { @Override public PositionalBufferedStream makeSourceFromStream(final InputStream bufferedInputStream) { if (bufferedInputStream instanceof PositionalBufferedStream) return (PositionalBufferedStream) bufferedInputStream; - else - return new PositionalBufferedStream(bufferedInputStream); + else return new PositionalBufferedStream(bufferedInputStream); } /** {@link PositionalBufferedStream} is already {@link LocationAware}. */ diff --git a/src/main/java/htsjdk/tribble/Feature.java b/src/main/java/htsjdk/tribble/Feature.java index 9ed852b14c..e6706d2991 100644 --- a/src/main/java/htsjdk/tribble/Feature.java +++ b/src/main/java/htsjdk/tribble/Feature.java @@ -23,7 +23,6 @@ */ package htsjdk.tribble; - import htsjdk.samtools.util.Locatable; /** @@ -37,8 +36,7 @@ public interface Feature extends Locatable { * @deprecated on 03/2015. Use getContig() instead. */ @Deprecated - default public String getChr() { + public default String getChr() { return getContig(); } - } diff --git a/src/main/java/htsjdk/tribble/FeatureCodec.java b/src/main/java/htsjdk/tribble/FeatureCodec.java index 68199b282f..3dc28fd57e 100644 --- a/src/main/java/htsjdk/tribble/FeatureCodec.java +++ b/src/main/java/htsjdk/tribble/FeatureCodec.java @@ -18,10 +18,8 @@ package htsjdk.tribble; -import htsjdk.io.IOPath; import htsjdk.samtools.util.LocationAware; import htsjdk.tribble.index.tabix.TabixFormat; - import java.io.IOException; import java.io.InputStream; @@ -75,8 +73,8 @@ public interface FeatureCodec { /** * Read and return the header, or null if there is no header. 
- * - * Note: Implementers of this method must be careful to read exactly as much from {@link SOURCE} as needed to parse the header, and no + * + * Note: Implementers of this method must be careful to read exactly as much from {@link SOURCE} as needed to parse the header, and no * more. Otherwise, data that might otherwise be fed into parsing a {@link Feature} may be lost. * * @param source the source from which to decode the header @@ -92,7 +90,7 @@ public interface FeatureCodec { *

    * This function is used by reflections based tools, so we can know the underlying type *

    - * + * * @return the feature type this codec generates. */ public Class getFeatureType(); @@ -108,7 +106,7 @@ public interface FeatureCodec { * and is thus suitable for use during indexing. Like {@link #makeSourceFromStream(java.io.InputStream)}, except * the {@link LocationAware} compatibility is required for creating indexes. *

    - * Implementers of this method must return a type that is both {@link LocationAware} as well as {@link SOURCE}. Note that this + * Implementers of this method must return a type that is both {@link LocationAware} as well as {@link SOURCE}. Note that this * requirement cannot be enforced via the method signature due to limitations in Java's generic typing system. Instead, consumers * should cast the call result into a {@link SOURCE} when applicable. *

    @@ -154,7 +152,7 @@ public interface FeatureCodec { * @return the format to use with tabix * @throws TribbleException if the format is not defined */ - default public TabixFormat getTabixFormat() { + public default TabixFormat getTabixFormat() { throw new TribbleException(this.getClass().getSimpleName() + "does not have defined tabix format"); } @@ -171,7 +169,7 @@ default public TabixFormat getTabixFormat() { * @return the path to the data file that should be parsed by this codec to produce Features. * @throws TribbleException codecs may throw if they cannot decode the path. */ - default String getPathToDataFile(String path){ + default String getPathToDataFile(String path) { return path; } } diff --git a/src/main/java/htsjdk/tribble/FeatureCodecHeader.java b/src/main/java/htsjdk/tribble/FeatureCodecHeader.java index 159d1ed92f..751f051f8e 100644 --- a/src/main/java/htsjdk/tribble/FeatureCodecHeader.java +++ b/src/main/java/htsjdk/tribble/FeatureCodecHeader.java @@ -36,10 +36,10 @@ */ public class FeatureCodecHeader { /** The value of the headerEnd field when there's no header */ - public final static long NO_HEADER_END = 0; + public static final long NO_HEADER_END = 0; /** A public instance representing no header */ - public final static FeatureCodecHeader EMPTY_HEADER = new FeatureCodecHeader(null, NO_HEADER_END); + public static final FeatureCodecHeader EMPTY_HEADER = new FeatureCodecHeader(null, NO_HEADER_END); private final Object headerValue; private final long headerEnd; @@ -55,7 +55,7 @@ public class FeatureCodecHeader { * there's no header at all */ public FeatureCodecHeader(final Object headerValue, final long headerEnd) { - if ( headerEnd < 0 ) throw new TribbleException("Header end < 0"); + if (headerEnd < 0) throw new TribbleException("Header end < 0"); this.headerValue = headerValue; this.headerEnd = headerEnd; } diff --git a/src/main/java/htsjdk/tribble/FeatureReader.java b/src/main/java/htsjdk/tribble/FeatureReader.java index 
d0e65c7bc8..15bc74e105 100644 --- a/src/main/java/htsjdk/tribble/FeatureReader.java +++ b/src/main/java/htsjdk/tribble/FeatureReader.java @@ -19,7 +19,6 @@ package htsjdk.tribble; import htsjdk.samtools.util.Locatable; - import java.io.Closeable; import java.io.IOException; import java.util.List; diff --git a/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java b/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java index 9cdd92a874..587ede794c 100644 --- a/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java +++ b/src/main/java/htsjdk/tribble/IntervalList/IntervalListCodec.java @@ -40,7 +40,6 @@ * * Also contains the parsing code for the non-tribble parsing of IntervalLists */ - public class IntervalListCodec extends AsciiFeatureCodec { private final Log log = Log.getInstance(IntervalListCodec.class); @@ -70,8 +69,7 @@ private Interval parseIntervalString(final String line, final SAMSequenceDiction // Make sure we have the right number of fields final String[] fields = line.split("\t"); if (fields.length != 5) { - throw new TribbleException("Invalid interval record contains " + - fields.length + " fields: " + line); + throw new TribbleException("Invalid interval record contains " + fields.length + " fields: " + line); } // Then parse them out @@ -84,22 +82,21 @@ private Interval parseIntervalString(final String line, final SAMSequenceDiction final int start = format.parseInt(fields[START_POS]); final int end = format.parseInt(fields[END_POS]); if (start < 1) { - throw new IllegalArgumentException("Coordinate less than 1: start value of " + start + - " is less than 1 and thus illegal"); + throw new IllegalArgumentException( + "Coordinate less than 1: start value of " + start + " is less than 1 and thus illegal"); } if (start > end + 1) { - throw new IllegalArgumentException("Start value of " + start + - " is greater than end + 1 for end of value: " + end + - ". 
I'm afraid I cannot let you do that."); + throw new IllegalArgumentException("Start value of " + start + " is greater than end + 1 for end of value: " + + end + ". I'm afraid I cannot let you do that."); } Strand strand = Strand.decode(fields[STRAND_POS]); - if (strand==Strand.NONE) throw new IllegalArgumentException("Invalid strand field: " + fields[STRAND_POS]); + if (strand == Strand.NONE) throw new IllegalArgumentException("Invalid strand field: " + fields[STRAND_POS]); final String name = fields[NAME_POS]; - final Interval interval = new Interval(seq, start, end, strand==Strand.NEGATIVE, name); + final Interval interval = new Interval(seq, start, end, strand == Strand.NEGATIVE, name); final SAMSequenceRecord sequence = dict.getSequence(seq); if (sequence == null) { log.warn("Ignoring interval for unknown reference: " + interval); @@ -107,7 +104,8 @@ private Interval parseIntervalString(final String line, final SAMSequenceDiction } else { final int sequenceLength = sequence.getSequenceLength(); if (sequenceLength > 0 && sequenceLength < end) { - throw new IllegalArgumentException("interval with end: " + end + " extends beyond end of sequence with length: " + sequenceLength); + throw new IllegalArgumentException( + "interval with end: " + end + " extends beyond end of sequence with length: " + sequenceLength); } return interval; } @@ -130,37 +128,39 @@ public Interval decode(final String line) { return parseIntervalString(line, dictionary); } - @Override public Object readActualHeader(LineIterator lineIterator) { final SAMTextHeaderCodec headerCodec = new SAMTextHeaderCodec(); - final SAMFileHeader header = headerCodec.decode(new LineReader() { - int lineNo = 0; - @Override - public String readLine() { - lineNo++; - return lineIterator.next(); - } - @Override - public int getLineNumber() { - return lineNo; - } - @Override - public int peek() { - return lineIterator.hasNext() ? 
- lineIterator.peek().charAt(0) : - LineReader.EOF_VALUE; - } - @Override - public void close() { } - }, "IntervalListCodec"); + final SAMFileHeader header = headerCodec.decode( + new LineReader() { + int lineNo = 0; + + @Override + public String readLine() { + lineNo++; + return lineIterator.next(); + } + + @Override + public int getLineNumber() { + return lineNo; + } + + @Override + public int peek() { + return lineIterator.hasNext() ? lineIterator.peek().charAt(0) : LineReader.EOF_VALUE; + } + + @Override + public void close() {} + }, + "IntervalListCodec"); dictionary = header.getSequenceDictionary(); return header; } @Override public boolean canDecode(final String s) { - return s.endsWith(FileExtensions.INTERVAL_LIST) || - s.endsWith(FileExtensions.COMPRESSED_INTERVAL_LIST); + return s.endsWith(FileExtensions.INTERVAL_LIST) || s.endsWith(FileExtensions.COMPRESSED_INTERVAL_LIST); } } diff --git a/src/main/java/htsjdk/tribble/MutableFeature.java b/src/main/java/htsjdk/tribble/MutableFeature.java index 042fd25fb8..e96aed2810 100644 --- a/src/main/java/htsjdk/tribble/MutableFeature.java +++ b/src/main/java/htsjdk/tribble/MutableFeature.java @@ -34,7 +34,7 @@ public class MutableFeature implements Feature { public int end; public MutableFeature(final Feature feature) { - this(feature.getContig(),feature.getStart(),feature.getEnd()); + this(feature.getContig(), feature.getStart(), feature.getEnd()); } public MutableFeature(final String contig, final int start, final int end) { diff --git a/src/main/java/htsjdk/tribble/NameAwareCodec.java b/src/main/java/htsjdk/tribble/NameAwareCodec.java index ce90f5c12a..995c0b6674 100755 --- a/src/main/java/htsjdk/tribble/NameAwareCodec.java +++ b/src/main/java/htsjdk/tribble/NameAwareCodec.java @@ -33,5 +33,6 @@ */ public interface NameAwareCodec { public String getName(); + public void setName(String name); } diff --git a/src/main/java/htsjdk/tribble/NamedFeature.java b/src/main/java/htsjdk/tribble/NamedFeature.java index 
5f0dcb4a6e..65a165b0b8 100644 --- a/src/main/java/htsjdk/tribble/NamedFeature.java +++ b/src/main/java/htsjdk/tribble/NamedFeature.java @@ -23,7 +23,6 @@ */ package htsjdk.tribble; - /** * An interface for features provided via an interval file, e.g. bed or interval_list. * Provides a common interface for accessing the name column for both of these file types. diff --git a/src/main/java/htsjdk/tribble/TabixFeatureReader.java b/src/main/java/htsjdk/tribble/TabixFeatureReader.java index cd846e4f8b..54c2560267 100644 --- a/src/main/java/htsjdk/tribble/TabixFeatureReader.java +++ b/src/main/java/htsjdk/tribble/TabixFeatureReader.java @@ -27,7 +27,6 @@ import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.RuntimeIOException; import htsjdk.tribble.readers.*; - import java.io.IOException; import java.io.InputStream; import java.nio.channels.SeekableByteChannel; @@ -60,7 +59,8 @@ public TabixFeatureReader(final String featureFile, final AsciiFeatureCodec code * @param codec * @throws IOException */ - public TabixFeatureReader(final String featureFile, final String indexFile, final AsciiFeatureCodec codec) throws IOException { + public TabixFeatureReader(final String featureFile, final String indexFile, final AsciiFeatureCodec codec) + throws IOException { this(featureFile, indexFile, codec, null, null); } @@ -73,9 +73,13 @@ public TabixFeatureReader(final String featureFile, final String indexFile, fina * @param indexWrapper a wrapper to apply to the byte stream from the indexResource, may be null, will only be * applied if indexFile is a uri representing a {@link java.nio.file.Path} */ - public TabixFeatureReader(final String featureFile, final String indexFile, final AsciiFeatureCodec codec, - final Function wrapper, - final Function indexWrapper) throws IOException { + public TabixFeatureReader( + final String featureFile, + final String indexFile, + final AsciiFeatureCodec codec, + final Function wrapper, + final Function indexWrapper) + 
throws IOException { super(featureFile, codec, wrapper, indexWrapper); tabixReader = new TabixReader(this.path, indexFile, wrapper, indexWrapper); sequenceNames = new ArrayList<>(tabixReader.getChromosomes()); @@ -91,10 +95,12 @@ public TabixFeatureReader(final String featureFile, final String indexFile, fina private void readHeader() throws IOException { SOURCE source = null; try { - source = codec.makeSourceFromStream(new PositionalBufferedStream(new BlockCompressedInputStream(SeekableStreamFactory.getInstance().getStreamFor(path, wrapper)))); + source = codec.makeSourceFromStream(new PositionalBufferedStream(new BlockCompressedInputStream( + SeekableStreamFactory.getInstance().getStreamFor(path, wrapper)))); header = codec.readHeader(source); } catch (Exception e) { - throw new TribbleException.MalformedFeatureFile("Unable to parse header with error: " + e.getMessage(), path, e); + throw new TribbleException.MalformedFeatureFile( + "Unable to parse header with error: " + e.getMessage(), path, e); } finally { if (source != null) { codec.close(source); @@ -124,18 +130,21 @@ public List getSequenceNames() { @Override public CloseableTribbleIterator query(final String chr, final int start, final int end) throws IOException { final List mp = getSequenceNames(); - if (mp == null) throw new TribbleException.TabixReaderFailure("Unable to find sequence named " + chr + - " in the tabix index. ", path); + if (mp == null) + throw new TribbleException.TabixReaderFailure( + "Unable to find sequence named " + chr + " in the tabix index. 
", path); if (!mp.contains(chr)) { return new EmptyIterator(); } - final TabixIteratorLineReader lineReader = new TabixIteratorLineReader(tabixReader.query(tabixReader.chr2tid(chr), start - 1, end)); + final TabixIteratorLineReader lineReader = + new TabixIteratorLineReader(tabixReader.query(tabixReader.chr2tid(chr), start - 1, end)); return new FeatureIterator(lineReader, start - 1, end); } @Override public CloseableTribbleIterator iterator() throws IOException { - final InputStream is = new BlockCompressedInputStream(SeekableStreamFactory.getInstance().getStreamFor(path, wrapper)); + final InputStream is = new BlockCompressedInputStream( + SeekableStreamFactory.getInstance().getStreamFor(path, wrapper)); final PositionalBufferedStream stream = new PositionalBufferedStream(is); final LineReader reader = new SynchronousLineReader(stream); return new FeatureIterator(reader, 0, Integer.MAX_VALUE); @@ -172,13 +181,13 @@ protected void readNextRecord() throws IOException { try { f = ((AsciiFeatureCodec) codec).decode(nextLine); if (f == null) { - continue; // Skip + continue; // Skip } if (f.getStart() > end) { - return; // Done + return; // Done } if (f.getEnd() <= start) { - continue; // Skip + continue; // Skip } currentRecord = (T) f; @@ -204,8 +213,10 @@ public T next() { try { readNextRecord(); } catch (IOException e) { - throw new RuntimeIOException("Unable to read the next record, the last record was at " + - ret.getContig() + ":" + ret.getStart() + "-" + ret.getEnd(), e); + throw new RuntimeIOException( + "Unable to read the next record, the last record was at " + ret.getContig() + ":" + + ret.getStart() + "-" + ret.getEnd(), + e); } return ret; } diff --git a/src/main/java/htsjdk/tribble/Tribble.java b/src/main/java/htsjdk/tribble/Tribble.java index d8469c6ed5..9e1a244dd6 100644 --- a/src/main/java/htsjdk/tribble/Tribble.java +++ b/src/main/java/htsjdk/tribble/Tribble.java @@ -25,8 +25,6 @@ import htsjdk.samtools.util.FileExtensions; import 
htsjdk.tribble.util.ParsingUtils; -import htsjdk.tribble.util.TabixUtils; - import java.io.File; import java.nio.file.Path; @@ -34,13 +32,13 @@ * Common, tribble wide constants and static functions */ public class Tribble { - private Tribble() { } // can't be instantiated + private Tribble() {} // can't be instantiated /** * @deprecated since June 2019 Use {@link FileExtensions#TRIBBLE_INDEX} instead. */ @Deprecated - public final static String STANDARD_INDEX_EXTENSION = FileExtensions.TRIBBLE_INDEX; + public static final String STANDARD_INDEX_EXTENSION = FileExtensions.TRIBBLE_INDEX; /** * Return the name of the index file for the provided {@code filename} diff --git a/src/main/java/htsjdk/tribble/TribbleException.java b/src/main/java/htsjdk/tribble/TribbleException.java index abcbc25ca0..dc72411c17 100644 --- a/src/main/java/htsjdk/tribble/TribbleException.java +++ b/src/main/java/htsjdk/tribble/TribbleException.java @@ -23,12 +23,11 @@ */ package htsjdk.tribble; - /** * @author Aaron * * The base Tribble exception; this allows external libraries to catch any exception Tribble generates - * + * */ public class TribbleException extends RuntimeException { // what file or input source we are working from @@ -57,8 +56,7 @@ public void setSource(String source) { @Override public String getMessage() { String ret = super.getMessage(); - if ( source != null ) - ret = ret + ", for input source: " + source; + if (source != null) ret = ret + ", for input source: " + source; return ret; } @@ -66,24 +64,31 @@ public String getMessage() { // other more specific exceptions generated in Tribble // ////////////////////////////////////////////////////////////////////// - // ////////////////////////////////////////////////////////////////////// // Codec exception // ////////////////////////////////////////////////////////////////////// // if the line to decode is incorrect public static class InvalidDecodeLine extends TribbleException { - public InvalidDecodeLine(String message, 
String line) { super (message + ", line = " + line); } + public InvalidDecodeLine(String message, String line) { + super(message + ", line = " + line); + } - public InvalidDecodeLine(String message, int lineNo) { super (message + ", at line number " + lineNo); } + public InvalidDecodeLine(String message, int lineNo) { + super(message + ", at line number " + lineNo); + } } public static class InvalidHeader extends TribbleException { - public InvalidHeader(String message) { super ("Your input file has a malformed header: " + message); } + public InvalidHeader(String message) { + super("Your input file has a malformed header: " + message); + } } // capture other internal codec exceptions public static class InternalCodecException extends TribbleException { - public InternalCodecException(String message) { super (message); } + public InternalCodecException(String message) { + super(message); + } } // ////////////////////////////////////////////////////////////////////// @@ -91,8 +96,9 @@ public static class InternalCodecException extends TribbleException { // ////////////////////////////////////////////////////////////////////// public static class UnableToCreateCorrectIndexType extends TribbleException { public UnableToCreateCorrectIndexType(String message, Exception e) { - super(message,e); + super(message, e); } + public UnableToCreateCorrectIndexType(String message) { super(message); } @@ -110,9 +116,10 @@ public FeatureFileDoesntExist(String message, String file) { public static class MalformedFeatureFile extends TribbleException { public MalformedFeatureFile(String message, String f, Exception e) { - super(message,e); + super(message, e); setSource(f); } + public MalformedFeatureFile(String message, String f) { super(message); setSource(f); @@ -121,21 +128,21 @@ public MalformedFeatureFile(String message, String f) { public static class UnableToReadIndexFile extends TribbleException { public UnableToReadIndexFile(String message, String f, Exception e) { - 
super(message,e); + super(message, e); setSource(f); } } public static class CorruptedIndexFile extends TribbleException { public CorruptedIndexFile(String message, String f, Exception e) { - super(message,e); + super(message, e); setSource(f); } } public static class TabixReaderFailure extends TribbleException { public TabixReaderFailure(String message, String f, Exception e) { - super(message,e); + super(message, e); setSource(f); } diff --git a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java index f8fcee1619..0a5242ac5c 100644 --- a/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java +++ b/src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java @@ -26,22 +26,18 @@ import htsjdk.io.HtsPath; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.seekablestream.SeekableStreamFactory; +import htsjdk.samtools.util.FileExtensions; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.RuntimeIOException; -import htsjdk.samtools.util.FileExtensions; import htsjdk.tribble.index.Block; import htsjdk.tribble.index.Index; import htsjdk.tribble.index.IndexFactory; import htsjdk.tribble.readers.PositionalBufferedStream; import htsjdk.tribble.util.ParsingUtils; - import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; -import java.net.URI; -import java.net.URLEncoder; import java.nio.channels.SeekableByteChannel; -import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -83,19 +79,26 @@ public class TribbleIndexedFeatureReader extends Abst * @param requireIndex - true if the reader will be queries for specific ranges. 
An index (idx) file must exist * @throws IOException */ - public TribbleIndexedFeatureReader(final String featurePath, final FeatureCodec codec, final boolean requireIndex) throws IOException { + public TribbleIndexedFeatureReader( + final String featurePath, final FeatureCodec codec, final boolean requireIndex) + throws IOException { this(featurePath, codec, requireIndex, null, null); } - public TribbleIndexedFeatureReader(final String featurePath, final FeatureCodec codec, final boolean requireIndex, - Function wrapper, - Function indexWrapper) throws IOException { + public TribbleIndexedFeatureReader( + final String featurePath, + final FeatureCodec codec, + final boolean requireIndex, + Function wrapper, + Function indexWrapper) + throws IOException { super(featurePath, codec, wrapper, indexWrapper); if (requireIndex) { this.loadIndex(); if (!this.hasIndex()) { - throw new TribbleException("An index is required, but none found with file ending " + FileExtensions.TRIBBLE_INDEX); + throw new TribbleException( + "An index is required, but none found with file ending " + FileExtensions.TRIBBLE_INDEX); } } @@ -111,7 +114,12 @@ public TribbleIndexedFeatureReader(final String featurePath, final FeatureCodec< * @param requireIndex - true if the reader will be queries for specific ranges. An index (idx) file must exist * @throws IOException */ - public TribbleIndexedFeatureReader(final String featureFile, final String indexFile, final FeatureCodec codec, final boolean requireIndex) throws IOException { + public TribbleIndexedFeatureReader( + final String featureFile, + final String indexFile, + final FeatureCodec codec, + final boolean requireIndex) + throws IOException { this(featureFile, indexFile, codec, requireIndex, null, null); } @@ -123,9 +131,14 @@ public TribbleIndexedFeatureReader(final String featureFile, final String indexF * @param requireIndex - true if the reader will be queries for specific ranges. 
An index (idx) file must exist * @throws IOException */ - public TribbleIndexedFeatureReader(final String featureFile, final String indexFile, final FeatureCodec codec, final boolean requireIndex, - Function wrapper, - Function indexWrapper) throws IOException { + public TribbleIndexedFeatureReader( + final String featureFile, + final String indexFile, + final FeatureCodec codec, + final boolean requireIndex, + Function wrapper, + Function indexWrapper) + throws IOException { this(featureFile, codec, false, wrapper, indexWrapper); // required to read the header if (indexFile != null && ParsingUtils.resourceExists(indexFile)) { index = IndexFactory.loadIndex(indexFile, indexWrapper); @@ -134,7 +147,8 @@ public TribbleIndexedFeatureReader(final String featureFile, final String indexF if (requireIndex) { this.loadIndex(); if (!this.hasIndex()) { - throw new TribbleException("An index is required, but none found with file ending " + FileExtensions.TRIBBLE_INDEX); + throw new TribbleException( + "An index is required, but none found with file ending " + FileExtensions.TRIBBLE_INDEX); } } } @@ -146,7 +160,8 @@ public TribbleIndexedFeatureReader(final String featureFile, final String indexF * @param index - a tribble Index object * @throws IOException */ - public TribbleIndexedFeatureReader(final String featureFile, final FeatureCodec codec, final Index index) throws IOException { + public TribbleIndexedFeatureReader(final String featureFile, final FeatureCodec codec, final Index index) + throws IOException { this(featureFile, codec, false); // required to read the header this.index = index; this.needCheckForIndex = false; @@ -262,7 +277,8 @@ private void readHeader() throws IOException { final SOURCE source = codec.makeSourceFromStream(pbs); header = codec.readHeader(source); } catch (Exception e) { - throw new TribbleException.MalformedFeatureFile("Unable to parse header with error: " + e.getMessage(), path, e); + throw new TribbleException.MalformedFeatureFile( + 
"Unable to parse header with error: " + e.getMessage(), path, e); } finally { if (pbs != null) pbs.close(); else if (is != null) is.close(); @@ -331,7 +347,7 @@ public WFIterator() throws IOException { // Gzipped -- we need to buffer the GZIPInputStream methods as this class makes read() calls, // and seekableStream does not support single byte reads final InputStream is = new GZIPInputStream(new BufferedInputStream(inputStream, 512000)); - pbs = new PositionalBufferedStream(is, 1000); // Small buffer as this is buffered already. + pbs = new PositionalBufferedStream(is, 1000); // Small buffer as this is buffered already. } else { pbs = new PositionalBufferedStream(inputStream, 512000); } @@ -355,8 +371,10 @@ public T next() { try { readNextRecord(); } catch (IOException e) { - throw new RuntimeIOException("Unable to read the next record, the last record was at " + - ret.getContig() + ":" + ret.getStart() + "-" + ret.getEnd(), e); + throw new RuntimeIOException( + "Unable to read the next record, the last record was at " + ret.getContig() + ":" + + ret.getStart() + "-" + ret.getEnd(), + e); } return ret; } @@ -392,8 +410,12 @@ private void readNextRecord() throws IOException { if (previousRecord == null) { error = String.format("Error parsing %s at the first record", source); } else { - error = String.format("Error parsing %s just after record at: %s:%d-%d", - source.toString(), previousRecord.getContig(), previousRecord.getStart(), previousRecord.getEnd()); + error = String.format( + "Error parsing %s just after record at: %s:%d-%d", + source.toString(), + previousRecord.getContig(), + previousRecord.getStart(), + previousRecord.getEnd()); } throw new TribbleException.MalformedFeatureFile(error, path, e); } @@ -430,7 +452,8 @@ class QueryIterator implements CloseableTribbleIterator { private SeekableStream mySeekableStream; private Iterator blockIterator; - public QueryIterator(final String chr, final int start, final int end, final List blocks) throws IOException 
{ + public QueryIterator(final String chr, final int start, final int end, final List blocks) + throws IOException { this.start = start; this.end = end; @@ -458,8 +481,10 @@ public T next() { try { readNextRecord(); } catch (IOException e) { - throw new RuntimeIOException("Unable to read the next record, the last record was at " + - ret.getContig() + ":" + ret.getStart() + "-" + ret.getEnd(), e); + throw new RuntimeIOException( + "Unable to read the next record, the last record was at " + ret.getContig() + ":" + + ret.getStart() + "-" + ret.getEnd(), + e); } return ret; } @@ -468,8 +493,10 @@ private void advanceBlock() throws IOException { while (blockIterator != null && blockIterator.hasNext()) { final Block block = blockIterator.next(); if (block.getSize() > 0) { - final int bufferSize = Math.min(2_000_000, block.getSize() > 100_000_000 ? 10_000_000 : (int) block.getSize()); - source = codec.makeSourceFromStream(new PositionalBufferedStream(new BlockStreamWrapper(mySeekableStream, block), bufferSize)); + final int bufferSize = + Math.min(2_000_000, block.getSize() > 100_000_000 ? 
10_000_000 : (int) block.getSize()); + source = codec.makeSourceFromStream( + new PositionalBufferedStream(new BlockStreamWrapper(mySeekableStream, block), bufferSize)); // note we don't have to skip the header here as the block should never start in the header return; } @@ -491,32 +518,32 @@ private void readNextRecord() throws IOException { final T previousRecord = currentRecord; if (source == null) { - return; // <= no more features to read + return; // <= no more features to read } currentRecord = null; - while (true) { // Loop through blocks - while (!codec.isDone(source)) { // Loop through current block + while (true) { // Loop through blocks + while (!codec.isDone(source)) { // Loop through current block final T f; try { f = codec.decode(source); if (f == null) { - continue; // Skip + continue; // Skip } if ((chrAlias != null && !f.getContig().equals(chrAlias)) || f.getStart() > end) { if (blockIterator.hasNext()) { advanceBlock(); continue; } else { - return; // Done + return; // Done } } if (f.getEnd() < start) { - continue; // Skip + continue; // Skip } - currentRecord = f; // Success + currentRecord = f; // Success return; } catch (TribbleException e) { @@ -526,18 +553,24 @@ private void readNextRecord() throws IOException { final String error; if (previousRecord == null) { - error = String.format("Error parsing %s at the first queried after %s:%d", source, this.chrAlias == null ? this.queryChr : this.chrAlias, this.start); + error = String.format( + "Error parsing %s at the first queried after %s:%d", + source, this.chrAlias == null ? 
this.queryChr : this.chrAlias, this.start); } else { - error = String.format("Error parsing %s just after record at: %s:%d-%d", - source.toString(), previousRecord.getContig(), previousRecord.getStart(), previousRecord.getEnd()); + error = String.format( + "Error parsing %s just after record at: %s:%d-%d", + source.toString(), + previousRecord.getContig(), + previousRecord.getStart(), + previousRecord.getEnd()); } throw new TribbleException.MalformedFeatureFile(error, path, e); } } if (blockIterator != null && blockIterator.hasNext()) { - advanceBlock(); // Advance to next block + advanceBlock(); // Advance to next block } else { - return; // No blocks left, we're done. + return; // No blocks left, we're done. } } } diff --git a/src/main/java/htsjdk/tribble/annotation/Strand.java b/src/main/java/htsjdk/tribble/annotation/Strand.java index 6b2302442f..d051d249ee 100644 --- a/src/main/java/htsjdk/tribble/annotation/Strand.java +++ b/src/main/java/htsjdk/tribble/annotation/Strand.java @@ -42,7 +42,7 @@ public enum Strand { * Denotes that a strand designation is not applicable * or is unknown. */ - NONE('.'); // not really sure what we should do for the NONE Enum + NONE('.'); // not really sure what we should do for the NONE Enum /** * Common alias for the {@link #POSITIVE} strand. @@ -57,7 +57,7 @@ public enum Strand { /** * Cached array of instances. */ - private final static Strand[] VALUES = values(); + private static final Strand[] VALUES = values(); /** * How we represent the strand as a single {@code char}. @@ -92,7 +92,7 @@ public static Strand toStrand(final String encoding) { * the encoding char is not recognized. 
*/ public static Strand decode(final char ch) { - for(final Strand value : VALUES) { + for (final Strand value : VALUES) { if (value.charEncoding == ch) { return value; } @@ -133,4 +133,4 @@ public char encodeAsChar() { public String toString() { return stringEncoding; } -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/tribble/bed/BEDCodec.java b/src/main/java/htsjdk/tribble/bed/BEDCodec.java index 90d83623fa..9c54483db2 100644 --- a/src/main/java/htsjdk/tribble/bed/BEDCodec.java +++ b/src/main/java/htsjdk/tribble/bed/BEDCodec.java @@ -25,13 +25,11 @@ import htsjdk.samtools.util.FileExtensions; import htsjdk.samtools.util.IOUtil; -import htsjdk.tribble.AbstractFeatureReader; import htsjdk.tribble.AsciiFeatureCodec; import htsjdk.tribble.annotation.Strand; import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.readers.LineIterator; import htsjdk.tribble.util.ParsingUtils; - import java.util.regex.Pattern; /** @@ -61,7 +59,6 @@ public BEDCodec() { this(StartOffset.ONE); } - /** * BED format is 0-based, but Tribble is 1-based. * Set desired start position at either ZERO or ONE @@ -71,7 +68,6 @@ public BEDCodec(final StartOffset startOffset) { this.startOffsetValue = startOffset.value(); } - public BEDFeature decodeLoc(String line) { return decode(line); } @@ -121,7 +117,9 @@ public Object readActualHeader(final LineIterator lineIterator) { // Return true if the candidateLine looks like a BED header line. private boolean isBEDHeaderLine(final String candidateLine) { - return candidateLine.startsWith("#") || candidateLine.startsWith("track") || candidateLine.startsWith("browser"); + return candidateLine.startsWith("#") + || candidateLine.startsWith("track") + || candidateLine.startsWith("browser"); } public BEDFeature decode(String[] tokens) { @@ -172,8 +170,7 @@ public BEDFeature decode(String[] tokens) { // Strand if (tokenCount > 5) { String strandString = tokens[5].trim(); - char strand = (strandString.isEmpty()) - ? 
' ' : strandString.charAt(0); + char strand = (strandString.isEmpty()) ? ' ' : strandString.charAt(0); if (strand == '-') { feature.setStrand(Strand.NEGATIVE); @@ -184,7 +181,7 @@ public BEDFeature decode(String[] tokens) { } } - //Color + // Color if (tokenCount > 8) { String colorString = tokens[8]; feature.setColor(ParsingUtils.parseColor(colorString)); @@ -199,12 +196,12 @@ public BEDFeature decode(String[] tokens) { } protected boolean readHeaderLine(String line) { - //We don't parse BED header + // We don't parse BED header return false; } - private void createExons(int start, String[] tokens, FullBEDFeature gene, - Strand strand) throws NumberFormatException { + private void createExons(int start, String[] tokens, FullBEDFeature gene, Strand strand) + throws NumberFormatException { int cdStart = Integer.parseInt(tokens[6]) + startOffsetValue; int cdEnd = Integer.parseInt(tokens[7]); diff --git a/src/main/java/htsjdk/tribble/bed/BEDFeature.java b/src/main/java/htsjdk/tribble/bed/BEDFeature.java index a342b65317..66f9a1a4be 100644 --- a/src/main/java/htsjdk/tribble/bed/BEDFeature.java +++ b/src/main/java/htsjdk/tribble/bed/BEDFeature.java @@ -25,7 +25,6 @@ import htsjdk.tribble.NamedFeature; import htsjdk.tribble.annotation.Strand; - import java.awt.*; /** diff --git a/src/main/java/htsjdk/tribble/bed/FullBEDFeature.java b/src/main/java/htsjdk/tribble/bed/FullBEDFeature.java index 975777dc28..e0c5bf6af3 100644 --- a/src/main/java/htsjdk/tribble/bed/FullBEDFeature.java +++ b/src/main/java/htsjdk/tribble/bed/FullBEDFeature.java @@ -24,7 +24,6 @@ package htsjdk.tribble.bed; import htsjdk.tribble.annotation.Strand; - import java.util.ArrayList; /** @@ -36,7 +35,6 @@ public class FullBEDFeature extends SimpleBEDFeature implements BEDFeature { public FullBEDFeature(String chr, int start, int end) { super(start, end, chr); - } @Override @@ -77,12 +75,14 @@ public class Exon { * codon is number "1". 
*/ private int number; + private int readingFrame = -1; /** * Coding start position. This is the leftmost position of the coding region, not neccessarily the 5'utr end */ private int codingStart; + private int codingEnd; boolean utr = false; @@ -90,12 +90,10 @@ public class Exon { // to either the beginning or end of the exon, depending on the strand private int mrnaBase = -1; - public void setMrnaBase(int base) { this.mrnaBase = base; } - /** * Constructs ... * @@ -166,7 +164,6 @@ public void setPhase(int phase) { } } - /** * Method description * @@ -203,7 +200,6 @@ int getReadingShift() { return readingFrame; } - public String getValueString(double position) { String msg = number > 0 ? "Exon number: " + number : ""; return msg; @@ -218,7 +214,6 @@ public void setNumber(int number) { } } - public class Exon2 { /** @@ -226,23 +221,24 @@ public class Exon2 { * codon is number "1". */ private int number; + private int readingFrame = -1; /** * Coding start position. This is the leftmost position of the coding region, not neccessarily the 5'utr end */ private int start; + private int end; private int codingStart; private int codingEnd; - //private AminoAcidSequence aminoAcidSequence; + // private AminoAcidSequence aminoAcidSequence; boolean utr = false; // The position of the first base of this exon relative to the start of the mRNA. 
This will correspond // to either the beginning or end of the exon, depending on the strand private int mrnaBase = -1; - public Exon2(int start, int end, int codingStart, int codingDne) { this.start = start; @@ -251,7 +247,6 @@ public Exon2(int start, int end, int codingStart, int codingDne) { this.codingEnd = codingDne; } - public void setMrnaBase(int base) { this.mrnaBase = base; } @@ -331,7 +326,6 @@ public void setPhase(int phase) { } } - /** * Method description * @@ -368,7 +362,6 @@ int getReadingShift() { return readingFrame; } - /** * Method description * @@ -383,7 +376,6 @@ public AminoAcidSequence getAminoAcidSequence() { } */ - /* public void setAminoAcidSequence(AminoAcidSequence aminoAcidSequence) { this.aminoAcidSequence = aminoAcidSequence; @@ -411,7 +403,6 @@ private void computeAminoAcidSequence() { } */ - public String getValueString(double position) { String msg = number > 0 ? "Exon number: " + number : ""; int aaNumber = this.getAminoAcidNumber((int) position); @@ -428,7 +419,5 @@ public int getNumber() { public void setNumber(int number) { this.number = number; } - } - } diff --git a/src/main/java/htsjdk/tribble/bed/SimpleBEDFeature.java b/src/main/java/htsjdk/tribble/bed/SimpleBEDFeature.java index 4a64168677..aa700eb3a2 100644 --- a/src/main/java/htsjdk/tribble/bed/SimpleBEDFeature.java +++ b/src/main/java/htsjdk/tribble/bed/SimpleBEDFeature.java @@ -24,7 +24,6 @@ package htsjdk.tribble.bed; import htsjdk.tribble.annotation.Strand; - import java.awt.*; import java.util.ArrayList; import java.util.List; @@ -41,8 +40,8 @@ public class SimpleBEDFeature implements BEDFeature { private float score = Float.NaN; private String type = ""; private Color color; - private String description;//protected float confidence; - //private String identifier; + private String description; // protected float confidence; + // private String identifier; private String link; public SimpleBEDFeature(int start, int end, String chr) { @@ -141,7 +140,7 @@ public void 
setLink(String link) { this.link = link; } - final static List emptyExonList = new ArrayList(); + static final List emptyExonList = new ArrayList(); @Override public java.util.List getExons() { diff --git a/src/main/java/htsjdk/tribble/example/CountRecords.java b/src/main/java/htsjdk/tribble/example/CountRecords.java index 2e6b4aafc2..6cec76cfbf 100644 --- a/src/main/java/htsjdk/tribble/example/CountRecords.java +++ b/src/main/java/htsjdk/tribble/example/CountRecords.java @@ -33,7 +33,6 @@ import htsjdk.tribble.index.IndexFactory; import htsjdk.tribble.index.linear.LinearIndex; import htsjdk.tribble.util.LittleEndianOutputStream; - import java.io.BufferedOutputStream; import java.io.File; import java.io.FileOutputStream; @@ -56,10 +55,9 @@ public class CountRecords { * @param args a single parameter, the file name to load */ public static void main(String[] args) { - + // check yourself before you wreck yourself - we require one arg, the input file - if (args.length > 2) - printUsage(); + if (args.length > 2) printUsage(); // our feature file File featureFile = new File(args[0]); @@ -74,7 +72,6 @@ public static void main(String[] args) { FeatureCodec codec = getFeatureCodec(featureFile); runWithIndex(featureFile, codec, optimizeIndex); - } /** @@ -88,13 +85,12 @@ public static void main(String[] args) { public static long runWithIndex(File featureInput, FeatureCodec codec, int optimizeThreshold) { // get an index Index index = loadIndex(featureInput, codec); - if ( optimizeThreshold != -1 ) - ((LinearIndex)index).optimize(optimizeThreshold); + if (optimizeThreshold != -1) ((LinearIndex) index).optimize(optimizeThreshold); // get a reader AbstractFeatureReader reader = null; try { - reader = AbstractFeatureReader.getFeatureReader(featureInput.getAbsolutePath(), codec, index); + reader = AbstractFeatureReader.getFeatureReader(featureInput.getAbsolutePath(), codec, index); // now read iterate over the file long recordCount = 0l; @@ -147,10 +143,10 @@ public static 
Index loadIndex(File featureFile, FeatureCodec codec) { if (indexFile.canRead()) { System.err.println("Loading index from disk for index file -> " + indexFile); index = IndexFactory.loadIndex(indexFile.getAbsolutePath()); - // else we want to make the index, and write it to disk if possible + // else we want to make the index, and write it to disk if possible } else { System.err.println("Creating the index and memory, then writing to disk for index file -> " + indexFile); - index = createAndWriteNewIndex(featureFile,indexFile,codec); + index = createAndWriteNewIndex(featureFile, indexFile, codec); } return index; @@ -168,14 +164,15 @@ public static Index createAndWriteNewIndex(File featureFile, File indexFile, Fea Index index = IndexFactory.createLinearIndex(featureFile, codec); // try to write it to disk - LittleEndianOutputStream stream = new LittleEndianOutputStream(new BufferedOutputStream(new FileOutputStream(indexFile))); - + LittleEndianOutputStream stream = + new LittleEndianOutputStream(new BufferedOutputStream(new FileOutputStream(indexFile))); + index.write(stream); stream.close(); return index; } catch (IOException e) { - throw new RuntimeIOException("Unable to create index from file " + featureFile,e); + throw new RuntimeIOException("Unable to create index from file " + featureFile, e); } } @@ -188,8 +185,8 @@ public static Index createAndWriteNewIndex(File featureFile, File indexFile, Fea */ public static FeatureCodec getFeatureCodec(File featureFile) { // quickly determine the codec type - if (featureFile.getName().endsWith(".bed") || featureFile.getName().endsWith(".BED") ) - return new BEDCodec(); - throw new IllegalArgumentException("Unable to determine correct file type based on the file name, for file -> " + featureFile); + if (featureFile.getName().endsWith(".bed") || featureFile.getName().endsWith(".BED")) return new BEDCodec(); + throw new IllegalArgumentException( + "Unable to determine correct file type based on the file name, for file -> " 
+ featureFile); } } diff --git a/src/main/java/htsjdk/tribble/example/ExampleBinaryCodec.java b/src/main/java/htsjdk/tribble/example/ExampleBinaryCodec.java index 9628cc4fe6..341126cd47 100644 --- a/src/main/java/htsjdk/tribble/example/ExampleBinaryCodec.java +++ b/src/main/java/htsjdk/tribble/example/ExampleBinaryCodec.java @@ -24,16 +24,15 @@ package htsjdk.tribble.example; import htsjdk.tribble.AbstractFeatureReader; -import htsjdk.tribble.SimpleFeature; import htsjdk.tribble.BinaryFeatureCodec; import htsjdk.tribble.Feature; import htsjdk.tribble.FeatureCodec; import htsjdk.tribble.FeatureCodecHeader; import htsjdk.tribble.FeatureReader; +import htsjdk.tribble.SimpleFeature; import htsjdk.tribble.readers.AsciiLineReader; import htsjdk.tribble.readers.LineIterator; import htsjdk.tribble.readers.PositionalBufferedStream; - import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.File; @@ -50,7 +49,7 @@ * @author Mark DePristo */ public class ExampleBinaryCodec extends BinaryFeatureCodec { - public final static String HEADER_LINE = "# BinaryTestFeature"; + public static final String HEADER_LINE = "# BinaryTestFeature"; @Override public Feature decodeLoc(final PositionalBufferedStream stream) throws IOException { @@ -68,7 +67,8 @@ public Feature decode(final PositionalBufferedStream stream) throws IOException @Override public FeatureCodecHeader readHeader(final PositionalBufferedStream stream) throws IOException { - // Construct a reader that does not read ahead (because we don't want to consume data from the stream that is not the header) + // Construct a reader that does not read ahead (because we don't want to consume data from the stream that is + // not the header) final AsciiLineReader nonReadAheadLineReader = new AsciiLineReader(stream); final List headerLines = new ArrayList(); long headerLengthInBytes = 0; @@ -84,6 +84,7 @@ public Class getFeatureType() { return Feature.class; } + @Override public boolean canDecode(final String path) 
{ return false; @@ -99,8 +100,11 @@ public boolean canDecode(final String path) { * @param codec of the source file features * @throws IOException */ - public static void convertToBinaryTest(final File source, final File dest, final FeatureCodec codec) throws IOException { - final FeatureReader reader = AbstractFeatureReader.getFeatureReader(source.getAbsolutePath(), codec, false); // IndexFactory.loadIndex(idxFile)); + public static void convertToBinaryTest( + final File source, final File dest, final FeatureCodec codec) + throws IOException { + final FeatureReader reader = AbstractFeatureReader.getFeatureReader( + source.getAbsolutePath(), codec, false); // IndexFactory.loadIndex(idxFile)); final OutputStream output = new FileOutputStream(dest); ExampleBinaryCodec.convertToBinaryTest(reader, output); } @@ -112,11 +116,12 @@ public static void convertToBinaryTest(final File * * @throws IOException */ - public static void convertToBinaryTest(final FeatureReader reader, final OutputStream out) throws IOException { + public static void convertToBinaryTest( + final FeatureReader reader, final OutputStream out) throws IOException { DataOutputStream dos = new DataOutputStream(out); dos.writeBytes(HEADER_LINE + "\n"); Iterator it = reader.iterator(); - while ( it.hasNext() ) { + while (it.hasNext()) { final Feature f = it.next(); dos.writeUTF(f.getContig()); dos.writeInt(f.getStart()); diff --git a/src/main/java/htsjdk/tribble/example/IndexToTable.java b/src/main/java/htsjdk/tribble/example/IndexToTable.java index 9cf6c702ee..f6055ff9b3 100644 --- a/src/main/java/htsjdk/tribble/example/IndexToTable.java +++ b/src/main/java/htsjdk/tribble/example/IndexToTable.java @@ -26,13 +26,11 @@ import htsjdk.tribble.index.IndexFactory; import htsjdk.tribble.index.linear.LinearIndex; - import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.PrintStream; - public class IndexToTable { /** @@ -49,14 +47,13 @@ public class IndexToTable { 
public static void main(String[] args) { // check yourself before you wreck yourself - we require one arg, the input file - if (args.length != 2) - printUsage(); + if (args.length != 2) printUsage(); - //LinearIndex.enableAdaptiveIndexing = false; + // LinearIndex.enableAdaptiveIndexing = false; LinearIndex idx = (LinearIndex) IndexFactory.loadIndex(new File(args[0]).getAbsolutePath()); try { idx.writeTable(new PrintStream(new FileOutputStream(new File(args[1])))); - } catch ( FileNotFoundException e ) { + } catch (FileNotFoundException e) { e.printStackTrace(); System.exit(1); } @@ -69,4 +66,4 @@ public static void printUsage() { System.err.println("Usage: java -jar IndexToTable.jar index.file output.table"); System.exit(1); } -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/tribble/example/IndicesAreEqual.java b/src/main/java/htsjdk/tribble/example/IndicesAreEqual.java index b0ea7127f3..95b0171c46 100644 --- a/src/main/java/htsjdk/tribble/example/IndicesAreEqual.java +++ b/src/main/java/htsjdk/tribble/example/IndicesAreEqual.java @@ -25,7 +25,6 @@ import htsjdk.tribble.index.Index; import htsjdk.tribble.index.IndexFactory; - import java.io.File; /** @@ -38,8 +37,7 @@ public class IndicesAreEqual { * @param args 2 parameters, the paths of the two index files to compare */ public static void main(String[] args) { - if ( args.length != 2 ) - printUsage(); + if (args.length != 2) printUsage(); else { Index index1 = loadIndex(args[0]); Index index2 = loadIndex(args[1]); @@ -64,7 +62,7 @@ public static void printUsage() { * @return an index instance */ public static Index loadIndex(String filename) { - //System.err.println("Loading index from disk for index file -> " + filename); + // System.err.println("Loading index from disk for index file -> " + filename); File file = new File(filename); if (file.canRead()) { return IndexFactory.loadIndex(file.getAbsolutePath()); diff --git a/src/main/java/htsjdk/tribble/example/ProfileIndexReading.java 
b/src/main/java/htsjdk/tribble/example/ProfileIndexReading.java index d39a571122..22d287698c 100644 --- a/src/main/java/htsjdk/tribble/example/ProfileIndexReading.java +++ b/src/main/java/htsjdk/tribble/example/ProfileIndexReading.java @@ -39,15 +39,14 @@ public class ProfileIndexReading { public static void main(String[] args) { // check yourself before you wreck yourself - we require one arg, the input file - if (args.length < 2) - printUsage(); + if (args.length < 2) printUsage(); int iterations = Integer.parseInt(args[0]); - for ( int j = 1; j < args.length; j++ ) { + for (int j = 1; j < args.length; j++) { String indexFile = args[j]; System.out.printf("Reading %s%n", indexFile); long startTime = System.currentTimeMillis(); - for ( int i = 0; i < iterations; i++ ) { + for (int i = 0; i < iterations; i++) { System.out.printf(" iteration %d%n", i); Index index = IndexFactory.loadIndex(indexFile); } @@ -63,4 +62,4 @@ public static void printUsage() { System.err.println("Usage: java -jar ReadIndices.jar iterations index.file..."); System.exit(1); } -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/tribble/exception/CodecLineParsingException.java b/src/main/java/htsjdk/tribble/exception/CodecLineParsingException.java index 8cc30428bb..851e6d1c58 100644 --- a/src/main/java/htsjdk/tribble/exception/CodecLineParsingException.java +++ b/src/main/java/htsjdk/tribble/exception/CodecLineParsingException.java @@ -23,7 +23,6 @@ */ package htsjdk.tribble.exception; - /** * Class CodecLineParsingException * @@ -45,6 +44,5 @@ public CodecLineParsingException(String message) { super(message); } - public CodecLineParsingException() { - } + public CodecLineParsingException() {} } diff --git a/src/main/java/htsjdk/tribble/exception/UnsortedFileException.java b/src/main/java/htsjdk/tribble/exception/UnsortedFileException.java index 7f5e387071..2609878a43 100644 --- a/src/main/java/htsjdk/tribble/exception/UnsortedFileException.java +++ 
b/src/main/java/htsjdk/tribble/exception/UnsortedFileException.java @@ -40,6 +40,5 @@ public UnsortedFileException(String message) { super(message); } - public UnsortedFileException() { - } -} \ No newline at end of file + public UnsortedFileException() {} +} diff --git a/src/main/java/htsjdk/tribble/gff/Gff3BaseData.java b/src/main/java/htsjdk/tribble/gff/Gff3BaseData.java index 8d35978860..afb0f05c1f 100644 --- a/src/main/java/htsjdk/tribble/gff/Gff3BaseData.java +++ b/src/main/java/htsjdk/tribble/gff/Gff3BaseData.java @@ -2,7 +2,6 @@ import htsjdk.samtools.util.Locatable; import htsjdk.tribble.annotation.Strand; - import java.util.ArrayList; import java.util.Collections; import java.util.LinkedHashMap; @@ -25,9 +24,16 @@ public class Gff3BaseData implements Locatable { private final List aliases; private final int hashCode; - public Gff3BaseData(final String contig, final String source, final String type, - final int start, final int end, final Double score, final Strand strand, final int phase, - final Map> attributes) { + public Gff3BaseData( + final String contig, + final String source, + final String type, + final int start, + final int end, + final Double score, + final Strand strand, + final int phase, + final Map> attributes) { this.contig = contig; this.source = source; this.type = type; @@ -59,20 +65,20 @@ public boolean equals(Object other) { if (other == this) { return true; } - if(!other.getClass().equals(Gff3BaseData.class)) { + if (!other.getClass().equals(Gff3BaseData.class)) { return false; } final Gff3BaseData otherBaseData = (Gff3BaseData) other; - boolean ret = otherBaseData.getContig().equals(getContig()) && - otherBaseData.getSource().equals(getSource()) && - otherBaseData.getType().equals(getType()) && - otherBaseData.getStart() == getStart() && - otherBaseData.getEnd() == getEnd() && - ((Double)otherBaseData.getScore()).equals(score) && - otherBaseData.getPhase() == getPhase() && - otherBaseData.getStrand().equals(getStrand()) && - 
otherBaseData.getAttributes().equals(getAttributes()); + boolean ret = otherBaseData.getContig().equals(getContig()) + && otherBaseData.getSource().equals(getSource()) + && otherBaseData.getType().equals(getType()) + && otherBaseData.getStart() == getStart() + && otherBaseData.getEnd() == getEnd() + && ((Double) otherBaseData.getScore()).equals(score) + && otherBaseData.getPhase() == getPhase() + && otherBaseData.getStrand().equals(getStrand()) + && otherBaseData.getAttributes().equals(getAttributes()); if (getId() == null) { ret = ret && otherBaseData.getId() == null; } else { @@ -82,7 +88,9 @@ public boolean equals(Object other) { if (getName() == null) { ret = ret && otherBaseData.getName() == null; } else { - ret = ret && otherBaseData.getName() != null && otherBaseData.getName().equals(getName()); + ret = ret + && otherBaseData.getName() != null + && otherBaseData.getName().equals(getName()); } ret = ret && otherBaseData.getAliases().equals(getAliases()); @@ -157,9 +165,9 @@ public Map> getAttributes() { return attributes; } - /** + /** * get the values as List for the key, or an empty list if this key is not present - * + * * @param key key whose presence in this map is to be tested * @return the values as List, or an empty list if this key is not present */ @@ -169,33 +177,37 @@ public List getAttribute(final String key) { /** * Returns true if this record contains an attribute for the specified key. - * + * * @param key key whose presence in this map is to be tested * @return true if this map contains an attribute for the specified key */ public boolean hasAttribute(final String key) { return attributes.containsKey(key); } - + /** - * Most attributes in a GFF file are present just one time in a line, e.g. : gene_biotype, gene_name, etc ... + * Most attributes in a GFF file are present just one time in a line, e.g. : gene_biotype, gene_name, etc ... 
* This function returns an Optional.empty if the key is not present, * an Optional.of(value) if there is only one value associated to the key, * or it throws an IllegalArgumentException if there is more than one value. - * + * * @param key key whose presence in the attributes is to be tested * @return Optional<String> if this map contains zero or one attribute for the specified key * @throws IllegalArgumentException if there is more than one value */ public Optional getUniqueAttribute(final String key) { final List atts = getAttribute(key); - switch(atts.size()) { - case 0 : return Optional.empty(); - case 1 : return Optional.of(atts.get(0)); - default : throw new IllegalArgumentException("getUniqueAttribute cannot be called with key="+key+" because it contains more than one value " + String.join(", ", atts)); + switch (atts.size()) { + case 0: + return Optional.empty(); + case 1: + return Optional.of(atts.get(0)); + default: + throw new IllegalArgumentException("getUniqueAttribute cannot be called with key=" + key + + " because it contains more than one value " + String.join(", ", atts)); } } - + public String getId() { return id; } diff --git a/src/main/java/htsjdk/tribble/gff/Gff3Codec.java b/src/main/java/htsjdk/tribble/gff/Gff3Codec.java index 6bf5be284c..945840a562 100644 --- a/src/main/java/htsjdk/tribble/gff/Gff3Codec.java +++ b/src/main/java/htsjdk/tribble/gff/Gff3Codec.java @@ -4,7 +4,6 @@ import htsjdk.samtools.util.FileExtensions; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.LocationAware; - import htsjdk.samtools.util.Log; import htsjdk.tribble.AbstractFeatureCodec; import htsjdk.tribble.Feature; @@ -14,9 +13,6 @@ import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.readers.*; import htsjdk.tribble.util.ParsingUtils; - - - import java.io.*; import java.net.URLDecoder; import java.nio.file.Files; @@ -34,11 +30,8 @@ * {@link Gff3Feature#getAncestors()}, {@link Gff3Feature#getDescendents()}, amd {@link 
Gff3Feature#flatten()}. This linking is not guaranteed to be comprehensive when the file is read for only features overlapping a particular * region, using a tribble index. In this case, a particular feature will only be linked to the subgroup of features it is linked to in the input file which overlap the given region. */ - public class Gff3Codec extends AbstractFeatureCodec { - - private static final int NUM_FIELDS = 9; private static final int CHROMOSOME_NAME_INDEX = 0; @@ -51,7 +44,6 @@ public class Gff3Codec extends AbstractFeatureCodec { private static final int GENOMIC_PHASE_INDEX = 7; private static final int EXTRA_FIELDS_INDEX = 8; - private static final String IS_CIRCULAR_ATTRIBUTE_KEY = "Is_circular"; private static final String ARTEMIS_FASTA_MARKER = ">"; @@ -64,7 +56,7 @@ public class Gff3Codec extends AbstractFeatureCodec { private final Map sequenceRegionMap = new LinkedHashMap<>(); private final Map commentsWithLineNumbers = new LinkedHashMap<>(); - private final static Log logger = Log.getInstance(Gff3Codec.class); + private static final Log logger = Log.getInstance(Gff3Codec.class); private boolean reachedFasta = false; @@ -74,7 +66,7 @@ public class Gff3Codec extends AbstractFeatureCodec { /** filter to removing keys from the EXTRA_FIELDS column */ private final Predicate filterOutAttribute; - + public Gff3Codec() { this(DecodeDepth.DEEP); } @@ -92,18 +84,20 @@ public Gff3Codec(final DecodeDepth decodeDepth, final Predicate filterOu this.decodeDepth = decodeDepth; this.filterOutAttribute = filterOutAttribute; /* check required keys are always kept */ - for (final String key : new String[] {Gff3Constants.PARENT_ATTRIBUTE_KEY, Gff3Constants.ID_ATTRIBUTE_KEY, Gff3Constants.NAME_ATTRIBUTE_KEY}) { + for (final String key : new String[] { + Gff3Constants.PARENT_ATTRIBUTE_KEY, Gff3Constants.ID_ATTRIBUTE_KEY, Gff3Constants.NAME_ATTRIBUTE_KEY + }) { if (filterOutAttribute.test(key)) { throw new IllegalArgumentException("Predicate should always accept " + 
key); - } + } } } public enum DecodeDepth { - DEEP , + DEEP, SHALLOW } - + @Override public Gff3Feature decode(final LineIterator lineIterator) throws IOException { return decode(lineIterator, decodeDepth); @@ -117,7 +111,7 @@ private Gff3Feature decode(final LineIterator lineIterator, final DecodeDepth de list of active features and both maps. Always poll featuresToFlush to return any completed top level features. */ if (!lineIterator.hasNext()) { - //no more lines, flush whatever is active + // no more lines, flush whatever is active prepareToFlushFeatures(); return featuresToFlush.poll(); } @@ -125,13 +119,13 @@ private Gff3Feature decode(final LineIterator lineIterator, final DecodeDepth de final String line = lineIterator.next(); if (reachedFasta) { - //previously reached fasta, flush whatever is active + // previously reached fasta, flush whatever is active prepareToFlushFeatures(); return featuresToFlush.poll(); } if (line.startsWith(ARTEMIS_FASTA_MARKER)) { - //backwards compatability with Artemis is built into gff3 spec + // backwards compatability with Artemis is built into gff3 spec processDirective(Gff3Directive.FASTA_DIRECTIVE, null); return featuresToFlush.poll(); } @@ -146,12 +140,10 @@ private Gff3Feature decode(final LineIterator lineIterator, final DecodeDepth de return featuresToFlush.poll(); } - - final Gff3FeatureImpl thisFeature = new Gff3FeatureImpl(parseLine(line, currentLine, this.filterOutAttribute)); activeFeatures.add(thisFeature); if (depth == DecodeDepth.DEEP) { - //link to parents/children/co-features + // link to parents/children/co-features final List parentIDs = thisFeature.getAttribute(Gff3Constants.PARENT_ATTRIBUTE_KEY); final String id = thisFeature.getID(); @@ -190,40 +182,44 @@ private Gff3Feature decode(final LineIterator lineIterator, final DecodeDepth de validateFeature(thisFeature); if (depth == DecodeDepth.SHALLOW) { - //flush all features immediatly + // flush all features immediatly prepareToFlushFeatures(); } return 
featuresToFlush.poll(); } - /** * Parse attributes field for gff3 feature * @param attributesString attributes field string from line in gff3 file * @return map of keys to values for attributes of this feature * @throws UnsupportedEncodingException */ - static private Map> parseAttributes(final String attributesString) throws UnsupportedEncodingException { + private static Map> parseAttributes(final String attributesString) + throws UnsupportedEncodingException { if (attributesString.equals(Gff3Constants.UNDEFINED_FIELD_VALUE)) { return Collections.emptyMap(); } final Map> attributes = new LinkedHashMap<>(); - final List splitLine = ParsingUtils.split(attributesString,Gff3Constants.ATTRIBUTE_DELIMITER); - for(String attribute : splitLine) { - final List key_value = ParsingUtils.split(attribute,Gff3Constants.KEY_VALUE_SEPARATOR); + final List splitLine = ParsingUtils.split(attributesString, Gff3Constants.ATTRIBUTE_DELIMITER); + for (String attribute : splitLine) { + final List key_value = ParsingUtils.split(attribute, Gff3Constants.KEY_VALUE_SEPARATOR); if (key_value.size() != 2) { throw new TribbleException("Attribute string " + attributesString + " is invalid"); } - attributes.put(URLDecoder.decode(key_value.get(0).trim(), "UTF-8"), decodeAttributeValue(key_value.get(1).trim())); + attributes.put( + URLDecoder.decode(key_value.get(0).trim(), "UTF-8"), + decodeAttributeValue(key_value.get(1).trim())); } return attributes; } - private static Gff3BaseData parseLine(final String line, final int currentLine, final Predicate filterOutAttribute) { + private static Gff3BaseData parseLine( + final String line, final int currentLine, final Predicate filterOutAttribute) { final List splitLine = ParsingUtils.split(line, Gff3Constants.FIELD_DELIMITER); if (splitLine.size() != NUM_FIELDS) { - throw new TribbleException("Found an invalid number of columns in the given Gff3 file at line + " + currentLine + " - Given: " + splitLine.size() + " Expected: " + NUM_FIELDS + " : " + 
line); + throw new TribbleException("Found an invalid number of columns in the given Gff3 file at line + " + + currentLine + " - Given: " + splitLine.size() + " Expected: " + NUM_FIELDS + " : " + line); } try { @@ -232,17 +228,24 @@ private static Gff3BaseData parseLine(final String line, final int currentLine, final String type = URLDecoder.decode(splitLine.get(FEATURE_TYPE_INDEX), "UTF-8"); final int start = Integer.parseInt(splitLine.get(START_LOCATION_INDEX)); final int end = Integer.parseInt(splitLine.get(END_LOCATION_INDEX)); - final double score = splitLine.get(SCORE_INDEX).equals(Gff3Constants.UNDEFINED_FIELD_VALUE) ? -1 : Double.parseDouble(splitLine.get(SCORE_INDEX)); - final int phase = splitLine.get(GENOMIC_PHASE_INDEX).equals(Gff3Constants.UNDEFINED_FIELD_VALUE) ? -1 : Integer.parseInt(splitLine.get(GENOMIC_PHASE_INDEX)); + final double score = splitLine.get(SCORE_INDEX).equals(Gff3Constants.UNDEFINED_FIELD_VALUE) + ? -1 + : Double.parseDouble(splitLine.get(SCORE_INDEX)); + final int phase = splitLine.get(GENOMIC_PHASE_INDEX).equals(Gff3Constants.UNDEFINED_FIELD_VALUE) + ? -1 + : Integer.parseInt(splitLine.get(GENOMIC_PHASE_INDEX)); final Strand strand = Strand.decode(splitLine.get(GENOMIC_STRAND_INDEX)); final Map> attributes = parseAttributes(splitLine.get(EXTRA_FIELDS_INDEX)); /* remove attibutes matching 'filterOutAttribute' */ attributes.keySet().removeIf(filterOutAttribute); return new Gff3BaseData(contig, source, type, start, end, score, strand, phase, attributes); - } catch (final NumberFormatException ex ) { - throw new TribbleException("Cannot read integer value for start/end position from line " + currentLine + ". Line is: " + line, ex); + } catch (final NumberFormatException ex) { + throw new TribbleException( + "Cannot read integer value for start/end position from line " + currentLine + ". Line is: " + line, + ex); } catch (final IOException ex) { - throw new TribbleException("Cannot decode feature info from line " + currentLine + ". 
Line is: " + line, ex); + throw new TribbleException( + "Cannot decode feature info from line " + currentLine + ". Line is: " + line, ex); } } @@ -279,13 +282,15 @@ private void validateFeature(final Gff3Feature feature) { if (sequenceRegionMap.containsKey(feature.getContig())) { final SequenceRegion region = sequenceRegionMap.get(feature.getContig()); if (feature.getStart() == region.getStart() && feature.getEnd() == region.getEnd()) { - //landmark feature - final boolean isCircular = Boolean.parseBoolean(extractSingleAttribute(feature.getAttribute(IS_CIRCULAR_ATTRIBUTE_KEY))); + // landmark feature + final boolean isCircular = + Boolean.parseBoolean(extractSingleAttribute(feature.getAttribute(IS_CIRCULAR_ATTRIBUTE_KEY))); region.setCircular(isCircular); } - if (region.isCircular()? !region.overlaps(feature) : !region.contains(feature)) { - throw new TribbleException("feature at " + feature.getContig() + ":" + feature.getStart() + "-" + feature.getEnd() + - " not contained in specified sequence region (" + region.getContig() + ":" + region.getStart() + "-" + region.getEnd()); + if (region.isCircular() ? !region.overlaps(feature) : !region.contains(feature)) { + throw new TribbleException("feature at " + feature.getContig() + ":" + feature.getStart() + "-" + + feature.getEnd() + " not contained in specified sequence region (" + region.getContig() + ":" + + region.getStart() + "-" + region.getEnd()); } } } @@ -306,9 +311,11 @@ public boolean canDecode(final String inputFilePath) { if (canDecode) { // Crack open the file and look at the top of it: - final InputStream inputStream = IOUtil.hasGzipFileExtension(p)? new GZIPInputStream(Files.newInputStream(p)) : Files.newInputStream(p); + final InputStream inputStream = IOUtil.hasGzipFileExtension(p) + ? 
new GZIPInputStream(Files.newInputStream(p)) + : Files.newInputStream(p); - try ( BufferedReader br = new BufferedReader(new InputStreamReader(inputStream)) ) { + try (BufferedReader br = new BufferedReader(new InputStreamReader(inputStream))) { String line = br.readLine(); @@ -318,7 +325,7 @@ public boolean canDecode(final String inputFilePath) { } while (line.startsWith(Gff3Constants.COMMENT_START)) { line = br.readLine(); - if ( line == null ) { + if (line == null) { return false; } } @@ -340,20 +347,17 @@ public boolean canDecode(final String inputFilePath) { // check for strand final String strand = fields.get(GENOMIC_STRAND_INDEX); - canDecode &= strand.equals(Strand.POSITIVE.toString()) || - strand.equals(Strand.NEGATIVE.toString()) || - strand.equals(Strand.NONE.toString()) || - strand.equals("?"); + canDecode &= strand.equals(Strand.POSITIVE.toString()) + || strand.equals(Strand.NEGATIVE.toString()) + || strand.equals(Strand.NONE.toString()) + || strand.equals("?"); } } - } - } - catch (FileNotFoundException ex) { + } catch (FileNotFoundException ex) { logger.error(inputFilePath + " not found."); return false; - } - catch (final IOException ex) { + } catch (final IOException ex) { return false; } @@ -361,7 +365,7 @@ public boolean canDecode(final String inputFilePath) { } static List decodeAttributeValue(final String attributeValue) { - //split on VALUE_DELIMITER, then decode + // split on VALUE_DELIMITER, then decode final List splitValues = ParsingUtils.split(attributeValue, Gff3Constants.VALUE_DELIMITER); final List decodedValues = new ArrayList<>(); @@ -391,7 +395,7 @@ static String extractSingleAttribute(final List values) { public FeatureCodecHeader readHeader(LineIterator lineIterator) { List header = new ArrayList<>(); - while(lineIterator.hasNext()) { + while (lineIterator.hasNext()) { String line = lineIterator.peek(); if (line.startsWith(Gff3Constants.COMMENT_START)) { header.add(line); @@ -432,7 +436,8 @@ private void processDirective(final 
Gff3Directive directive, final Object decode case SEQUENCE_REGION_DIRECTIVE: final SequenceRegion newRegion = (SequenceRegion) decodedResult; if (sequenceRegionMap.containsKey(newRegion.getContig())) { - throw new TribbleException("directive for sequence-region " + newRegion.getContig() + " included more than once."); + throw new TribbleException( + "directive for sequence-region " + newRegion.getContig() + " included more than once."); } sequenceRegionMap.put(newRegion.getContig(), newRegion); break; @@ -446,8 +451,9 @@ private void processDirective(final Gff3Directive directive, final Object decode break; default: - throw new IllegalArgumentException( "Directive " + directive + " has been added to Gff3Directive, but is not being handled by Gff3Codec::processDirective. This is a BUG."); - + throw new IllegalArgumentException( + "Directive " + directive + + " has been added to Gff3Directive, but is not being handled by Gff3Codec::processDirective. This is a BUG."); } } @@ -478,7 +484,7 @@ public boolean isDone(final LineIterator lineIterator) { @Override public void close(final LineIterator lineIterator) { - //cleanup resources + // cleanup resources featuresToFlush.clear(); activeFeaturesWithIDs.clear(); activeFeatures.clear(); @@ -495,10 +501,9 @@ public TabixFormat getTabixFormat() { * Enum for parsing directive lines. 
If information in directive line needs to be parsed beyond specifying directive type, decode method should be overriden */ public enum Gff3Directive { - VERSION3_DIRECTIVE("##gff-version\\s+3(?:\\.\\d*)*$") { @Override - protected Object decode(final String line) throws IOException { + protected Object decode(final String line) throws IOException { final String[] splitLine = line.split("\\s+"); return splitLine[1]; } @@ -509,12 +514,13 @@ String encode(final Object object) { throw new TribbleException("Cannot encode null in VERSION3_DIRECTIVE"); } if (!(object instanceof String)) { - throw new TribbleException("Cannot encode object of type " + object.getClass() + " in VERSION3_DIRECTIVE"); + throw new TribbleException( + "Cannot encode object of type " + object.getClass() + " in VERSION3_DIRECTIVE"); } - final String versionLine = "##gff-version " + (String)object; + final String versionLine = "##gff-version " + (String) object; if (!regexPattern.matcher(versionLine).matches()) { - throw new TribbleException("Version " + (String)object + " is not a valid version"); + throw new TribbleException("Version " + (String) object + " is not a valid version"); } return versionLine; @@ -522,9 +528,10 @@ String encode(final Object object) { }, SEQUENCE_REGION_DIRECTIVE("##sequence-region\\s+.+ \\d+ \\d+$") { - final private int CONTIG_INDEX = 1; - final private int START_INDEX = 2; - final private int END_INDEX = 3; + private final int CONTIG_INDEX = 1; + private final int START_INDEX = 2; + private final int END_INDEX = 3; + @Override protected Object decode(final String line) throws IOException { final String[] splitLine = line.split("\\s+"); @@ -540,11 +547,13 @@ String encode(final Object object) { throw new TribbleException("Cannot encode null in SEQUENCE_REGION_DIRECTIVE"); } if (!(object instanceof SequenceRegion)) { - throw new TribbleException("Cannot encode object of type " + object.getClass() + " in SEQUENCE_REGION_DIRECTIVE"); + throw new TribbleException( + 
"Cannot encode object of type " + object.getClass() + " in SEQUENCE_REGION_DIRECTIVE"); } final SequenceRegion sequenceRegion = (SequenceRegion) object; - return "##sequence-region " + Gff3Writer.encodeString(sequenceRegion.getContig()) + " " + sequenceRegion.getStart() + " " + sequenceRegion.getEnd(); + return "##sequence-region " + Gff3Writer.encodeString(sequenceRegion.getContig()) + " " + + sequenceRegion.getStart() + " " + sequenceRegion.getEnd(); } }, @@ -570,7 +579,7 @@ String encode(final Object object) { public static Gff3Directive toDirective(final String line) { for (final Gff3Directive directive : Gff3Directive.values()) { - if(directive.regexPattern.matcher(line).matches()) { + if (directive.regexPattern.matcher(line).matches()) { return directive; } } @@ -583,5 +592,4 @@ protected Object decode(final String line) throws IOException { abstract String encode(final Object object); } - } diff --git a/src/main/java/htsjdk/tribble/gff/Gff3Constants.java b/src/main/java/htsjdk/tribble/gff/Gff3Constants.java index fa49b78be8..7301d8a212 100644 --- a/src/main/java/htsjdk/tribble/gff/Gff3Constants.java +++ b/src/main/java/htsjdk/tribble/gff/Gff3Constants.java @@ -1,16 +1,16 @@ package htsjdk.tribble.gff; public class Gff3Constants { - public static final char FIELD_DELIMITER = '\t'; - public static final char ATTRIBUTE_DELIMITER = ';'; - public static final char KEY_VALUE_SEPARATOR = '='; - public static final char VALUE_DELIMITER = ','; - public static final String COMMENT_START = "#"; - public static final String DIRECTIVE_START = "##"; - public static final String UNDEFINED_FIELD_VALUE = "."; - public static final String PARENT_ATTRIBUTE_KEY = "Parent"; - public final static char END_OF_LINE_CHARACTER = '\n'; - public static final String ID_ATTRIBUTE_KEY = "ID"; - public static final String NAME_ATTRIBUTE_KEY = "Name"; - public static final String ALIAS_ATTRIBUTE_KEY = "Alias"; + public static final char FIELD_DELIMITER = '\t'; + public static final char 
ATTRIBUTE_DELIMITER = ';'; + public static final char KEY_VALUE_SEPARATOR = '='; + public static final char VALUE_DELIMITER = ','; + public static final String COMMENT_START = "#"; + public static final String DIRECTIVE_START = "##"; + public static final String UNDEFINED_FIELD_VALUE = "."; + public static final String PARENT_ATTRIBUTE_KEY = "Parent"; + public static final char END_OF_LINE_CHARACTER = '\n'; + public static final String ID_ATTRIBUTE_KEY = "ID"; + public static final String NAME_ATTRIBUTE_KEY = "Name"; + public static final String ALIAS_ATTRIBUTE_KEY = "Alias"; } diff --git a/src/main/java/htsjdk/tribble/gff/Gff3Feature.java b/src/main/java/htsjdk/tribble/gff/Gff3Feature.java index 37a879a5b9..5ce0888d13 100644 --- a/src/main/java/htsjdk/tribble/gff/Gff3Feature.java +++ b/src/main/java/htsjdk/tribble/gff/Gff3Feature.java @@ -2,7 +2,6 @@ import htsjdk.tribble.Feature; import htsjdk.tribble.annotation.Strand; - import java.util.List; import java.util.Map; import java.util.Optional; @@ -22,7 +21,6 @@ public interface Gff3Feature extends Feature { boolean isTopLevelFeature(); - default String getSource() { return getBaseData().getSource(); } @@ -40,26 +38,27 @@ default int getPhase() { return getBaseData().getPhase(); } - default String getType() {return getBaseData().getType();} + default String getType() { + return getBaseData().getType(); + } @Override default String getContig() { return getBaseData().getContig(); } - @Override + @Override default int getStart() { return getBaseData().getStart(); } - default List getAttribute(final String key) { return getBaseData().getAttribute(key); } - + /** * Returns true if this record contains an attribute for the specified key. 
- * + * * @param key key whose presence in this map is to be tested * @return true if this map contains an attribute for the specified key */ @@ -68,28 +67,38 @@ default boolean hasAttribute(final String key) { } /** - * Most attributes in a GFF file are present just one time in a line, e.g. : gene_biotype, gene_name, etc ... + * Most attributes in a GFF file are present just one time in a line, e.g. : gene_biotype, gene_name, etc ... * This function returns an Optional.empty if the key is not present, * an Optional.of(value) if there is only one value associated to the key, * or it throws an IllegalArgumentException if there is more than one value. - * + * * @param key key whose presence in the attributes is to be tested * @return Optional<String> if this map contains zero or one attribute for the specified key * @throws IllegalArgumentException if there is more than one value. */ default Optional getUniqueAttribute(final String key) { - return getBaseData().getUniqueAttribute(key); + return getBaseData().getUniqueAttribute(key); } - - default Map> getAttributes() { return getBaseData().getAttributes();} - default String getID() { return getBaseData().getId();} + default Map> getAttributes() { + return getBaseData().getAttributes(); + } + + default String getID() { + return getBaseData().getId(); + } - default String getName() { return getBaseData().getName();} + default String getName() { + return getBaseData().getName(); + } - default List getAliases() { return getBaseData().getAliases();} + default List getAliases() { + return getBaseData().getAliases(); + } - default double getScore() { return getBaseData().getScore();} + default double getScore() { + return getBaseData().getScore(); + } /** * Get BaseData object which contains all the basic information of the feature diff --git a/src/main/java/htsjdk/tribble/gff/Gff3FeatureImpl.java b/src/main/java/htsjdk/tribble/gff/Gff3FeatureImpl.java index 9ac33360fa..f05067e90c 100644 --- 
a/src/main/java/htsjdk/tribble/gff/Gff3FeatureImpl.java +++ b/src/main/java/htsjdk/tribble/gff/Gff3FeatureImpl.java @@ -3,7 +3,6 @@ import htsjdk.samtools.util.Tuple; import htsjdk.tribble.TribbleException; import htsjdk.tribble.annotation.Strand; - import java.util.*; import java.util.stream.Collectors; @@ -12,7 +11,7 @@ * Discontinuous features which are split between multiple lines in the gff files are implemented as separate features linked as "co-features" */ public class Gff3FeatureImpl implements Gff3Feature { - private final static String DERIVES_FROM_ATTRIBUTE_KEY = "Derives_from"; + private static final String DERIVES_FROM_ATTRIBUTE_KEY = "Derives_from"; /** * basic data about feature, contig, position, strand, etc. @@ -29,11 +28,17 @@ public class Gff3FeatureImpl implements Gff3Feature { */ private final Set topLevelFeatures = new HashSet<>(); - public Gff3FeatureImpl(final String contig, final String source, final String type, - final int start, final int end, final Double score, final Strand strand, final int phase, - final Map> attributes) { + public Gff3FeatureImpl( + final String contig, + final String source, + final String type, + final int start, + final int end, + final Double score, + final Strand strand, + final int phase, + final Map> attributes) { baseData = new Gff3BaseData(contig, source, type, start, end, score, strand, phase, attributes); - } public Gff3FeatureImpl(final Gff3BaseData baseData) { @@ -62,14 +67,18 @@ public boolean isTopLevelFeature() { * @return set of parent features */ @Override - public Set getParents() {return parents;} + public Set getParents() { + return parents; + } /** * Gets set of features for which this feature is a parent * @return set of child features */ @Override - public Set getChildren() {return children;} + public Set getChildren() { + return children; + } @Override public Gff3BaseData getBaseData() { @@ -84,7 +93,11 @@ public Gff3BaseData getBaseData() { public Set getAncestors() { final List ancestors 
= new ArrayList<>(parents); for (final Gff3FeatureImpl parent : parents) { - ancestors.addAll(getAttribute(DERIVES_FROM_ATTRIBUTE_KEY).isEmpty()? parent.getAncestors() : parent.getAncestors(new HashSet<>(baseData.getAttributes().get(DERIVES_FROM_ATTRIBUTE_KEY)))); + ancestors.addAll( + getAttribute(DERIVES_FROM_ATTRIBUTE_KEY).isEmpty() + ? parent.getAncestors() + : parent.getAncestors( + new HashSet<>(baseData.getAttributes().get(DERIVES_FROM_ATTRIBUTE_KEY)))); } return new LinkedHashSet<>(ancestors); } @@ -92,7 +105,8 @@ public Set getAncestors() { private Set getAncestors(final Collection derivingFrom) { final List ancestors = new ArrayList<>(); for (final Gff3FeatureImpl parent : parents) { - if (derivingFrom.contains(parent.getID()) || parent.getAncestors().stream().anyMatch(f -> derivingFrom.contains(f.getID()))) { + if (derivingFrom.contains(parent.getID()) + || parent.getAncestors().stream().anyMatch(f -> derivingFrom.contains(f.getID()))) { ancestors.add(parent); ancestors.addAll(parent.getAncestors()); } @@ -109,20 +123,22 @@ public Set getDescendents() { final List descendants = new ArrayList<>(children); final Set idsInLineage = new HashSet<>(Collections.singleton(baseData.getId())); idsInLineage.addAll(children.stream().map(Gff3Feature::getID).collect(Collectors.toSet())); - for(final Gff3FeatureImpl child : children) { + for (final Gff3FeatureImpl child : children) { descendants.addAll(child.getDescendents(idsInLineage)); } return new LinkedHashSet<>(descendants); } private Set getDescendents(final Set idsInLineage) { - final List childrenToAdd = children.stream().filter(c -> c.getAttribute(DERIVES_FROM_ATTRIBUTE_KEY).isEmpty() || - !Collections.disjoint(idsInLineage, c.getAttribute(DERIVES_FROM_ATTRIBUTE_KEY))). 
- collect(Collectors.toList()); + final List childrenToAdd = children.stream() + .filter(c -> c.getAttribute(DERIVES_FROM_ATTRIBUTE_KEY).isEmpty() + || !Collections.disjoint(idsInLineage, c.getAttribute(DERIVES_FROM_ATTRIBUTE_KEY))) + .collect(Collectors.toList()); final List descendants = new ArrayList<>(childrenToAdd); final Set updatedIdsInLineage = new HashSet<>(idsInLineage); - updatedIdsInLineage.addAll(childrenToAdd.stream().map(Gff3Feature::getID).collect(Collectors.toSet())); + updatedIdsInLineage.addAll( + childrenToAdd.stream().map(Gff3Feature::getID).collect(Collectors.toSet())); for (final Gff3FeatureImpl child : childrenToAdd) { descendants.addAll(child.getDescendents(updatedIdsInLineage)); } @@ -134,22 +150,33 @@ private Set getDescendents(final Set idsInLineage) { * @return set of co-features */ @Override - public Set getCoFeatures() {return coFeatures;} + public Set getCoFeatures() { + return coFeatures; + } @Override - public boolean hasParents() {return !parents.isEmpty();} + public boolean hasParents() { + return !parents.isEmpty(); + } @Override - public boolean hasChildren() {return !children.isEmpty();} - + public boolean hasChildren() { + return !children.isEmpty(); + } @Override - public boolean hasCoFeatures() {return !coFeatures.isEmpty();} + public boolean hasCoFeatures() { + return !coFeatures.isEmpty(); + } public void addParent(final Gff3FeatureImpl parent) { final Set topLevelFeaturesToAdd = new HashSet<>(parent.getTopLevelFeatures()); if (!getAttribute(DERIVES_FROM_ATTRIBUTE_KEY).isEmpty()) { - topLevelFeaturesToAdd.removeIf(f -> !getAttribute(DERIVES_FROM_ATTRIBUTE_KEY).contains(f.getID()) && f.getDescendents().stream().noneMatch(f2 -> f2.getID()!= null && getAttribute(DERIVES_FROM_ATTRIBUTE_KEY).contains(f2.getID()))); + topLevelFeaturesToAdd.removeIf(f -> !getAttribute(DERIVES_FROM_ATTRIBUTE_KEY) + .contains(f.getID()) + && f.getDescendents().stream() + .noneMatch(f2 -> f2.getID() != null + && 
getAttribute(DERIVES_FROM_ATTRIBUTE_KEY).contains(f2.getID()))); } parents.add(parent); parent.addChild(this); @@ -164,7 +191,7 @@ private void addChild(final Gff3FeatureImpl child) { private void addTopLevelFeatures(final Collection topLevelFeaturesToAdd) { topLevelFeatures.addAll(topLevelFeaturesToAdd); - //pass topLevelFeature change through to children + // pass topLevelFeature change through to children for (final Gff3FeatureImpl child : children) { child.addTopLevelFeatures(topLevelFeaturesToAdd); child.removeTopLevelFeature(this); @@ -201,7 +228,8 @@ public void addCoFeature(final Gff3FeatureImpl coFeature) { private void addCoFeatureShallow(final Gff3FeatureImpl coFeature) { coFeatures.add(coFeature); if (!coFeature.getID().equals(baseData.getId())) { - throw new TribbleException("Attempting to add co-feature with id " + coFeature.getID() + " to feature with id " + baseData.getId()); + throw new TribbleException("Attempting to add co-feature with id " + coFeature.getID() + + " to feature with id " + baseData.getId()); } } @@ -216,20 +244,16 @@ public boolean equals(Object other) { /* to test for equality, the doubly linked list representation used to represent feature relationships is replaced with a graph representation. equality for between two features is tested by testing equality between their base data fields, and equality between the graphs they are part of. */ - return baseData.equals(((Gff3Feature) other).getBaseData()) && - new Gff3Graph(this).equals(new Gff3Graph((Gff3Feature) other)); + return baseData.equals(((Gff3Feature) other).getBaseData()) + && new Gff3Graph(this).equals(new Gff3Graph((Gff3Feature) other)); } @Override public int hashCode() { - //hash only based on baseData, to keep immutable. + // hash only based on baseData, to keep immutable. 
return baseData.hashCode(); } - - - - /*** * flatten this feature and all descendents into a set of features * @return set of this feature and all descendents @@ -247,13 +271,15 @@ public Set flatten() { * Used for testing equality between features */ private static class Gff3Graph { - final private Set nodes = new HashSet<>(); - final private Set> parentEdges = new HashSet<>(); - final private Set> childEdges = new HashSet<>(); - final private Set> coFeatureSets = new HashSet<>(); + private final Set nodes = new HashSet<>(); + private final Set> parentEdges = new HashSet<>(); + private final Set> childEdges = new HashSet<>(); + private final Set> coFeatureSets = new HashSet<>(); Gff3Graph(final Gff3Feature feature) { - feature.getTopLevelFeatures().stream().flatMap(f -> f.flatten().stream()).forEach(this::addFeature); + feature.getTopLevelFeatures().stream() + .flatMap(f -> f.flatten().stream()) + .forEach(this::addFeature); } private void addFeature(final Gff3Feature feature) { @@ -268,20 +294,22 @@ private void addNode(final Gff3Feature feature) { } private void addParentEdges(final Gff3Feature feature) { - for(final Gff3Feature parent : feature.getParents()) { + for (final Gff3Feature parent : feature.getParents()) { parentEdges.add(new Tuple<>(feature.getBaseData(), parent.getBaseData())); } } private void addChildEdges(final Gff3Feature feature) { - for(final Gff3Feature child : feature.getChildren()) { + for (final Gff3Feature child : feature.getChildren()) { childEdges.add(new Tuple<>(feature.getBaseData(), child.getBaseData())); } } private void addCoFeatureSet(final Gff3Feature feature) { if (feature.hasCoFeatures()) { - final Set coFeaturesBaseData = feature.getCoFeatures().stream().map(Gff3Feature::getBaseData).collect(Collectors.toSet()); + final Set coFeaturesBaseData = feature.getCoFeatures().stream() + .map(Gff3Feature::getBaseData) + .collect(Collectors.toSet()); coFeaturesBaseData.add(feature.getBaseData()); coFeatureSets.add(coFeaturesBaseData); 
} @@ -296,10 +324,10 @@ public boolean equals(Object other) { return false; } - return nodes.equals(((Gff3Graph) other).nodes) && - parentEdges.equals(((Gff3Graph) other).parentEdges) && - childEdges.equals(((Gff3Graph) other).childEdges) && - coFeatureSets.equals(((Gff3Graph) other).coFeatureSets); + return nodes.equals(((Gff3Graph) other).nodes) + && parentEdges.equals(((Gff3Graph) other).parentEdges) + && childEdges.equals(((Gff3Graph) other).childEdges) + && coFeatureSets.equals(((Gff3Graph) other).coFeatureSets); } @Override @@ -312,5 +340,4 @@ public int hashCode() { return hash; } } - -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/tribble/gff/Gff3Writer.java b/src/main/java/htsjdk/tribble/gff/Gff3Writer.java index 0cee78ce73..7f24d7f0cb 100644 --- a/src/main/java/htsjdk/tribble/gff/Gff3Writer.java +++ b/src/main/java/htsjdk/tribble/gff/Gff3Writer.java @@ -4,7 +4,6 @@ import htsjdk.samtools.util.FileExtensions; import htsjdk.samtools.util.IOUtil; import htsjdk.tribble.TribbleException; - import java.io.BufferedOutputStream; import java.io.Closeable; import java.io.IOException; @@ -19,7 +18,6 @@ import java.util.Map; import java.util.function.Consumer; - /** * A class to write out gff3 files. Features are added using {@link #addFeature(Gff3Feature)}, directives using {@link #addDirective(Gff3Codec.Gff3Directive)}, * and comments using {@link #addComment(String)}. Note that the version 3 directive is automatically added at creation, so should not be added separately. 
@@ -27,16 +25,18 @@ public class Gff3Writer implements Closeable { private final OutputStream out; - private final static String version = "3.1.25"; + private static final String version = "3.1.25"; public Gff3Writer(final Path path) throws IOException { if (FileExtensions.GFF3.stream().noneMatch(e -> path.toString().endsWith(e))) { throw new TribbleException("File " + path + " does not have extension consistent with gff3"); } - final OutputStream outputStream = IOUtil.hasGzipFileExtension(path)? new BlockCompressedOutputStream(path.toFile()) : Files.newOutputStream(path); + final OutputStream outputStream = IOUtil.hasGzipFileExtension(path) + ? new BlockCompressedOutputStream(path.toFile()) + : Files.newOutputStream(path); out = new BufferedOutputStream(outputStream); - //start with version directive + // start with version directive initialize(); } @@ -67,17 +67,22 @@ private void tryToWrite(final String string) { } private void writeFirstEightFields(final Gff3Feature feature) throws IOException { - writeJoinedByDelimiter(Gff3Constants.FIELD_DELIMITER, this::tryToWrite, Arrays.asList( - escapeString(feature.getContig()), - escapeString(feature.getSource()), - escapeString(feature.getType()), - Integer.toString(feature.getStart()), - Integer.toString(feature.getEnd()), - feature.getScore() < 0 ? Gff3Constants.UNDEFINED_FIELD_VALUE : Double.toString(feature.getScore()), - feature.getStrand().toString(), - feature.getPhase() < 0 ? Gff3Constants.UNDEFINED_FIELD_VALUE : Integer.toString(feature.getPhase()) - ) - ); + writeJoinedByDelimiter( + Gff3Constants.FIELD_DELIMITER, + this::tryToWrite, + Arrays.asList( + escapeString(feature.getContig()), + escapeString(feature.getSource()), + escapeString(feature.getType()), + Integer.toString(feature.getStart()), + Integer.toString(feature.getEnd()), + feature.getScore() < 0 + ? Gff3Constants.UNDEFINED_FIELD_VALUE + : Double.toString(feature.getScore()), + feature.getStrand().toString(), + feature.getPhase() < 0 + ? 
Gff3Constants.UNDEFINED_FIELD_VALUE + : Integer.toString(feature.getPhase()))); } void writeAttributes(final Map> attributes) throws IOException { @@ -85,7 +90,10 @@ void writeAttributes(final Map> attributes) throws IOExcept out.write(Gff3Constants.UNDEFINED_FIELD_VALUE.getBytes()); } - writeJoinedByDelimiter(Gff3Constants.ATTRIBUTE_DELIMITER, e -> writeKeyValuePair(e.getKey(), e.getValue()), attributes.entrySet()); + writeJoinedByDelimiter( + Gff3Constants.ATTRIBUTE_DELIMITER, + e -> writeKeyValuePair(e.getKey(), e.getValue()), + attributes.entrySet()); } void writeKeyValuePair(final String key, final List values) { @@ -98,7 +106,8 @@ void writeKeyValuePair(final String key, final List values) { } } - private void writeJoinedByDelimiter(final char delimiter, final Consumer consumer, final Collection fields) throws IOException { + private void writeJoinedByDelimiter( + final char delimiter, final Consumer consumer, final Collection fields) throws IOException { boolean isNotFirstField = false; for (final T field : fields) { if (isNotFirstField) { @@ -135,8 +144,9 @@ protected String escapeString(final String s) { static String encodeString(final String s) { try { - //URLEncoder.encode is hardcoded to change all spaces to +, but we want spaces left unchanged so have to do this - //+ is escaped to %2B, so no loss of information + // URLEncoder.encode is hardcoded to change all spaces to +, but we want spaces left unchanged so have to do + // this + // + is escaped to %2B, so no loss of information return URLEncoder.encode(s, "UTF-8").replace("+", " "); } catch (final UnsupportedEncodingException ex) { throw new TribbleException("Encoding failure", ex); @@ -182,4 +192,4 @@ public void addComment(final String comment) throws IOException { public void close() throws IOException { out.close(); } -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/tribble/gff/SequenceRegion.java b/src/main/java/htsjdk/tribble/gff/SequenceRegion.java index 
13dc4f924e..5fa2ab5770 100644 --- a/src/main/java/htsjdk/tribble/gff/SequenceRegion.java +++ b/src/main/java/htsjdk/tribble/gff/SequenceRegion.java @@ -34,15 +34,23 @@ void setCircular() { } @Override - public String getContig(){return contig;} + public String getContig() { + return contig; + } @Override - public int getStart(){return start;} + public int getStart() { + return start; + } @Override - public int getEnd(){return end;} + public int getEnd() { + return end; + } - public boolean isCircular(){return isCircular;} + public boolean isCircular() { + return isCircular; + } @Override public boolean equals(Object other) { @@ -55,7 +63,10 @@ public boolean equals(Object other) { } final SequenceRegion otherSequenceRegion = (SequenceRegion) other; - return otherSequenceRegion.start == start && otherSequenceRegion.end==end && otherSequenceRegion.contig.equals(contig) && otherSequenceRegion.isCircular == isCircular; + return otherSequenceRegion.start == start + && otherSequenceRegion.end == end + && otherSequenceRegion.contig.equals(contig) + && otherSequenceRegion.isCircular == isCircular; } private int computeHashCode() { @@ -67,5 +78,7 @@ private int computeHashCode() { } @Override - public int hashCode() { return hashCode;} + public int hashCode() { + return hashCode; + } } diff --git a/src/main/java/htsjdk/tribble/index/AbstractIndex.java b/src/main/java/htsjdk/tribble/index/AbstractIndex.java index 256e6716a6..8a10fed37f 100644 --- a/src/main/java/htsjdk/tribble/index/AbstractIndex.java +++ b/src/main/java/htsjdk/tribble/index/AbstractIndex.java @@ -25,7 +25,6 @@ import htsjdk.tribble.TribbleException; import htsjdk.tribble.util.LittleEndianInputStream; import htsjdk.tribble.util.LittleEndianOutputStream; - import java.io.BufferedOutputStream; import java.io.File; import java.io.IOException; @@ -63,18 +62,17 @@ public enum IndexType { // the current version of the index public static final int VERSION = 3; - public static final int MAGIC_NUMBER = 1480870228; 
// byte[]{'T', 'I', 'D', 'X'}; - + public static final int MAGIC_NUMBER = 1480870228; // byte[]{'T', 'I', 'D', 'X'}; - private final static String NO_MD5 = ""; - private final static long NO_FILE_SIZE = -1L; - private final static long NO_TS = -1L; + private static final String NO_MD5 = ""; + private static final long NO_FILE_SIZE = -1L; + private static final long NO_TS = -1L; - protected int version; // Our version value - protected Path indexedPath = null; // The file we've created this index for + protected int version; // Our version value + protected Path indexedPath = null; // The file we've created this index for protected long indexedFileSize = NO_FILE_SIZE; // The size of the indexed file - protected long indexedFileTS = NO_TS; // The timestamp - protected String indexedFileMD5 = NO_MD5; // The MD5 value, generally not filled in (expensive to calc) + protected long indexedFileTS = NO_TS; // The timestamp + protected String indexedFileMD5 = NO_MD5; // The MD5 value, generally not filled in (expensive to calc) protected int flags; protected final Log logger = Log.getInstance(this.getClass()); @@ -201,7 +199,6 @@ protected void validateIndexHeader(final int indexType, final LittleEndianInputS if (type != indexType) { throw new TribbleException(String.format("Unexpected index type %d", type)); } - } /** @@ -369,7 +366,7 @@ private final ChrIndex getChrIndex(final String chr) { public void write(final LittleEndianOutputStream stream) throws IOException { writeHeader(stream); - //# of chromosomes + // # of chromosomes stream.writeInt(chrIndices.size()); for (final ChrIndex chrIdx : chrIndices.values()) { chrIdx.write(stream); @@ -378,7 +375,8 @@ public void write(final LittleEndianOutputStream stream) throws IOException { @Override public void write(final Path idxPath) throws IOException { - try(final LittleEndianOutputStream idxStream = new LittleEndianOutputStream(new BufferedOutputStream(Files.newOutputStream(idxPath)))) { + try (final 
LittleEndianOutputStream idxStream = + new LittleEndianOutputStream(new BufferedOutputStream(Files.newOutputStream(idxPath)))) { write(idxStream); } } @@ -391,7 +389,6 @@ public void writeBasedOnFeaturePath(final Path featurePath) throws IOException { write(Tribble.indexPath(featurePath)); } - public void read(final LittleEndianInputStream dis) throws IOException { try { readHeader(dis); @@ -406,14 +403,16 @@ public void read(final LittleEndianInputStream dis) throws IOException { } } catch (final InstantiationException e) { - throw new TribbleException.UnableToCreateCorrectIndexType("Unable to create class " + getChrIndexClass(), e); + throw new TribbleException.UnableToCreateCorrectIndexType( + "Unable to create class " + getChrIndexClass(), e); } catch (final IllegalAccessException e) { - throw new TribbleException.UnableToCreateCorrectIndexType("Unable to create class " + getChrIndexClass(), e); + throw new TribbleException.UnableToCreateCorrectIndexType( + "Unable to create class " + getChrIndexClass(), e); } finally { dis.close(); } - //printIndexInfo(); + // printIndexInfo(); } protected void printIndexInfo() { @@ -443,7 +442,9 @@ protected BlockStats getBlockStats(final boolean logDetails) { stats.total += nBlocks; if (logDetails) - System.out.println(String.format(" %s => %d blocks, %d empty, %.2f", elt.getKey(), nBlocks, nEmptyBlocks, (100.0 * nEmptyBlocks) / nBlocks)); + System.out.println(String.format( + " %s => %d blocks, %d empty, %.2f", + elt.getKey(), nBlocks, nEmptyBlocks, (100.0 * nEmptyBlocks) / nBlocks)); } } @@ -452,7 +453,8 @@ protected BlockStats getBlockStats(final boolean logDetails) { protected String statsSummary() { final BlockStats stats = getBlockStats(false); - return String.format("%12d blocks (%12d empty (%.2f%%))", stats.total, stats.empty, (100.0 * stats.empty) / stats.total); + return String.format( + "%12d blocks (%12d empty (%.2f%%))", stats.total, stats.empty, (100.0 * stats.empty) / stats.total); } @Override diff --git 
a/src/main/java/htsjdk/tribble/index/Block.java b/src/main/java/htsjdk/tribble/index/Block.java index 507ca9405d..b7f3b7659e 100644 --- a/src/main/java/htsjdk/tribble/index/Block.java +++ b/src/main/java/htsjdk/tribble/index/Block.java @@ -27,7 +27,7 @@ /** * Represents a contiguous block of bytes in a file, defined by a start position and size (in bytes) -*/ + */ public class Block { private final long startPosition; @@ -59,11 +59,10 @@ public long getEndPosition() { * @param endPosition Where the block ends, in bytes */ public void setEndPosition(final long endPosition) { - if(endPosition < startPosition) - throw new IllegalArgumentException("Attempting to set block end position to " + - endPosition + " which is before the start of " + startPosition); + if (endPosition < startPosition) + throw new IllegalArgumentException("Attempting to set block end position to " + endPosition + + " which is before the start of " + startPosition); size = endPosition - startPosition; - } /** @@ -80,8 +79,7 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Block block = (Block) o; - return startPosition == block.startPosition && - size == block.size; + return startPosition == block.startPosition && size == block.size; } @Override diff --git a/src/main/java/htsjdk/tribble/index/ChrIndex.java b/src/main/java/htsjdk/tribble/index/ChrIndex.java index b1582ea685..e57b06d5c0 100644 --- a/src/main/java/htsjdk/tribble/index/ChrIndex.java +++ b/src/main/java/htsjdk/tribble/index/ChrIndex.java @@ -25,14 +25,12 @@ import htsjdk.tribble.util.LittleEndianInputStream; import htsjdk.tribble.util.LittleEndianOutputStream; - import java.io.IOException; import java.util.List; /** * Represents an index on a specific chromosome */ - public interface ChrIndex { public String getName(); diff --git a/src/main/java/htsjdk/tribble/index/DynamicIndexCreator.java b/src/main/java/htsjdk/tribble/index/DynamicIndexCreator.java index 
0d3fb921cb..c8af3f1977 100644 --- a/src/main/java/htsjdk/tribble/index/DynamicIndexCreator.java +++ b/src/main/java/htsjdk/tribble/index/DynamicIndexCreator.java @@ -30,7 +30,6 @@ import htsjdk.tribble.index.interval.IntervalIndexCreator; import htsjdk.tribble.index.linear.LinearIndexCreator; import htsjdk.tribble.util.MathUtils; - import java.io.File; import java.nio.file.Path; import java.util.HashMap; @@ -38,7 +37,6 @@ import java.util.Map; import java.util.TreeMap; - /** * A DynamicIndexCreator creates the proper index based on an {@link IndexFactory.IndexBalanceApproach} and * the characteristics of the file. Ultimately this is either a LinearIndex or an IntervalTreeIndex, with index @@ -46,13 +44,14 @@ */ public class DynamicIndexCreator extends TribbleIndexCreator { IndexFactory.IndexBalanceApproach iba; - Map creators; + Map creators; /** * we're interested in two stats: * the longest feature and the density of features - */ + */ int longestFeatureLength = 0; + long featureCount = 0; MathUtils.RunningStat stats = new MathUtils.RunningStat(); @@ -73,7 +72,8 @@ public DynamicIndexCreator(final File inputFile, final IndexFactory.IndexBalance public Index finalizeIndex(final long finalFilePosition) { // finalize all of the indexes // return the score of the indexes we've generated - final Map mapping = scoreIndexes((double)featureCount/(double)basesSeen, creators, longestFeatureLength, iba); + final Map mapping = + scoreIndexes((double) featureCount / (double) basesSeen, creators, longestFeatureLength, iba); final TribbleIndexCreator creator = getMinIndex(mapping, this.iba); for (final Map.Entry entry : properties.entrySet()) { @@ -81,12 +81,12 @@ public Index finalizeIndex(final long finalFilePosition) { } // add our statistics to the file - creator.addProperty("FEATURE_LENGTH_MEAN",String.valueOf(stats.mean())); - creator.addProperty("FEATURE_LENGTH_STD_DEV",String.valueOf(stats.standardDeviation())); - 
creator.addProperty("MEAN_FEATURE_VARIANCE",String.valueOf(stats.variance())); + creator.addProperty("FEATURE_LENGTH_MEAN", String.valueOf(stats.mean())); + creator.addProperty("FEATURE_LENGTH_STD_DEV", String.valueOf(stats.standardDeviation())); + creator.addProperty("MEAN_FEATURE_VARIANCE", String.valueOf(stats.variance())); // add the feature count - creator.addProperty("FEATURE_COUNT",String.valueOf(featureCount)); + creator.addProperty("FEATURE_COUNT", String.valueOf(featureCount)); // Now let's finalize and create the index itself return creator.finalizeIndex(finalFilePosition); @@ -97,63 +97,75 @@ public Index finalizeIndex(final long finalFilePosition) { * @param inputPath the input path to use to create the indexes * @return a map of index type to the best index for that balancing approach */ - private Map getIndexCreators(final Path inputPath, final IndexFactory.IndexBalanceApproach iba) { - final Map creators = new HashMap(); + private Map getIndexCreators( + final Path inputPath, final IndexFactory.IndexBalanceApproach iba) { + final Map creators = + new HashMap(); if (iba == IndexFactory.IndexBalanceApproach.FOR_SIZE) { // add a linear index with the default bin size - final LinearIndexCreator linearNormal = new LinearIndexCreator(inputPath, LinearIndexCreator.DEFAULT_BIN_WIDTH); - creators.put(IndexFactory.IndexType.LINEAR,linearNormal); + final LinearIndexCreator linearNormal = + new LinearIndexCreator(inputPath, LinearIndexCreator.DEFAULT_BIN_WIDTH); + creators.put(IndexFactory.IndexType.LINEAR, linearNormal); // create a tree index with the default size - final IntervalIndexCreator treeNormal = new IntervalIndexCreator(inputPath, IntervalIndexCreator.DEFAULT_FEATURE_COUNT); - creators.put(IndexFactory.IndexType.INTERVAL_TREE,treeNormal); + final IntervalIndexCreator treeNormal = + new IntervalIndexCreator(inputPath, IntervalIndexCreator.DEFAULT_FEATURE_COUNT); + creators.put(IndexFactory.IndexType.INTERVAL_TREE, treeNormal); } - // this section is 
a little more arbitrary; we're creating indexes with a bin size that's a portion of the default; these + // this section is a little more arbitrary; we're creating indexes with a bin size that's a portion of the + // default; these // values were determined experimentally if (iba == IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME) { // create a linear index with a small bin size final LinearIndexCreator linearSmallBin = new LinearIndexCreator(inputPath, Math.max(200, LinearIndexCreator.DEFAULT_BIN_WIDTH / 4)); - creators.put(IndexFactory.IndexType.LINEAR,linearSmallBin); + creators.put(IndexFactory.IndexType.LINEAR, linearSmallBin); // create a tree index with a small index size final IntervalIndexCreator treeSmallBin = new IntervalIndexCreator(inputPath, Math.max(20, IntervalIndexCreator.DEFAULT_FEATURE_COUNT / 8)); - creators.put(IndexFactory.IndexType.INTERVAL_TREE,treeSmallBin); + creators.put(IndexFactory.IndexType.INTERVAL_TREE, treeSmallBin); } return creators; } - @Override public void addFeature(final Feature f, final long filePosition) { - // protected static Map createIndex(FileBasedFeatureIterator iterator, Map creators, IndexBalanceApproach iba) { + // protected static Map createIndex(FileBasedFeatureIterator iterator, + // Map creators, IndexBalanceApproach iba) { // feed each feature to the indexes we've created // first take care of the stats featureCount++; - // calculate the number of bases seen - we have to watch out for the situation where the last record was on the previous chromosome - basesSeen = (lastFeature == null) ? basesSeen + f.getStart() : - ((f.getStart() - lastFeature.getStart() >= 0) ? basesSeen + (f.getStart() - lastFeature.getStart()) : basesSeen + f.getStart()); + // calculate the number of bases seen - we have to watch out for the situation where the last record was on the + // previous chromosome + basesSeen = (lastFeature == null) + ? basesSeen + f.getStart() + : ((f.getStart() - lastFeature.getStart() >= 0) + ? 
basesSeen + (f.getStart() - lastFeature.getStart()) + : basesSeen + f.getStart()); - longestFeatureLength = Math.max(longestFeatureLength,(f.getEnd()-f.getStart()) + 1); + longestFeatureLength = Math.max(longestFeatureLength, (f.getEnd() - f.getStart()) + 1); // push the longest feature to the running stats stats.push(longestFeatureLength); // now feed the feature to each of our creators for (final IndexCreator creator : creators.values()) { - creator.addFeature(f,filePosition); + creator.addFeature(f, filePosition); } - //Redundant check, done in IndexFactory + // Redundant check, done in IndexFactory // if the last feature is after the current feature, exception out -// if (lastFeature != null && f.getStart() < lastFeature.getStart() && lastFeature.getChr().equals(f.getChr())) -// throw new TribbleException.MalformedFeatureFile("We saw a record with a start of " + f.getChr() + ":" + f.getStart() + -// " after a record with a start of " + lastFeature.getChr() + ":" + lastFeature.getStart(), inputFile.getAbsolutePath()); + // if (lastFeature != null && f.getStart() < lastFeature.getStart() && + // lastFeature.getChr().equals(f.getChr())) + // throw new TribbleException.MalformedFeatureFile("We saw a record with a start of " + f.getChr() + + // ":" + f.getStart() + + // " after a record with a start of " + lastFeature.getChr() + ":" + lastFeature.getStart(), + // inputFile.getAbsolutePath()); // save the last feature lastFeature = f; @@ -173,20 +185,28 @@ public void addFeature(final Feature f, final long filePosition) { * @param iba the index balancing approach * @return the best index available for the target indexes */ - protected static LinkedHashMap scoreIndexes(final double densityOfFeatures, final Map indexes, final int longestFeature, final IndexFactory.IndexBalanceApproach iba) { + protected static LinkedHashMap scoreIndexes( + final double densityOfFeatures, + final Map indexes, + final int longestFeature, + final IndexFactory.IndexBalanceApproach iba) { 
if (indexes.size() < 1) throw new IllegalArgumentException("Please specify at least one index to evaluate"); - final LinkedHashMap scores = new LinkedHashMap(); + final LinkedHashMap scores = new LinkedHashMap(); - for (final Map.Entry entry : indexes.entrySet()) { + for (final Map.Entry entry : indexes.entrySet()) { // we have different scoring if (entry.getValue() instanceof LinearIndexCreator) { - final double binSize = ((LinearIndexCreator)(entry.getValue())).getBinSize(); - scores.put(binSize * densityOfFeatures * Math.ceil((double) longestFeature / binSize), entry.getValue()); + final double binSize = ((LinearIndexCreator) (entry.getValue())).getBinSize(); + scores.put( + binSize * densityOfFeatures * Math.ceil((double) longestFeature / binSize), entry.getValue()); } else if (entry.getValue() instanceof IntervalIndexCreator) { - scores.put((double) ((IntervalIndexCreator)entry.getValue()).getFeaturesPerInterval(), entry.getValue()); + scores.put( + (double) ((IntervalIndexCreator) entry.getValue()).getFeaturesPerInterval(), entry.getValue()); } else { - throw new TribbleException.UnableToCreateCorrectIndexType("Unknown index type, we don't have a scoring method for " + entry.getValue().getClass()); + throw new TribbleException.UnableToCreateCorrectIndexType( + "Unknown index type, we don't have a scoring method for " + + entry.getValue().getClass()); } } return scores; @@ -197,12 +217,16 @@ protected static LinkedHashMap scoreIndexes(final do * @param scores the list of scaled features/bin scores for each index type * @return the best score index value */ - private TribbleIndexCreator getMinIndex(final Map scores, final IndexFactory.IndexBalanceApproach iba) { - final TreeMap map = new TreeMap(); + private TribbleIndexCreator getMinIndex( + final Map scores, final IndexFactory.IndexBalanceApproach iba) { + final TreeMap map = new TreeMap(); map.putAll(scores); - - // if we are optimizing for seek time, choose the lowest score (adjusted features/bin value), if 
for storage size, choose the opposite - final TribbleIndexCreator idx = (iba != IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME) ? map.get(map.lastKey()) : map.get(map.firstKey()); + + // if we are optimizing for seek time, choose the lowest score (adjusted features/bin value), if for storage + // size, choose the opposite + final TribbleIndexCreator idx = (iba != IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME) + ? map.get(map.lastKey()) + : map.get(map.firstKey()); return idx; } diff --git a/src/main/java/htsjdk/tribble/index/Index.java b/src/main/java/htsjdk/tribble/index/Index.java index be1b697e85..3838daf9ca 100644 --- a/src/main/java/htsjdk/tribble/index/Index.java +++ b/src/main/java/htsjdk/tribble/index/Index.java @@ -25,7 +25,6 @@ import htsjdk.samtools.util.IOUtil; import htsjdk.tribble.util.LittleEndianOutputStream; - import java.io.File; import java.io.IOException; import java.nio.file.Path; @@ -115,7 +114,7 @@ public default void writeBasedOnFeatureFile(File featureFile) throws IOException /** * @return get the list of properties for this index. Returns null if no properties. */ - public Map getProperties(); + public Map getProperties(); /** * Returns true if this and obj are 'effectively' equivalent indices. Ignores the diff --git a/src/main/java/htsjdk/tribble/index/IndexCreator.java b/src/main/java/htsjdk/tribble/index/IndexCreator.java index c90ec9f34b..b3c1416fe3 100644 --- a/src/main/java/htsjdk/tribble/index/IndexCreator.java +++ b/src/main/java/htsjdk/tribble/index/IndexCreator.java @@ -31,7 +31,7 @@ * An interface for creating indexes * * @author jrobinso - */ + */ public interface IndexCreator { /** * Add a feature to the index @@ -51,7 +51,5 @@ public interface IndexCreator { * Set the sequence dictionary for the index. Default implementation does nothing. * @param dict the dictionary to add to the index. 
*/ - public default void setIndexSequenceDictionary(final SAMSequenceDictionary dict) { } + public default void setIndexSequenceDictionary(final SAMSequenceDictionary dict) {} } - - diff --git a/src/main/java/htsjdk/tribble/index/IndexFactory.java b/src/main/java/htsjdk/tribble/index/IndexFactory.java index 1e26c33300..4146c631fa 100644 --- a/src/main/java/htsjdk/tribble/index/IndexFactory.java +++ b/src/main/java/htsjdk/tribble/index/IndexFactory.java @@ -47,7 +47,6 @@ import htsjdk.tribble.util.LittleEndianInputStream; import htsjdk.tribble.util.ParsingUtils; import htsjdk.utils.ValidationUtils; - import java.io.BufferedInputStream; import java.io.EOFException; import java.io.File; @@ -79,10 +78,21 @@ public enum IndexBalanceApproach { * an enum that contains all of the information about the index types, and how to create them */ public enum IndexType { - LINEAR(AbstractIndex.MAGIC_NUMBER, LinearIndex.INDEX_TYPE, true, LinearIndex::new, LinearIndexCreator.DEFAULT_BIN_WIDTH), - INTERVAL_TREE(AbstractIndex.MAGIC_NUMBER, IntervalTreeIndex.INDEX_TYPE, true, IntervalTreeIndex::new, IntervalIndexCreator.DEFAULT_FEATURE_COUNT), - // Tabix index initialization requires additional information, so generic construction won't work, thus indexCreatorClass is null. - TABIX(TabixIndex.MAGIC_NUMBER, null, false, TabixIndex::new, -1) ; + LINEAR( + AbstractIndex.MAGIC_NUMBER, + LinearIndex.INDEX_TYPE, + true, + LinearIndex::new, + LinearIndexCreator.DEFAULT_BIN_WIDTH), + INTERVAL_TREE( + AbstractIndex.MAGIC_NUMBER, + IntervalTreeIndex.INDEX_TYPE, + true, + IntervalTreeIndex::new, + IntervalIndexCreator.DEFAULT_FEATURE_COUNT), + // Tabix index initialization requires additional information, so generic construction won't work, thus + // indexCreatorClass is null. 
+ TABIX(TabixIndex.MAGIC_NUMBER, null, false, TabixIndex::new, -1); private final int magicNumber; private final Integer tribbleIndexType; @@ -102,7 +112,12 @@ private interface IndexFromStreamFunction { Index apply(InputStream t) throws IOException; } - IndexType(final int magicNumber, final Integer tribbleIndexType, final boolean canCreate, final IndexFromStreamFunction createFromInputStream, final int defaultBinSize) { + IndexType( + final int magicNumber, + final Integer tribbleIndexType, + final boolean canCreate, + final IndexFromStreamFunction createFromInputStream, + final int defaultBinSize) { this.magicNumber = magicNumber; this.tribbleIndexType = tribbleIndexType; this.canCreate = canCreate; @@ -122,7 +137,9 @@ public Index createIndex(final InputStream in) { } } - public int getMagicNumber() { return magicNumber; } + public int getMagicNumber() { + return magicNumber; + } /** * @@ -145,8 +162,8 @@ public static IndexType getIndexType(final BufferedInputStream is) { is.reset(); for (final IndexType indexType : IndexType.values()) { - if (indexType.magicNumber == magicNumber && - (indexType.tribbleIndexType == null || indexType.tribbleIndexType == type)) { + if (indexType.magicNumber == magicNumber + && (indexType.tribbleIndexType == null || indexType.tribbleIndexType == type)) { return indexType; } } @@ -159,7 +176,6 @@ public static IndexType getIndexType(final BufferedInputStream is) { } } - /** * Load in index from the specified file. The type of index (LinearIndex or IntervalTreeIndex) is determined * at run time by reading the type flag in the file. 
@@ -178,7 +194,8 @@ public static Index loadIndex(final String indexFile) { * @param indexWrapper a wrapper to apply to the raw byte stream of the index file, only applied to uri's loaded as * {@link java.nio.file.Path} */ - public static Index loadIndex(final String indexFile, Function indexWrapper) { + public static Index loadIndex( + final String indexFile, Function indexWrapper) { try { return loadIndex(indexFile, indexFileInputStream(indexFile, indexWrapper)); } catch (final IOException ex) { @@ -194,10 +211,11 @@ public static Index loadIndex(final String indexFile, Function indexWrapper) throws IOException { + private static InputStream indexFileInputStream( + final String indexFile, Function indexWrapper) + throws IOException { final InputStream inputStreamInitial = ParsingUtils.openInputStream(indexFile, indexWrapper); if (indexFile.endsWith(".gz")) { return new GZIPInputStream(inputStreamInitial); - } - else if (indexFile.endsWith(FileExtensions.TABIX_INDEX)) { + } else if (indexFile.endsWith(FileExtensions.TABIX_INDEX)) { return new BlockCompressedInputStream(inputStreamInitial); - } - else { + } else { return inputStreamInitial; } } @@ -226,11 +244,9 @@ else if (indexFile.endsWith(FileExtensions.TABIX_INDEX)) { * @param inputFile the input file to load features from * @param codec the codec to use for decoding records */ - public static LinearIndex createLinearIndex(final File inputFile, final FeatureCodec codec) { - return createLinearIndex( - IOUtil.toPath(inputFile), - codec, - LinearIndexCreator.DEFAULT_BIN_WIDTH); + public static LinearIndex createLinearIndex( + final File inputFile, final FeatureCodec codec) { + return createLinearIndex(IOUtil.toPath(inputFile), codec, LinearIndexCreator.DEFAULT_BIN_WIDTH); } /** @@ -239,8 +255,8 @@ public static LinearIndex createLin * @param inputPath the input file to load features from * @param codec the codec to use for decoding records */ - public static LinearIndex createLinearIndex(final Path inputPath, - 
final FeatureCodec codec) { + public static LinearIndex createLinearIndex( + final Path inputPath, final FeatureCodec codec) { return createLinearIndex(inputPath, codec, LinearIndexCreator.DEFAULT_BIN_WIDTH); } @@ -251,9 +267,8 @@ public static LinearIndex createLine * @param codec the codec to use for decoding records * @param binSize the bin size */ - public static LinearIndex createLinearIndex(final File inputFile, - final FeatureCodec codec, - final int binSize) { + public static LinearIndex createLinearIndex( + final File inputFile, final FeatureCodec codec, final int binSize) { return createLinearIndex(IOUtil.toPath(inputFile), codec, binSize); } @@ -264,12 +279,11 @@ public static LinearIndex createLine * @param codec the codec to use for decoding records * @param binSize the bin size */ - public static LinearIndex createLinearIndex(final Path inputPath, - final FeatureCodec codec, - final int binSize) { + public static LinearIndex createLinearIndex( + final Path inputPath, final FeatureCodec codec, final int binSize) { ValidationUtils.nonNull(inputPath, "input path must be non-null"); final LinearIndexCreator indexCreator = new LinearIndexCreator(inputPath, binSize); - return (LinearIndex)createIndex(inputPath, new FeatureIterator<>(inputPath, codec), indexCreator); + return (LinearIndex) createIndex(inputPath, new FeatureIterator<>(inputPath, codec), indexCreator); } /** @@ -278,8 +292,8 @@ public static LinearIndex createLine * @param inputFile the file containing the features * @param codec to decode the features */ - public static IntervalTreeIndex createIntervalIndex(final File inputFile, - final FeatureCodec codec) { + public static IntervalTreeIndex createIntervalIndex( + final File inputFile, final FeatureCodec codec) { return createIntervalIndex(IOUtil.toPath(inputFile), codec, IntervalIndexCreator.DEFAULT_FEATURE_COUNT); } @@ -289,12 +303,11 @@ public static IntervalTreeIndex crea * @param inputPath the file containing the features * @param codec 
to decode the features */ - public static IntervalTreeIndex createIntervalIndex(final Path inputPath, - final FeatureCodec codec) { + public static IntervalTreeIndex createIntervalIndex( + final Path inputPath, final FeatureCodec codec) { return createIntervalIndex(inputPath, codec, IntervalIndexCreator.DEFAULT_FEATURE_COUNT); } - /** * a helper method for creating an interval-tree index * @@ -302,9 +315,8 @@ public static IntervalTreeIndex crea * @param codec the codec to use for decoding records * @param featuresPerInterval */ - public static IntervalTreeIndex createIntervalIndex(final File inputFile, - final FeatureCodec codec, - final int featuresPerInterval) { + public static IntervalTreeIndex createIntervalIndex( + final File inputFile, final FeatureCodec codec, final int featuresPerInterval) { return createIntervalIndex(IOUtil.toPath(inputFile), codec, featuresPerInterval); } @@ -315,12 +327,11 @@ public static IntervalTreeIndex crea * @param codec the codec to use for decoding records * @param featuresPerInterval */ - public static IntervalTreeIndex createIntervalIndex(final Path inputPath, - final FeatureCodec codec, - final int featuresPerInterval) { + public static IntervalTreeIndex createIntervalIndex( + final Path inputPath, final FeatureCodec codec, final int featuresPerInterval) { ValidationUtils.nonNull(inputPath, "input path must be non-null"); final IntervalIndexCreator indexCreator = new IntervalIndexCreator(inputPath, featuresPerInterval); - return (IntervalTreeIndex)createIndex(inputPath, new FeatureIterator<>(inputPath, codec), indexCreator); + return (IntervalTreeIndex) createIndex(inputPath, new FeatureIterator<>(inputPath, codec), indexCreator); } /** @@ -329,7 +340,8 @@ public static IntervalTreeIndex crea * @param inputFile the input file to load features from * @param codec the codec to use for decoding records */ - public static Index createDynamicIndex(final File inputFile, final FeatureCodec codec) { + public static Index 
createDynamicIndex( + final File inputFile, final FeatureCodec codec) { return createDynamicIndex(IOUtil.toPath(inputFile), codec, IndexBalanceApproach.FOR_SEEK_TIME); } @@ -339,11 +351,11 @@ public static Index createDynamicInd * @param inputPath the input path to load features from * @param codec the codec to use for decoding records */ - public static Index createDynamicIndex(final Path inputPath, final FeatureCodec codec) { + public static Index createDynamicIndex( + final Path inputPath, final FeatureCodec codec) { return createDynamicIndex(inputPath, codec, IndexBalanceApproach.FOR_SEEK_TIME); } - /** * Create a index of the specified type with default binning parameters * @@ -351,9 +363,8 @@ public static Index createDynamicInd * @param codec the codec to use for decoding records * @param type the type of index to create */ - public static Index createIndex(final File inputFile, - final FeatureCodec codec, - final IndexType type) { + public static Index createIndex( + final File inputFile, final FeatureCodec codec, final IndexType type) { return createIndex(IOUtil.toPath(inputFile), codec, type, null); } @@ -364,9 +375,8 @@ public static Index createIndex(fina * @param codec the codec to use for decoding records * @param type the type of index to create */ - public static Index createIndex(final Path inputhPath, - final FeatureCodec codec, - final IndexType type) { + public static Index createIndex( + final Path inputhPath, final FeatureCodec codec, final IndexType type) { return createIndex(inputhPath, codec, type, null); } @@ -378,10 +388,11 @@ public static Index createIndex(fina * @param type the type of index to create * @param sequenceDictionary May be null, but if present may reduce memory footprint for tabix index creation */ - public static Index createIndex(final File inputFile, - final FeatureCodec codec, - final IndexType type, - final SAMSequenceDictionary sequenceDictionary) { + public static Index createIndex( + final File inputFile, + final 
FeatureCodec codec, + final IndexType type, + final SAMSequenceDictionary sequenceDictionary) { return createIndex(IOUtil.toPath(inputFile), codec, type, sequenceDictionary); } @@ -393,15 +404,20 @@ public static Index createIndex(fina * @param type the type of index to create * @param sequenceDictionary May be null, but if present may reduce memory footprint for tabix index creation */ - public static Index createIndex(final Path inputPath, - final FeatureCodec codec, - final IndexType type, - final SAMSequenceDictionary sequenceDictionary) { + public static Index createIndex( + final Path inputPath, + final FeatureCodec codec, + final IndexType type, + final SAMSequenceDictionary sequenceDictionary) { switch (type) { - case INTERVAL_TREE: return createIntervalIndex(inputPath, codec); - case LINEAR: return createLinearIndex(inputPath, codec); - case TABIX: return createTabixIndex(inputPath, codec, sequenceDictionary); - default: throw new IllegalArgumentException("Unrecognized IndexType " + type); + case INTERVAL_TREE: + return createIntervalIndex(inputPath, codec); + case LINEAR: + return createLinearIndex(inputPath, codec); + case TABIX: + return createTabixIndex(inputPath, codec, sequenceDictionary); + default: + throw new IllegalArgumentException("Unrecognized IndexType " + type); } } @@ -424,9 +440,8 @@ public static void writeIndex(final Index idx, final File idxFile) throws IOExce * @param codec the codec to use for decoding records * @param iba the index balancing approach */ - public static Index createDynamicIndex(final File inputFile, - final FeatureCodec codec, - final IndexBalanceApproach iba) { + public static Index createDynamicIndex( + final File inputFile, final FeatureCodec codec, final IndexBalanceApproach iba) { return createDynamicIndex(IOUtil.toPath(inputFile), codec, iba); } @@ -437,9 +452,8 @@ public static Index createDynamicInd * @param codec the codec to use for decoding records * @param iba the index balancing approach */ - public 
static Index createDynamicIndex(final Path inputPath, - final FeatureCodec codec, - final IndexBalanceApproach iba) { + public static Index createDynamicIndex( + final Path inputPath, final FeatureCodec codec, final IndexBalanceApproach iba) { ValidationUtils.nonNull(inputPath, "input path must be non-null"); // get a list of index creators final DynamicIndexCreator indexCreator = new DynamicIndexCreator(inputPath, iba); @@ -453,10 +467,11 @@ public static Index createDynamicInd * @param sequenceDictionary May be null, but if present may reduce memory footprint for index creation. Features * in inputFile must be in the order defined by sequenceDictionary, if it is present. */ - public static TabixIndex createTabixIndex(final File inputFile, - final FeatureCodec codec, - final TabixFormat tabixFormat, - final SAMSequenceDictionary sequenceDictionary) { + public static TabixIndex createTabixIndex( + final File inputFile, + final FeatureCodec codec, + final TabixFormat tabixFormat, + final SAMSequenceDictionary sequenceDictionary) { return createTabixIndex(IOUtil.toPath(inputFile), codec, tabixFormat, sequenceDictionary); } @@ -467,13 +482,14 @@ public static TabixIndex createTabix * @param sequenceDictionary May be null, but if present may reduce memory footprint for index creation. Features * in inputFile must be in the order defined by sequenceDictionary, if it is present. 
*/ - public static TabixIndex createTabixIndex(final Path inputPath, - final FeatureCodec codec, - final TabixFormat tabixFormat, - final SAMSequenceDictionary sequenceDictionary) { + public static TabixIndex createTabixIndex( + final Path inputPath, + final FeatureCodec codec, + final TabixFormat tabixFormat, + final SAMSequenceDictionary sequenceDictionary) { ValidationUtils.nonNull(inputPath, "input path must be non-null"); final TabixIndexCreator indexCreator = new TabixIndexCreator(sequenceDictionary, tabixFormat); - return (TabixIndex)createIndex(inputPath, new FeatureIterator<>(inputPath, codec), indexCreator); + return (TabixIndex) createIndex(inputPath, new FeatureIterator<>(inputPath, codec), indexCreator); } /** @@ -483,9 +499,10 @@ public static TabixIndex createTabix * in inputFile must be in the order defined by sequenceDictionary, if it is present. * */ - public static TabixIndex createTabixIndex(final File inputFile, - final FeatureCodec codec, - final SAMSequenceDictionary sequenceDictionary) { + public static TabixIndex createTabixIndex( + final File inputFile, + final FeatureCodec codec, + final SAMSequenceDictionary sequenceDictionary) { return createTabixIndex(IOUtil.toPath(inputFile), codec, codec.getTabixFormat(), sequenceDictionary); } @@ -496,9 +513,10 @@ public static TabixIndex createTabix * in inputFile must be in the order defined by sequenceDictionary, if it is present. 
* */ - public static TabixIndex createTabixIndex(final Path inputPath, - final FeatureCodec codec, - final SAMSequenceDictionary sequenceDictionary) { + public static TabixIndex createTabixIndex( + final Path inputPath, + final FeatureCodec codec, + final SAMSequenceDictionary sequenceDictionary) { return createTabixIndex(inputPath, codec, codec.getTabixFormat(), sequenceDictionary); } @@ -511,17 +529,17 @@ private static Index createIndex(final Path inputPath, final FeatureIterator ite currentFeature = iterator.next(); checkSorted(inputPath, lastFeature, currentFeature); - //should only visit chromosomes once + // should only visit chromosomes once final String curChr = currentFeature.getContig(); final String lastChr = lastFeature != null ? lastFeature.getContig() : null; - if(!curChr.equals(lastChr)){ - if(visitedChromos.containsKey(curChr)){ + if (!curChr.equals(lastChr)) { + if (visitedChromos.containsKey(curChr)) { String msg = "Input file must have contiguous chromosomes."; msg += " Saw feature " + featToString(visitedChromos.get(curChr)); msg += " followed later by " + featToString(lastFeature); msg += " and then " + featToString(currentFeature); throw new TribbleException.MalformedFeatureFile(msg, inputPath.toString()); - }else{ + } else { visitedChromos.put(curChr, currentFeature); } } @@ -537,19 +555,23 @@ private static Index createIndex(final Path inputPath, final FeatureIterator ite return creator.finalizeIndex(finalPosition); } - private static String featToString(final Feature feature){ + private static String featToString(final Feature feature) { return feature.getContig() + ":" + feature.getStart() + "-" + feature.getEnd(); } - private static void checkSorted(final Path inputPath, final Feature lastFeature, final Feature currentFeature){ + private static void checkSorted(final Path inputPath, final Feature lastFeature, final Feature currentFeature) { // if the last currentFeature is after the current currentFeature, exception out - if (lastFeature 
!= null && currentFeature.getStart() < lastFeature.getStart() && lastFeature.getContig().equals(currentFeature.getContig())) - throw new TribbleException.MalformedFeatureFile("Input file is not sorted by start position. \n" + - "We saw a record with a start of " + currentFeature.getContig() + ":" + currentFeature.getStart() + - " after a record with a start of " + lastFeature.getContig() + ":" + lastFeature.getStart(), inputPath.toString()); + if (lastFeature != null + && currentFeature.getStart() < lastFeature.getStart() + && lastFeature.getContig().equals(currentFeature.getContig())) + throw new TribbleException.MalformedFeatureFile( + "Input file is not sorted by start position. \n" + "We saw a record with a start of " + + currentFeature.getContig() + ":" + currentFeature.getStart() + + " after a record with a start of " + + lastFeature.getContig() + ":" + lastFeature.getStart(), + inputPath.toString()); } - /** * Iterator for reading features from a file, given a {@code FeatureCodec}. */ @@ -583,13 +605,15 @@ public FeatureIterator(final Path inputPath, final FeatureCodec other.start) - return 1; + if (this.start < other.start) return -1; + if (this.start > other.start) return 1; - if (this.end < other.end) - return -1; - if (this.end > other.end) - return 1; + if (this.end < other.end) return -1; + if (this.end > other.end) return 1; return 0; } @@ -96,16 +85,13 @@ public String toString() { return "Interval[" + this.start + ", " + this.end + "]"; } - /** * @return whether this interval overlaps the other. 
*/ public boolean overlaps(Interval other) { - return (this.start <= other.end && - other.start <= this.end); + return (this.start <= other.end && other.start <= this.end); } - /** * @return The file block for this interval */ @@ -113,4 +99,3 @@ public Block getBlock() { return block; } } - diff --git a/src/main/java/htsjdk/tribble/index/interval/IntervalIndexCreator.java b/src/main/java/htsjdk/tribble/index/interval/IntervalIndexCreator.java index 5910466345..4890eff802 100644 --- a/src/main/java/htsjdk/tribble/index/interval/IntervalIndexCreator.java +++ b/src/main/java/htsjdk/tribble/index/interval/IntervalIndexCreator.java @@ -24,7 +24,6 @@ import htsjdk.tribble.index.Index; import htsjdk.tribble.index.TribbleIndexCreator; import htsjdk.tribble.index.interval.IntervalTreeIndex.ChrIndex; - import java.io.File; import java.nio.file.Path; import java.util.ArrayList; @@ -77,8 +76,7 @@ public void addFeature(final Feature feature, final long filePosition) { // if we don't have a chrIndex yet, or if the last one was for the previous contig, create a new one if (chrList.isEmpty() || !chrList.getLast().getName().equals(feature.getContig())) { // if we're creating a new chrIndex (not the first), make sure to dump the intervals to the old chrIndex - if (!chrList.isEmpty()) - addIntervalsToLastChr(filePosition); + if (!chrList.isEmpty()) addIntervalsToLastChr(filePosition); // create a new chr index for the current contig chrList.add(new ChrIndex(feature.getContig())); @@ -90,13 +88,16 @@ public void addFeature(final Feature feature, final long filePosition) { final MutableInterval i = new MutableInterval(); i.setStart(feature.getStart()); i.setStartFilePosition(filePosition); - if(!intervals.isEmpty()) intervals.get(intervals.size()-1).setEndFilePosition(filePosition); + if (!intervals.isEmpty()) intervals.get(intervals.size() - 1).setEndFilePosition(filePosition); featureCount = 0; // reset the feature count intervals.add(i); } - + // make sure we update the ending 
position of the bin - intervals.get(intervals.size()-1).setStop(Math.max(feature.getEnd(),intervals.get(intervals.size()-1).getStop())); + intervals + .get(intervals.size() - 1) + .setStop(Math.max( + feature.getEnd(), intervals.get(intervals.size() - 1).getStop())); featureCount++; } @@ -106,7 +107,7 @@ public void addFeature(final Feature feature, final long filePosition) { */ private void addIntervalsToLastChr(final long currentPos) { for (int x = 0; x < intervals.size(); x++) { - if (x == intervals.size()-1) intervals.get(x).setEndFilePosition(currentPos); + if (x == intervals.size() - 1) intervals.get(x).setEndFilePosition(currentPos); chrList.getLast().insert(intervals.get(x).toInterval()); } } @@ -139,7 +140,7 @@ class MutableInterval { // the start, the stop, and the start position private int start; - private int stop; + private int stop; private long startFilePosition; private long endFilePosition; @@ -162,10 +163,10 @@ public void setEndFilePosition(final long endFilePosition) { } public Interval toInterval() { - return new Interval(start,stop,new Block(startFilePosition, endFilePosition - startFilePosition)); + return new Interval(start, stop, new Block(startFilePosition, endFilePosition - startFilePosition)); } public int getStop() { return stop; } -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/tribble/index/interval/IntervalTree.java b/src/main/java/htsjdk/tribble/index/interval/IntervalTree.java index 855ade94dd..4f6834c067 100644 --- a/src/main/java/htsjdk/tribble/index/interval/IntervalTree.java +++ b/src/main/java/htsjdk/tribble/index/interval/IntervalTree.java @@ -18,7 +18,6 @@ package htsjdk.tribble.index.interval; - import java.io.DataOutputStream; import java.io.IOException; import java.util.ArrayList; @@ -52,7 +51,6 @@ public IntervalTree() { this.size = 0; } - public void insert(Interval interval) { Node node = new Node(interval); insert(node); @@ -137,7 +135,6 @@ private List getAll(Node node, List results) { return 
results; } - /** * Used for testing only. * @@ -145,8 +142,7 @@ private List getAll(Node node, List results) { * @return */ private int getRealMax(Node node) { - if (node.isNull()) - return Integer.MIN_VALUE; + if (node.isNull()) return Integer.MIN_VALUE; int leftMax = getRealMax(node.left); int rightMax = getRealMax(node.right); int nodeHigh = (node.interval).end; @@ -162,8 +158,7 @@ private int getRealMax(Node node) { * @return */ private int getRealMin(Node node) { - if (node.isNull()) - return Integer.MAX_VALUE; + if (node.isNull()) return Integer.MAX_VALUE; int leftMin = getRealMin(node.left); int rightMin = getRealMin(node.right); @@ -173,7 +168,6 @@ private int getRealMin(Node node) { return (min1 < nodeLow ? min1 : nodeLow); } - private void insert(Node x) { assert (x != null); assert (!x.isNull()); @@ -218,12 +212,10 @@ private void insert(Node x) { this.root.color = Node.BLACK; } - private Node root() { return this.root; } - private void leftRotate(Node x) { Node y = x.right; x.right = y.left; @@ -248,7 +240,6 @@ private void leftRotate(Node x) { // of x, and will be touched by applyUpdate(). } - private void rightRotate(Node x) { Node y = x.left; x.left = y.right; @@ -268,13 +259,11 @@ private void rightRotate(Node x) { y.right = x; x.parent = y; - applyUpdate(x); // no need to apply update on y, since it'll y is an ancestor // of x, and will be touched by applyUpdate(). } - /** * Note: Does not maintain RB constraints, this is done post insert * @@ -307,7 +296,6 @@ private void treeInsert(Node x) { this.applyUpdate(x); } - // Applies the statistic update on the node and its ancestors. 
private void applyUpdate(Node node) { @@ -331,47 +319,38 @@ public int size() { return _size(this.root); } - private int _size(Node node) { - if (node.isNull()) - return 0; + if (node.isNull()) return 0; return 1 + _size(node.left) + _size(node.right); } - private boolean allRedNodesFollowConstraints(Node node) { - if (node.isNull()) - return true; + if (node.isNull()) return true; if (node.color == Node.BLACK) { - return (allRedNodesFollowConstraints(node.left) && - allRedNodesFollowConstraints(node.right)); + return (allRedNodesFollowConstraints(node.left) && allRedNodesFollowConstraints(node.right)); } // At this point, we know we're on a RED node. - return (node.left.color == Node.BLACK && - node.right.color == Node.BLACK && - allRedNodesFollowConstraints(node.left) && - allRedNodesFollowConstraints(node.right)); + return (node.left.color == Node.BLACK + && node.right.color == Node.BLACK + && allRedNodesFollowConstraints(node.left) + && allRedNodesFollowConstraints(node.right)); } - // Check that both ends are equally balanced in terms of black height. private boolean isBalancedBlackHeight(Node node) { - if (node.isNull()) - return true; - return (blackHeight(node.left) == blackHeight(node.right) && - isBalancedBlackHeight(node.left) && - isBalancedBlackHeight(node.right)); + if (node.isNull()) return true; + return (blackHeight(node.left) == blackHeight(node.right) + && isBalancedBlackHeight(node.left) + && isBalancedBlackHeight(node.right)); } - // The black height of a node should be left/right equal. 
private int blackHeight(Node node) { - if (node.isNull()) - return 0; + if (node.isNull()) return 0; int leftBlackHeight = blackHeight(node.left); if (node.color == Node.BLACK) { return leftBlackHeight + 1; @@ -380,7 +359,6 @@ private int blackHeight(Node node) { } } - /** * Test code: make sure that the tree has all the properties * defined by Red Black trees and interval trees @@ -401,45 +379,35 @@ private int blackHeight(Node node) { */ public boolean isValid() { if (this.root.color != Node.BLACK) { - //logger.warn("root color is wrong"); + // logger.warn("root color is wrong"); return false; } if (NIL.color != Node.BLACK) { - //logger.warn("NIL color is wrong"); + // logger.warn("NIL color is wrong"); return false; } if (allRedNodesFollowConstraints(this.root) == false) { - //logger.warn("red node doesn't follow constraints"); + // logger.warn("red node doesn't follow constraints"); return false; } if (isBalancedBlackHeight(this.root) == false) { - //logger.warn("black height unbalanced"); + // logger.warn("black height unbalanced"); return false; } - return hasCorrectMaxFields(this.root) && - hasCorrectMinFields(this.root); + return hasCorrectMaxFields(this.root) && hasCorrectMinFields(this.root); } - private boolean hasCorrectMaxFields(Node node) { - if (node.isNull()) - return true; - return (getRealMax(node) == (node.max) && - hasCorrectMaxFields(node.left) && - hasCorrectMaxFields(node.right)); + if (node.isNull()) return true; + return (getRealMax(node) == (node.max) && hasCorrectMaxFields(node.left) && hasCorrectMaxFields(node.right)); } - private boolean hasCorrectMinFields(Node node) { - if (node.isNull()) - return true; - return (getRealMin(node) == (node.min) && - hasCorrectMinFields(node.left) && - hasCorrectMinFields(node.right)); + if (node.isNull()) return true; + return (getRealMin(node) == (node.min) && hasCorrectMinFields(node.left) && hasCorrectMinFields(node.right)); } - static class Node { public static boolean BLACK = false; @@ -456,7 
+424,6 @@ static class Node { boolean color; Node parent; - private Node() { this.max = Integer.MIN_VALUE; this.min = Integer.MAX_VALUE; @@ -467,7 +434,6 @@ public void store(DataOutputStream dos) throws IOException { dos.writeInt(interval.end); dos.writeInt(min); dos.writeInt(max); - } public Node(Interval interval) { @@ -479,7 +445,6 @@ public Node(Interval interval) { this.color = RED; } - static Node NIL; static { @@ -490,12 +455,10 @@ public Node(Interval interval) { NIL.right = NIL; } - public boolean isNull() { return this == NIL; } - public String toString() { // Make some shorthand for the nodes @@ -540,7 +503,6 @@ public void _toString(StringBuffer buf, Map keys) { keys.put(this.right.interval, rightKey); } - buf.append(selfKey + " -> " + leftKey + " , " + rightKey); buf.append('\n'); this.left._toString(buf, keys); @@ -548,4 +510,3 @@ public void _toString(StringBuffer buf, Map keys) { } } } - diff --git a/src/main/java/htsjdk/tribble/index/interval/IntervalTreeIndex.java b/src/main/java/htsjdk/tribble/index/interval/IntervalTreeIndex.java index c4b2865dca..9a551c51a1 100644 --- a/src/main/java/htsjdk/tribble/index/interval/IntervalTreeIndex.java +++ b/src/main/java/htsjdk/tribble/index/interval/IntervalTreeIndex.java @@ -22,7 +22,6 @@ import htsjdk.tribble.index.Block; import htsjdk.tribble.util.LittleEndianInputStream; import htsjdk.tribble.util.LittleEndianOutputStream; - import java.io.IOException; import java.io.InputStream; import java.nio.file.Path; @@ -120,9 +119,7 @@ public static class ChrIndex implements htsjdk.tribble.index.ChrIndex { /** * Default constructor needed for factory methods -- DO NOT REMOVE */ - public ChrIndex() { - - } + public ChrIndex() {} public ChrIndex(final String name) { this.name = name; @@ -143,7 +140,6 @@ public List getBlocks() { return null; } - @Override public List getBlocks(final int start, final int end) { @@ -163,8 +159,11 @@ public List getBlocks(final int start, final int end) { Arrays.sort(blocks, new 
Comparator() { @Override public int compare(final Block b1, final Block b2) { - // this is a little cryptic because the normal method (b1.getStartPosition() - b2.getStartPosition()) wraps in int space and we incorrectly sort the blocks in extreme cases - return b1.getStartPosition() - b2.getStartPosition() < 1 ? -1 : (b1.getStartPosition() - b2.getStartPosition() > 1 ? 1 : 0); + // this is a little cryptic because the normal method (b1.getStartPosition() - + // b2.getStartPosition()) wraps in int space and we incorrectly sort the blocks in extreme cases + return b1.getStartPosition() - b2.getStartPosition() < 1 + ? -1 + : (b1.getStartPosition() - b2.getStartPosition() > 1 ? 1 : 0); } }); @@ -202,7 +201,6 @@ public void write(final LittleEndianOutputStream dos) throws IOException { dos.writeLong(interval.getBlock().getStartPosition()); dos.writeInt((int) interval.getBlock().getSize()); } - } @Override @@ -222,9 +220,6 @@ public void read(final LittleEndianInputStream dis) throws IOException { final Interval iv = new Interval(start, end, new Block(pos, size)); tree.insert(iv); } - - } - } } diff --git a/src/main/java/htsjdk/tribble/index/linear/LinearIndex.java b/src/main/java/htsjdk/tribble/index/linear/LinearIndex.java index 835987fd5a..d8f28fb079 100644 --- a/src/main/java/htsjdk/tribble/index/linear/LinearIndex.java +++ b/src/main/java/htsjdk/tribble/index/linear/LinearIndex.java @@ -24,7 +24,6 @@ import htsjdk.tribble.index.Index; import htsjdk.tribble.util.LittleEndianInputStream; import htsjdk.tribble.util.LittleEndianOutputStream; - import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -54,13 +53,15 @@ public class LinearIndex extends AbstractIndex { // NOTE: To debug uncomment the System.getProperty and recompile. 
- public static final double MAX_FEATURES_PER_BIN = Double.parseDouble(System.getProperty("MAX_FEATURES_PER_BIN", "100")); + public static final double MAX_FEATURES_PER_BIN = + Double.parseDouble(System.getProperty("MAX_FEATURES_PER_BIN", "100")); public static final int INDEX_TYPE = IndexType.LINEAR.fileHeaderTypeIdentifier; - private final static int MAX_BIN_WIDTH = 1 * 1000 * 1000 * 1000; // widths must be less than 1 billion + private static final int MAX_BIN_WIDTH = 1 * 1000 * 1000 * 1000; // widths must be less than 1 billion // 1MB: we will no merge bins with any features in them beyond this size, no matter how sparse, per chromosome - private static final long MAX_BIN_WIDTH_FOR_OCCUPIED_CHR_INDEX = Long.parseLong(System.getProperty("MAX_BIN_WIDTH_FOR_OCCUPIED_CHR_INDEX", "1024000")); + private static final long MAX_BIN_WIDTH_FOR_OCCUPIED_CHR_INDEX = + Long.parseLong(System.getProperty("MAX_BIN_WIDTH_FOR_OCCUPIED_CHR_INDEX", "1024000")); public static boolean enableAdaptiveIndexing = true; @@ -71,8 +72,7 @@ public class LinearIndex extends AbstractIndex { */ public LinearIndex(final List indices, final Path featureFile) { super(featureFile); - for (final ChrIndex index : indices) - chrIndices.put(index.getName(), index); + for (final ChrIndex index : indices) chrIndices.put(index.getName(), index); } /** @@ -86,8 +86,7 @@ public LinearIndex(final List indices, final File featureFile) { private LinearIndex(final LinearIndex parent, final List indices) { super(parent); - for (final ChrIndex index : indices) - chrIndices.put(index.getName(), index); + for (final ChrIndex index : indices) chrIndices.put(index.getName(), index); } /** @@ -122,8 +121,7 @@ public boolean isCurrentVersion() { // todo fixme nasty hack to determine if this is an old style V3 linear index (without nFeaturesPerBin) for (final htsjdk.tribble.index.ChrIndex chrIndex : chrIndices.values()) - if (((ChrIndex) chrIndex).OLD_V3_INDEX) - return false; + if (((ChrIndex) chrIndex).OLD_V3_INDEX) 
return false; return true; } @@ -135,8 +133,9 @@ protected int getType() { @Override public List getSequenceNames() { - return (chrIndices == null ? Collections.emptyList() : - Collections.unmodifiableList(new ArrayList<>(chrIndices.keySet()))); + return (chrIndices == null + ? Collections.emptyList() + : Collections.unmodifiableList(new ArrayList<>(chrIndices.keySet()))); } @Override @@ -144,7 +143,6 @@ public Class getChrIndexClass() { return ChrIndex.class; } - /** * Blocks are organized as a simple flat list: *

    @@ -162,8 +160,8 @@ public Class getChrIndexClass() { * Note that covered regions are open on the left ( and closed on the right ]. *

    * In general, if block i is the ith block (starting from 0), then block i - * contains all records that have starting position > (i * binWidth) and - * <= ((i + 1) * binWidth)) + * contains all records that have starting position {@code > (i * binWidth)} and + * {@code <= ((i + 1) * binWidth))} */ public static class ChrIndex implements htsjdk.tribble.index.ChrIndex { private String name = ""; @@ -177,16 +175,14 @@ public static class ChrIndex implements htsjdk.tribble.index.ChrIndex { /** * Default constructor needed for factory methods -- DO NOT REMOVE */ - public ChrIndex() { - - } + public ChrIndex() {} ChrIndex(final String name, final int binWidth) { this.name = name; this.binWidth = binWidth; this.blocks = new ArrayList(100); this.longestFeature = 0; - //this.largestBlockSize = 0; + // this.largestBlockSize = 0; this.nFeatures = 0; } @@ -197,7 +193,7 @@ public String getName() { void addBlock(final Block block) { blocks.add(block); - //largestBlockSize = Math.max(largestBlockSize, block.getSize()); + // largestBlockSize = Math.max(largestBlockSize, block.getSize()); } public int getNBlocks() { @@ -219,14 +215,15 @@ public List getBlocks(final int start, final int end) { final int adjustedPosition = Math.max(start - longestFeature, 0); final int startBinNumber = adjustedPosition / binWidth; if (startBinNumber >= blocks.size()) // are we off the end of the bin list, so return nothing - return Collections.emptyList(); + return Collections.emptyList(); else { final int endBinNumber = Math.min((end - 1) / binWidth, blocks.size() - 1); // By definition blocks are adjacent for the liner index. 
Combine them into one merged block final long startPos = blocks.get(startBinNumber).getStartPosition(); - final long endPos = blocks.get(endBinNumber).getStartPosition() + blocks.get(endBinNumber).getSize(); + final long endPos = blocks.get(endBinNumber).getStartPosition() + + blocks.get(endBinNumber).getSize(); final long size = endPos - startPos; if (size == 0) { return Collections.emptyList(); @@ -238,7 +235,6 @@ public List getBlocks(final int start, final int end) { } } - public void updateLongestFeature(final int featureLength) { longestFeature = Math.max(longestFeature, featureLength); } @@ -259,8 +255,8 @@ public void write(final LittleEndianOutputStream dos) throws IOException { dos.writeInt(binWidth); dos.writeInt(blocks.size()); dos.writeInt(longestFeature); - dos.writeInt(0); // no longer used - //dos.writeInt(largestBlockSize); + dos.writeInt(0); // no longer used + // dos.writeInt(largestBlockSize); dos.writeInt(nFeatures); long pos = 0; @@ -280,7 +276,7 @@ public void read(final LittleEndianInputStream dis) throws IOException { binWidth = dis.readInt(); final int nBins = dis.readInt(); longestFeature = dis.readInt(); - //largestBlockSize = dis.readInt(); + // largestBlockSize = dis.readInt(); // largestBlockSize and totalBlockSize are old V3 index values. largest block size should be 0 for // all newer V3 block. This is a nasty hack that should be removed when we go to V4 (XML!) 
indices OLD_V3_INDEX = dis.readInt() > 0; @@ -303,7 +299,7 @@ public boolean equals(final Object obj) { final ChrIndex other = (ChrIndex) obj; return binWidth == other.binWidth && longestFeature == other.longestFeature - //&& largestBlockSize == other.largestBlockSize + // && largestBlockSize == other.largestBlockSize && nFeatures == other.nFeatures && name.equals(other.name) && blocks.equals(other.blocks); @@ -319,8 +315,7 @@ public int hashCode() { */ public long getTotalSize() { long n = 0; - for (final Block b : getBlocks()) - n += b.getSize(); + for (final Block b : getBlocks()) n += b.getSize(); return n; } @@ -351,8 +346,10 @@ public ChrIndex optimize(final double threshold) { private static boolean badBinWidth(final ChrIndex idx) { if (idx.binWidth > MAX_BIN_WIDTH || idx.binWidth < 0) // an overflow occurred - return true; - else if (MAX_BIN_WIDTH_FOR_OCCUPIED_CHR_INDEX != 0 && idx.getNFeatures() > 1 && idx.binWidth > MAX_BIN_WIDTH_FOR_OCCUPIED_CHR_INDEX) { + return true; + else if (MAX_BIN_WIDTH_FOR_OCCUPIED_CHR_INDEX != 0 + && idx.getNFeatures() > 1 + && idx.binWidth > MAX_BIN_WIDTH_FOR_OCCUPIED_CHR_INDEX) { return true; } else { return false; @@ -365,8 +362,7 @@ private static ChrIndex optimize(ChrIndex idx, final double threshold, int level while (true) { final double score = idx.optimizeScore(); - if (score > threshold || idx.getNBlocks() == 1 || badBinWidth(idx)) - break; + if (score > threshold || idx.getNBlocks() == 1 || badBinWidth(idx)) break; else { best = idx; // remember the last best option @@ -395,8 +391,7 @@ private static ChrIndex mergeBlocks(final ChrIndex idx) { final Block b1 = blocks.next(); final Block b2 = blocks.hasNext() ? 
blocks.next() : null; - if (b2 == null) - merged.addBlock(b1); + if (b2 == null) merged.addBlock(b1); else // the new block is simply the start of the first block and the size of both together merged.addBlock(new Block(b1.getStartPosition(), b1.getSize() + b2.getSize())); @@ -446,8 +441,16 @@ public void writeTable(final PrintStream out) { final LinearIndex.ChrIndex chrIdx = (LinearIndex.ChrIndex) chrIndices.get(name); int blockCount = 0; for (final Block b : chrIdx.getBlocks()) { - out.printf("%s %d %.2f %d %d %d %d %d%n", name, chrIdx.binWidth, chrIdx.getAverageFeatureSize(), chrIdx.getNFeatures(), blockCount, - blockCount * chrIdx.binWidth, b.getSize(), (int) (b.getSize() / chrIdx.getAverageFeatureSize())); + out.printf( + "%s %d %.2f %d %d %d %d %d%n", + name, + chrIdx.binWidth, + chrIdx.getAverageFeatureSize(), + chrIdx.getNFeatures(), + blockCount, + blockCount * chrIdx.binWidth, + b.getSize(), + (int) (b.getSize() / chrIdx.getAverageFeatureSize())); blockCount++; } } @@ -458,4 +461,3 @@ protected final void setTS(final long ts) { this.indexedFileTS = ts; } } - diff --git a/src/main/java/htsjdk/tribble/index/linear/LinearIndexCreator.java b/src/main/java/htsjdk/tribble/index/linear/LinearIndexCreator.java index acbe3b8d77..bdde25fe33 100644 --- a/src/main/java/htsjdk/tribble/index/linear/LinearIndexCreator.java +++ b/src/main/java/htsjdk/tribble/index/linear/LinearIndexCreator.java @@ -28,7 +28,6 @@ import htsjdk.tribble.index.Block; import htsjdk.tribble.index.Index; import htsjdk.tribble.index.TribbleIndexCreator; - import java.io.File; import java.nio.file.Path; import java.util.ArrayList; @@ -39,7 +38,7 @@ * * @author jrobinso */ -public class LinearIndexCreator extends TribbleIndexCreator { +public class LinearIndexCreator extends TribbleIndexCreator { public static final int DEFAULT_BIN_WIDTH = 8000; // the set bin width private int binWidth = DEFAULT_BIN_WIDTH; @@ -48,7 +47,7 @@ public class LinearIndexCreator extends TribbleIndexCreator { private 
final Path inputFile; private final LinkedList chrList = new LinkedList(); - private int longestFeature= 0; + private int longestFeature = 0; private final ArrayList blocks = new ArrayList(); @@ -81,10 +80,14 @@ public void addFeature(final Feature feature, final long filePosition) { // if we're creating a new chrIndex (not the first), make sure to dump the blocks to the old chrIndex if (!chrList.isEmpty()) for (int x = 0; x < blocks.size(); x++) { - blocks.get(x).setEndPosition((x + 1 == blocks.size()) ? filePosition : blocks.get(x + 1).getStartPosition()); + blocks.get(x) + .setEndPosition( + (x + 1 == blocks.size()) + ? filePosition + : blocks.get(x + 1).getStartPosition()); chrList.getLast().addBlock(blocks.get(x)); } - chrList.add(new LinearIndex.ChrIndex(feature.getContig(),binWidth)); + chrList.add(new LinearIndex.ChrIndex(feature.getContig(), binWidth)); blocks.clear(); // Add the first block @@ -94,10 +97,10 @@ public void addFeature(final Feature feature, final long filePosition) { // if start > current bin location, make new bins until we're at the correct location while (feature.getStart() > blocks.size() * binWidth) { - blocks.add(new Block(filePosition,0)); + blocks.add(new Block(filePosition, 0)); } - if ((feature.getEnd()- feature.getStart())+1 > longestFeature) { - longestFeature = (feature.getEnd()- feature.getStart())+1; + if ((feature.getEnd() - feature.getStart()) + 1 > longestFeature) { + longestFeature = (feature.getEnd() - feature.getStart()) + 1; chrList.getLast().updateLongestFeature(longestFeature); } chrList.getLast().incrementFeatureCount(); @@ -114,12 +117,16 @@ public Index finalizeIndex(final long finalFilePosition) { throw new IllegalArgumentException("finalFilePosition != 0, -> " + finalFilePosition); for (int x = 0; x < blocks.size(); x++) { - blocks.get(x).setEndPosition((x + 1 == blocks.size()) ? finalFilePosition : blocks.get(x+1).getStartPosition()); + blocks.get(x) + .setEndPosition( + (x + 1 == blocks.size()) + ? 
finalFilePosition + : blocks.get(x + 1).getStartPosition()); chrList.getLast().addBlock(blocks.get(x)); } blocks.clear(); - final LinearIndex index = new LinearIndex(chrList,inputFile); + final LinearIndex index = new LinearIndex(chrList, inputFile); index.addProperties(properties); index.finalizeIndex(); return index.optimize(); @@ -133,6 +140,7 @@ public int defaultBinSize() { return DEFAULT_BIN_WIDTH; } - public int getBinSize() { return binWidth; } + public int getBinSize() { + return binWidth; + } } - diff --git a/src/main/java/htsjdk/tribble/index/tabix/AllRefsTabixIndexCreator.java b/src/main/java/htsjdk/tribble/index/tabix/AllRefsTabixIndexCreator.java index ecc1bbd1b6..67430a59ab 100644 --- a/src/main/java/htsjdk/tribble/index/tabix/AllRefsTabixIndexCreator.java +++ b/src/main/java/htsjdk/tribble/index/tabix/AllRefsTabixIndexCreator.java @@ -32,7 +32,6 @@ import htsjdk.tribble.index.Index; import htsjdk.tribble.index.IndexCreator; import htsjdk.utils.ValidationUtils; - import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; @@ -53,8 +52,7 @@ public class AllRefsTabixIndexCreator implements IndexCreator { // defines the location of the end of the previous feature in the output file. 
private TabixFeature previousFeature = null; - public AllRefsTabixIndexCreator(final SAMSequenceDictionary sequenceDictionary, - final TabixFormat formatSpec) { + public AllRefsTabixIndexCreator(final SAMSequenceDictionary sequenceDictionary, final TabixFormat formatSpec) { ValidationUtils.nonNull(sequenceDictionary); this.sequenceDictionary = sequenceDictionary; this.formatSpec = formatSpec.clone(); @@ -76,14 +74,16 @@ public void addFeature(final Feature feature, final long filePosition) { advance = true; } if (referenceIndex != currentReferenceIndex && referenceIndex != currentReferenceIndex + 1) { - throw new IllegalArgumentException("Sequence " + feature + " added out of order" + (" currentReferenceIndex: " + currentReferenceIndex + ", referenceIndex:" + referenceIndex)); + throw new IllegalArgumentException("Sequence " + feature + " added out of order" + + (" currentReferenceIndex: " + currentReferenceIndex + ", referenceIndex:" + referenceIndex)); } } - final TabixFeature thisFeature = new TabixFeature(referenceIndex, feature.getStart(), feature.getEnd(), filePosition); + final TabixFeature thisFeature = + new TabixFeature(referenceIndex, feature.getStart(), feature.getEnd(), filePosition); if (previousFeature != null) { if (previousFeature.compareTo(thisFeature) > 0) { - throw new IllegalArgumentException(String.format("Features added out of order: previous (%s) > next (%s)", - previousFeature, thisFeature)); + throw new IllegalArgumentException(String.format( + "Features added out of order: previous (%s) > next (%s)", previousFeature, thisFeature)); } finalizeFeature(filePosition); } @@ -96,7 +96,8 @@ public void addFeature(final Feature feature, final long filePosition) { private void finalizeFeature(final long featureEndPosition) { previousFeature.featureEndFilePosition = featureEndPosition; if (previousFeature.featureStartFilePosition >= previousFeature.featureEndFilePosition) { - throw new IllegalArgumentException(String.format("Feature start position 
%d >= feature end position %d", + throw new IllegalArgumentException(String.format( + "Feature start position %d >= feature end position %d", previousFeature.featureStartFilePosition, previousFeature.featureEndFilePosition)); } indexBuilder.processFeature(previousFeature); @@ -129,11 +130,12 @@ public Index finalizeIndex(final long finalFilePosition) { // but truncate the sequence dictionary before its end if there are sequences in the sequence dictionary without // any features. final BinningIndexContent[] indices = indexContents.toArray(new BinningIndexContent[sequenceDictionary.size()]); - List sequenceNames = sequenceDictionary.getSequences().stream().map(SAMSequenceRecord::getSequenceName).collect(Collectors.toList()); + List sequenceNames = sequenceDictionary.getSequences().stream() + .map(SAMSequenceRecord::getSequenceName) + .collect(Collectors.toList()); return new TabixIndex(formatSpec, sequenceNames, indices); } - private static class TabixFeature implements BinningIndexBuilder.FeatureToBeIndexed, Comparable { private final int referenceIndex; private final int start; @@ -142,7 +144,8 @@ private static class TabixFeature implements BinningIndexBuilder.FeatureToBeInde // Position after this feature in the file. 
private long featureEndFilePosition = -1; - private TabixFeature(final int referenceIndex, final int start, final int end, final long featureStartFilePosition) { + private TabixFeature( + final int referenceIndex, final int start, final int end, final long featureStartFilePosition) { this.referenceIndex = referenceIndex; this.start = start; this.end = end; @@ -185,13 +188,12 @@ public int compareTo(final TabixFeature other) { @Override public String toString() { - return "TabixFeature{" + - "referenceIndex=" + referenceIndex + - ", start=" + start + - ", end=" + end + - ", featureStartFilePosition=" + featureStartFilePosition + - ", featureEndFilePosition=" + featureEndFilePosition + - '}'; + return "TabixFeature{" + "referenceIndex=" + + referenceIndex + ", start=" + + start + ", end=" + + end + ", featureStartFilePosition=" + + featureStartFilePosition + ", featureEndFilePosition=" + + featureEndFilePosition + '}'; } } } diff --git a/src/main/java/htsjdk/tribble/index/tabix/StreamBasedTabixIndexCreator.java b/src/main/java/htsjdk/tribble/index/tabix/StreamBasedTabixIndexCreator.java index d73531bade..11321aacb4 100644 --- a/src/main/java/htsjdk/tribble/index/tabix/StreamBasedTabixIndexCreator.java +++ b/src/main/java/htsjdk/tribble/index/tabix/StreamBasedTabixIndexCreator.java @@ -28,7 +28,6 @@ import htsjdk.samtools.util.BlockCompressedOutputStream; import htsjdk.tribble.index.Index; import htsjdk.tribble.util.LittleEndianOutputStream; - import java.io.IOException; import java.io.OutputStream; import java.nio.file.Path; @@ -39,43 +38,37 @@ */ public class StreamBasedTabixIndexCreator extends AllRefsTabixIndexCreator { - static class StreamBasedTabixIndex extends TabixIndex { - private final OutputStream out; + static class StreamBasedTabixIndex extends TabixIndex { + private final OutputStream out; - StreamBasedTabixIndex( - TabixFormat formatSpec, - List sequenceNames, - BinningIndexContent[] indices, - OutputStream out) { - super(formatSpec, sequenceNames, 
indices); - this.out = out; - } + StreamBasedTabixIndex( + TabixFormat formatSpec, List sequenceNames, BinningIndexContent[] indices, OutputStream out) { + super(formatSpec, sequenceNames, indices); + this.out = out; + } - @Override - public void writeBasedOnFeaturePath(final Path featurePath) throws IOException { - try (final LittleEndianOutputStream los = - new LittleEndianOutputStream(new BlockCompressedOutputStream(out, (Path) null))) { - write(los); - } + @Override + public void writeBasedOnFeaturePath(final Path featurePath) throws IOException { + try (final LittleEndianOutputStream los = + new LittleEndianOutputStream(new BlockCompressedOutputStream(out, (Path) null))) { + write(los); + } + } } - } - private final OutputStream out; + private final OutputStream out; - public StreamBasedTabixIndexCreator( - SAMSequenceDictionary sequenceDictionary, TabixFormat formatSpec, OutputStream out) { - super(sequenceDictionary, formatSpec); - this.out = out; - } + public StreamBasedTabixIndexCreator( + SAMSequenceDictionary sequenceDictionary, TabixFormat formatSpec, OutputStream out) { + super(sequenceDictionary, formatSpec); + this.out = out; + } - @Override - public Index finalizeIndex(long finalFilePosition) { - final Index index = super.finalizeIndex(finalFilePosition); - final TabixIndex tabixIndex = (TabixIndex) index; - return new StreamBasedTabixIndex( - tabixIndex.getFormatSpec(), - tabixIndex.getSequenceNames(), - tabixIndex.getIndices(), - out); - } + @Override + public Index finalizeIndex(long finalFilePosition) { + final Index index = super.finalizeIndex(finalFilePosition); + final TabixIndex tabixIndex = (TabixIndex) index; + return new StreamBasedTabixIndex( + tabixIndex.getFormatSpec(), tabixIndex.getSequenceNames(), tabixIndex.getIndices(), out); + } } diff --git a/src/main/java/htsjdk/tribble/index/tabix/TabixFormat.java b/src/main/java/htsjdk/tribble/index/tabix/TabixFormat.java index ef8d40dac9..a398e54563 100644 --- 
a/src/main/java/htsjdk/tribble/index/tabix/TabixFormat.java +++ b/src/main/java/htsjdk/tribble/index/tabix/TabixFormat.java @@ -29,14 +29,15 @@ * The values in a Tabix header that define the format of the file being indexed, e.g. gff, bed, vcf */ public class TabixFormat implements Cloneable { - public static final int ZERO_BASED = 0x10000; + public static final int ZERO_BASED = 0x10000; public static final int GENERIC_FLAGS = 0; - public static final int SAM_FLAGS = 1; - public static final int VCF_FLAGS = 2; - public static final int UCSC_FLAGS = GENERIC_FLAGS | ZERO_BASED; + public static final int SAM_FLAGS = 1; + public static final int VCF_FLAGS = 2; + public static final int UCSC_FLAGS = GENERIC_FLAGS | ZERO_BASED; /** Predefined headers for known formats */ public static final TabixFormat GFF = new TabixFormat(GENERIC_FLAGS, 1, 4, 5, '#', 0); + public static final TabixFormat BED = new TabixFormat(UCSC_FLAGS, 1, 2, 3, '#', 0); public static final TabixFormat PSLTBL = new TabixFormat(UCSC_FLAGS, 15, 17, 18, '#', 0); public static final TabixFormat SAM = new TabixFormat(SAM_FLAGS, 3, 4, 0, '@', 0); @@ -59,10 +60,15 @@ public class TabixFormat implements Cloneable { /** TODO: This is written, and part of the index header, but does not appear to be used. 
*/ public int numHeaderLinesToSkip; - public TabixFormat() { - } + public TabixFormat() {} - public TabixFormat(final int flags, final int sequenceColumn, final int startPositionColumn, final int endPositionColumn, final char metaCharacter, final int numHeaderLinesToSkip) { + public TabixFormat( + final int flags, + final int sequenceColumn, + final int startPositionColumn, + final int endPositionColumn, + final char metaCharacter, + final int numHeaderLinesToSkip) { this.flags = flags; this.sequenceColumn = sequenceColumn; this.startPositionColumn = startPositionColumn; @@ -74,7 +80,7 @@ public TabixFormat(final int flags, final int sequenceColumn, final int startPos @Override public TabixFormat clone() { try { - return (TabixFormat)super.clone(); + return (TabixFormat) super.clone(); } catch (final CloneNotSupportedException e) { throw new TribbleException("unpossible!"); } diff --git a/src/main/java/htsjdk/tribble/index/tabix/TabixIndex.java b/src/main/java/htsjdk/tribble/index/tabix/TabixIndex.java index 7a506e4f59..ee68dba0c9 100644 --- a/src/main/java/htsjdk/tribble/index/tabix/TabixIndex.java +++ b/src/main/java/htsjdk/tribble/index/tabix/TabixIndex.java @@ -30,8 +30,6 @@ import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.BlockCompressedOutputStream; import htsjdk.samtools.util.CloserUtil; -import htsjdk.samtools.util.IOUtil; -import htsjdk.samtools.util.Log; import htsjdk.samtools.util.StringUtil; import htsjdk.tribble.Tribble; import htsjdk.tribble.TribbleException; @@ -39,8 +37,6 @@ import htsjdk.tribble.index.Index; import htsjdk.tribble.util.LittleEndianInputStream; import htsjdk.tribble.util.LittleEndianOutputStream; -import htsjdk.tribble.util.TabixUtils; - import java.io.EOFException; import java.io.File; import java.io.IOException; @@ -76,7 +72,8 @@ public class TabixIndex implements Index { * @param sequenceNames Sequences in the file being indexed, in the order they appear in the file. 
* @param indices One for each element of sequenceNames */ - public TabixIndex(final TabixFormat formatSpec, final List sequenceNames, final BinningIndexContent[] indices) { + public TabixIndex( + final TabixFormat formatSpec, final List sequenceNames, final BinningIndexContent[] indices) { if (sequenceNames.size() != indices.length) { throw new IllegalArgumentException("sequenceNames.size() != indices.length"); } @@ -162,8 +159,8 @@ public List getBlocks(final String chr, final int start, final int end) { } else { final List ret = new ArrayList<>(chunks.size()); chunks.stream() - .map(chunk -> new Block(chunk.getChunkStart(), chunk.getChunkEnd() - chunk.getChunkStart())) - .forEach(ret::add); + .map(chunk -> new Block(chunk.getChunkStart(), chunk.getChunkEnd() - chunk.getChunkStart())) + .forEach(ret::add); return ret; } } @@ -201,7 +198,6 @@ public boolean equalsIgnoreProperties(final Object o) { if (!formatSpec.equals(that.formatSpec)) return false; if (!Arrays.equals(indices, that.indices)) return false; return sequenceNames.equals(that.sequenceNames); - } public TabixFormat getFormatSpec() { @@ -219,7 +215,8 @@ public BinningIndexContent[] getIndices() { */ @Override public void write(final Path tabixPath) throws IOException { - try(final LittleEndianOutputStream los = new LittleEndianOutputStream(new BlockCompressedOutputStream(Files.newOutputStream(tabixPath), (Path)null))) { + try (final LittleEndianOutputStream los = new LittleEndianOutputStream( + new BlockCompressedOutputStream(Files.newOutputStream(tabixPath), (Path) null))) { write(los); } } @@ -265,7 +262,8 @@ public void write(final LittleEndianOutputStream los) throws IOException { } } - private void writeSequence(final BinningIndexContent indexContent, final LittleEndianOutputStream los) throws IOException { + private void writeSequence(final BinningIndexContent indexContent, final LittleEndianOutputStream los) + throws IOException { if (indexContent == null) { los.writeInt(0); } else { @@ -278,7 
+276,8 @@ private void writeSequence(final BinningIndexContent indexContent, final LittleE } } - private void writeLinearIndex(final LinearIndex linearIndex, final LittleEndianOutputStream los) throws IOException { + private void writeLinearIndex(final LinearIndex linearIndex, final LittleEndianOutputStream los) + throws IOException { if (linearIndex.getIndexStart() != 0) { // This could be handled by writing zeroes, but it is not expected so just fail. throw new IllegalArgumentException("Non-zero linear index start"); @@ -305,7 +304,8 @@ private void writeBin(final Bin bin, final LittleEndianOutputStream los) throws * @param referenceSequenceIndex Merely for setting in the returned object, not for seeking into the file. * @param dis This method assumes that the current position is at the start of the reference. */ - private BinningIndexContent loadSequence(final int referenceSequenceIndex, final LittleEndianInputStream dis) throws IOException { + private BinningIndexContent loadSequence(final int referenceSequenceIndex, final LittleEndianInputStream dis) + throws IOException { final int numBins = dis.readInt(); if (numBins == 0) return null; int nonNullBins = 0; @@ -330,11 +330,14 @@ private BinningIndexContent loadSequence(final int referenceSequenceIndex, final } } final LinearIndex linearIndex = loadLinearIndex(referenceSequenceIndex, dis); - return new BinningIndexContent(referenceSequenceIndex, - new BinningIndexContent.BinList(bins.toArray(new Bin[bins.size()]), nonNullBins), linearIndex); + return new BinningIndexContent( + referenceSequenceIndex, + new BinningIndexContent.BinList(bins.toArray(new Bin[bins.size()]), nonNullBins), + linearIndex); } - private LinearIndex loadLinearIndex(final int referenceSequenceIndex, final LittleEndianInputStream dis) throws IOException { + private LinearIndex loadLinearIndex(final int referenceSequenceIndex, final LittleEndianInputStream dis) + throws IOException { final int numElements = dis.readInt(); final long[] 
elements = new long[numElements]; for (int i = 0; i < numElements; ++i) { diff --git a/src/main/java/htsjdk/tribble/index/tabix/TabixIndexCreator.java b/src/main/java/htsjdk/tribble/index/tabix/TabixIndexCreator.java index 001dabc777..44c933be5a 100644 --- a/src/main/java/htsjdk/tribble/index/tabix/TabixIndexCreator.java +++ b/src/main/java/htsjdk/tribble/index/tabix/TabixIndexCreator.java @@ -30,7 +30,6 @@ import htsjdk.tribble.Feature; import htsjdk.tribble.index.Index; import htsjdk.tribble.index.IndexCreator; - import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -56,13 +55,11 @@ public class TabixIndexCreator implements IndexCreator { // defines the location of the end of the previous feature in the output file. private TabixFeature previousFeature = null; - /** * @param sequenceDictionary is not required, but if present all features added must refer to sequences in the * dictionary. It is used to optimize the memory needed to build the index. */ - public TabixIndexCreator(final SAMSequenceDictionary sequenceDictionary, - final TabixFormat formatSpec) { + public TabixIndexCreator(final SAMSequenceDictionary sequenceDictionary, final TabixFormat formatSpec) { this.sequenceDictionary = sequenceDictionary; this.formatSpec = formatSpec.clone(); } @@ -83,11 +80,12 @@ public void addFeature(final Feature feature, final long filePosition) { throw new IllegalArgumentException("Sequence " + feature + " added out sequence of order"); } } - final TabixFeature thisFeature = new TabixFeature(referenceIndex, feature.getStart(), feature.getEnd(), filePosition); + final TabixFeature thisFeature = + new TabixFeature(referenceIndex, feature.getStart(), feature.getEnd(), filePosition); if (previousFeature != null) { if (previousFeature.compareTo(thisFeature) > 0) { - throw new IllegalArgumentException(String.format("Features added out of order: previous (%s) > next (%s)", - previousFeature, thisFeature)); + throw new 
IllegalArgumentException(String.format( + "Features added out of order: previous (%s) > next (%s)", previousFeature, thisFeature)); } finalizeFeature(filePosition); } @@ -100,7 +98,8 @@ public void addFeature(final Feature feature, final long filePosition) { private void finalizeFeature(final long featureEndPosition) { previousFeature.featureEndFilePosition = featureEndPosition; if (previousFeature.featureStartFilePosition >= previousFeature.featureEndFilePosition) { - throw new IllegalArgumentException(String.format("Feature start position %d >= feature end position %d", + throw new IllegalArgumentException(String.format( + "Feature start position %d >= feature end position %d", previousFeature.featureStartFilePosition, previousFeature.featureEndFilePosition)); } indexBuilder.processFeature(previousFeature); @@ -138,7 +137,6 @@ public Index finalizeIndex(final long finalFilePosition) { return new TabixIndex(formatSpec, sequenceNames, indices); } - private static class TabixFeature implements BinningIndexBuilder.FeatureToBeIndexed, Comparable { private final int referenceIndex; private final int start; @@ -147,7 +145,8 @@ private static class TabixFeature implements BinningIndexBuilder.FeatureToBeInde // Position after this feature in the file. 
private long featureEndFilePosition = -1; - private TabixFeature(final int referenceIndex, final int start, final int end, final long featureStartFilePosition) { + private TabixFeature( + final int referenceIndex, final int start, final int end, final long featureStartFilePosition) { this.referenceIndex = referenceIndex; this.start = start; this.end = end; @@ -190,13 +189,12 @@ public int compareTo(final TabixFeature other) { @Override public String toString() { - return "TabixFeature{" + - "referenceIndex=" + referenceIndex + - ", start=" + start + - ", end=" + end + - ", featureStartFilePosition=" + featureStartFilePosition + - ", featureEndFilePosition=" + featureEndFilePosition + - '}'; + return "TabixFeature{" + "referenceIndex=" + + referenceIndex + ", start=" + + start + ", end=" + + end + ", featureStartFilePosition=" + + featureStartFilePosition + ", featureEndFilePosition=" + + featureEndFilePosition + '}'; } } } diff --git a/src/main/java/htsjdk/tribble/index/tabix/TabixIndexMerger.java b/src/main/java/htsjdk/tribble/index/tabix/TabixIndexMerger.java index 36d4383b89..7417d70e06 100644 --- a/src/main/java/htsjdk/tribble/index/tabix/TabixIndexMerger.java +++ b/src/main/java/htsjdk/tribble/index/tabix/TabixIndexMerger.java @@ -29,7 +29,6 @@ import htsjdk.samtools.LinearIndex; import htsjdk.samtools.util.BlockCompressedOutputStream; import htsjdk.tribble.util.LittleEndianOutputStream; - import java.io.File; import java.io.IOException; import java.io.OutputStream; @@ -81,12 +80,13 @@ public void processIndex(final TabixIndex index, final long partLength) { } } if (!index.getFormatSpec().equals(formatSpec)) { - throw new IllegalArgumentException( - String.format("Cannot merge tabix files with different formats, %s and %s.", index.getFormatSpec(), formatSpec)); + throw new IllegalArgumentException(String.format( + "Cannot merge tabix files with different formats, %s and %s.", index.getFormatSpec(), formatSpec)); } if 
(!sequenceNames.equals(index.getSequenceNames())) { - throw new IllegalArgumentException( - String.format("Cannot merge tabix files with different sequence names, %s and %s.", index.getSequenceNames(), sequenceNames)); + throw new IllegalArgumentException(String.format( + "Cannot merge tabix files with different sequence names, %s and %s.", + index.getSequenceNames(), sequenceNames)); } indexes.add(index); } @@ -102,23 +102,33 @@ public void finish(final long dataFileLength) throws IOException { final List mergedBinningIndexContentList = new ArrayList<>(); for (int ref = 0; ref < sequenceNames.size(); ref++) { final int r = ref; - List binningIndexContentList = indexes.stream().map(index -> index.getIndices()[r]).collect(Collectors.toList()); - final BinningIndexContent binningIndexContent = mergeBinningIndexContent(ref, binningIndexContentList, offsets); + List binningIndexContentList = + indexes.stream().map(index -> index.getIndices()[r]).collect(Collectors.toList()); + final BinningIndexContent binningIndexContent = + mergeBinningIndexContent(ref, binningIndexContentList, offsets); mergedBinningIndexContentList.add(binningIndexContent); } - final TabixIndex tabixIndex = new TabixIndex(formatSpec, sequenceNames, mergedBinningIndexContentList.toArray(new BinningIndexContent[0])); - try (LittleEndianOutputStream los = new LittleEndianOutputStream(new BlockCompressedOutputStream(out, (File) null))) { + final TabixIndex tabixIndex = new TabixIndex( + formatSpec, sequenceNames, mergedBinningIndexContentList.toArray(new BinningIndexContent[0])); + try (LittleEndianOutputStream los = + new LittleEndianOutputStream(new BlockCompressedOutputStream(out, (File) null))) { tabixIndex.write(los); } } - private static BinningIndexContent mergeBinningIndexContent(final int referenceSequence, final List binningIndexContentList, final long[] offsets) { + private static BinningIndexContent mergeBinningIndexContent( + final int referenceSequence, + final List 
binningIndexContentList, + final long[] offsets) { final List binLists = new ArrayList<>(); final List linearIndexes = new ArrayList<>(); for (BinningIndexContent binningIndexContent : binningIndexContentList) { binLists.add(binningIndexContent == null ? null : binningIndexContent.getBins()); linearIndexes.add(binningIndexContent == null ? null : binningIndexContent.getLinearIndex()); } - return new BinningIndexContent(referenceSequence, BAMIndexMerger.mergeBins(binLists, offsets), BAMIndexMerger.mergeLinearIndexes(referenceSequence, linearIndexes, offsets)); + return new BinningIndexContent( + referenceSequence, + BAMIndexMerger.mergeBins(binLists, offsets), + BAMIndexMerger.mergeLinearIndexes(referenceSequence, linearIndexes, offsets)); } } diff --git a/src/main/java/htsjdk/tribble/readers/AsciiLineReader.java b/src/main/java/htsjdk/tribble/readers/AsciiLineReader.java index 31b9b7f497..030bb9e6c7 100644 --- a/src/main/java/htsjdk/tribble/readers/AsciiLineReader.java +++ b/src/main/java/htsjdk/tribble/readers/AsciiLineReader.java @@ -21,7 +21,6 @@ import htsjdk.samtools.util.LocationAware; import htsjdk.samtools.util.Log; import htsjdk.tribble.TribbleException; - import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; @@ -31,7 +30,7 @@ * * {@link BufferedReader} and its {@link java.io.BufferedReader#readLine()} method should be used in preference to this class (when the * {@link htsjdk.samtools.util.LocationAware} functionality is not required) because it offers greater performance. - * + * * @author jrobinso */ public class AsciiLineReader implements LineReader, LocationAware { @@ -45,7 +44,8 @@ public class AsciiLineReader implements LineReader, LocationAware { private char[] lineBuffer; private int lineTerminatorLength = -1; - protected AsciiLineReader() {}; + protected AsciiLineReader() {} + ; /** * Note: This class implements LocationAware, which requires preservation of virtual file pointers on BGZF inputs. 
@@ -55,8 +55,9 @@ public class AsciiLineReader implements LineReader, LocationAware { * @deprecated 8/8/2017 use {@link #from} */ @Deprecated - public AsciiLineReader(final InputStream is){ - // NOTE: This will wrap the input stream in a PositionalBufferedStream even if its already a PositionalBufferedStream + public AsciiLineReader(final InputStream is) { + // NOTE: This will wrap the input stream in a PositionalBufferedStream even if its already a + // PositionalBufferedStream this(new PositionalBufferedStream(is)); } @@ -86,11 +87,12 @@ public static AsciiLineReader from(final InputStream inputStream) { return new BlockCompressedAsciiLineReader((BlockCompressedInputStream) inputStream); } else if (inputStream instanceof PositionalBufferedStream) { // if this is already a PositionalBufferedStream, don't let AsciiLineReader wrap it with another one... - return new AsciiLineReader((PositionalBufferedStream)inputStream); + return new AsciiLineReader((PositionalBufferedStream) inputStream); } else { - log.warn("Creating an indexable source for an AsciiFeatureCodec using a stream that is " + - "neither a PositionalBufferedStream nor a BlockCompressedInputStream"); - return new AsciiLineReader(new PositionalBufferedStream(inputStream)); // wrap the stream in a PositionalBufferedStream + log.warn("Creating an indexable source for an AsciiFeatureCodec using a stream that is " + + "neither a PositionalBufferedStream nor a BlockCompressedInputStream"); + return new AsciiLineReader( + new PositionalBufferedStream(inputStream)); // wrap the stream in a PositionalBufferedStream } } @@ -98,9 +100,10 @@ public static AsciiLineReader from(final InputStream inputStream) { * @return The position of the InputStream */ @Override - public long getPosition(){ - if(is == null){ - throw new TribbleException("getPosition() called but no default stream was provided to the class on creation"); + public long getPosition() { + if (is == null) { + throw new TribbleException( + "getPosition() 
called but no default stream was provided to the class on creation"); } return is.getPosition(); } @@ -127,7 +130,7 @@ public int getLineTerminatorLength() { * end of the stream has been reached */ @Deprecated - public String readLine(final PositionalBufferedStream stream) throws IOException{ + public String readLine(final PositionalBufferedStream stream) throws IOException { int linePosition = 0; while (true) { @@ -148,8 +151,7 @@ public String readLine(final PositionalBufferedStream stream) throws IOException if (c == CARRIAGE_RETURN && stream.peek() == LINEFEED) { stream.read(); // <= skip the trailing \n in case of \r\n termination this.lineTerminatorLength = 2; - } - else { + } else { this.lineTerminatorLength = 1; } @@ -175,22 +177,22 @@ public String readLine(final PositionalBufferedStream stream) throws IOException * @return The next string, or null when input is exhausted. */ @Override - public String readLine() throws IOException{ - if ( is == null ){ - throw new TribbleException("readLine() called without an explicit stream argument but no default stream was provided to the class on creation"); + public String readLine() throws IOException { + if (is == null) { + throw new TribbleException( + "readLine() called without an explicit stream argument but no default stream was provided to the class on creation"); } return readLine(is); } @Override public void close() { - if ( is != null ) is.close(); + if (is != null) is.close(); lineBuffer = null; } - + @Override public String toString() { - return "AsciiLineReader("+(this.is == null ? "closed" : String.valueOf(this.is.getPosition())) +")"; + return "AsciiLineReader(" + (this.is == null ? 
"closed" : String.valueOf(this.is.getPosition())) + ")"; } } - diff --git a/src/main/java/htsjdk/tribble/readers/AsciiLineReaderIterator.java b/src/main/java/htsjdk/tribble/readers/AsciiLineReaderIterator.java index a36797e21f..6a5b7588b6 100644 --- a/src/main/java/htsjdk/tribble/readers/AsciiLineReaderIterator.java +++ b/src/main/java/htsjdk/tribble/readers/AsciiLineReaderIterator.java @@ -5,22 +5,21 @@ import htsjdk.samtools.util.LocationAware; import htsjdk.samtools.util.RuntimeIOException; import htsjdk.samtools.util.Tuple; - import java.io.Closeable; import java.io.IOException; /** * A class that iterates over the lines and line positions in an {@link AsciiLineReader}. - * - * This class is slower than other {@link LineIterator}s because it is driven by {@link AsciiLineReader}, but offers the benefit of + * + * This class is slower than other {@link LineIterator}s because it is driven by {@link AsciiLineReader}, but offers the benefit of * implementing {@link htsjdk.samtools.util.LocationAware}, which is required for indexing. If you do not require {@link htsjdk.samtools.util.LocationAware}, consider using * {@link LineIteratorImpl} as an alternative to this class. - * + * * Note an important distinction in the way this class and its inner iterator differ: in the inner iterator, the position stored with * a line is the position at the start of that line. However, {@link #getPosition()} of the outer class must return the position at the * end of the most-recently-returned line (or the start of the underlying {@link AsciiLineReader}, if no line has been read). The latter * bit of logic here is required to conform with the interface described by {@link htsjdk.samtools.util.LocationAware#getPosition()}. - * + * * @author mccowan */ public class AsciiLineReaderIterator implements LocationAware, LineIterator, Closeable { @@ -73,15 +72,18 @@ public String peek() { * class can't do both). 
*/ private class TupleIterator extends AbstractIterator> implements LocationAware { - + public TupleIterator() { - super.hasNext(); // Initialize the iterator, which appears to be a requirement of the parent class. TODO: Really? + super.hasNext(); // Initialize the iterator, which appears to be a requirement of the parent class. + // TODO: Really? } - + @Override protected Tuple advance() { final String line; - final long position = asciiLineReader.getPosition(); // A line's position is where it starts, so get it before reading the line. + final long position = + asciiLineReader + .getPosition(); // A line's position is where it starts, so get it before reading the line. try { line = asciiLineReader.readLine(); } catch (IOException e) { diff --git a/src/main/java/htsjdk/tribble/readers/BlockCompressedAsciiLineReader.java b/src/main/java/htsjdk/tribble/readers/BlockCompressedAsciiLineReader.java index b4f05a3b66..d77e5cdb9e 100644 --- a/src/main/java/htsjdk/tribble/readers/BlockCompressedAsciiLineReader.java +++ b/src/main/java/htsjdk/tribble/readers/BlockCompressedAsciiLineReader.java @@ -2,7 +2,6 @@ import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.CloserUtil; - import java.io.IOException; /** @@ -11,7 +10,7 @@ */ class BlockCompressedAsciiLineReader extends AsciiLineReader { - final private BlockCompressedInputStream bcs; + private final BlockCompressedInputStream bcs; public BlockCompressedAsciiLineReader(final BlockCompressedInputStream inputBlockCompressedStream) { bcs = inputBlockCompressedStream; @@ -23,11 +22,13 @@ public BlockCompressedAsciiLineReader(final BlockCompressedInputStream inputBloc @Override public String readLine() throws IOException { return bcs.readLine(); - }; + } + ; @Override public String readLine(final PositionalBufferedStream stream) { - throw new UnsupportedOperationException("A BlockCompressedAsciiLineReader class cannot be used to read from a PositionalBufferedStream"); + throw new 
UnsupportedOperationException( + "A BlockCompressedAsciiLineReader class cannot be used to read from a PositionalBufferedStream"); } @Override diff --git a/src/main/java/htsjdk/tribble/readers/LineIteratorImpl.java b/src/main/java/htsjdk/tribble/readers/LineIteratorImpl.java index fda9aaf4a6..d092de8e0d 100644 --- a/src/main/java/htsjdk/tribble/readers/LineIteratorImpl.java +++ b/src/main/java/htsjdk/tribble/readers/LineIteratorImpl.java @@ -3,7 +3,6 @@ import htsjdk.samtools.util.AbstractIterator; import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.RuntimeIOException; - import java.io.Closeable; import java.io.IOException; @@ -31,9 +30,9 @@ protected String advance() { public void close() throws IOException { CloserUtil.close(lineReader); } - + @Override public String toString() { - return "LineIteratorImpl(" + this.lineReader+")"; + return "LineIteratorImpl(" + this.lineReader + ")"; } } diff --git a/src/main/java/htsjdk/tribble/readers/LineReader.java b/src/main/java/htsjdk/tribble/readers/LineReader.java index 2782afc96b..e552e11c8b 100644 --- a/src/main/java/htsjdk/tribble/readers/LineReader.java +++ b/src/main/java/htsjdk/tribble/readers/LineReader.java @@ -38,7 +38,6 @@ public interface LineReader extends Closeable { */ public String readLine() throws IOException; - @Override public void close(); } diff --git a/src/main/java/htsjdk/tribble/readers/LongLineBufferedReader.java b/src/main/java/htsjdk/tribble/readers/LongLineBufferedReader.java index dbb6593438..1e13a5cc2b 100644 --- a/src/main/java/htsjdk/tribble/readers/LongLineBufferedReader.java +++ b/src/main/java/htsjdk/tribble/readers/LongLineBufferedReader.java @@ -24,17 +24,16 @@ package htsjdk.tribble.readers; - import java.io.IOException; import java.io.Reader; /** * A variant of {@link java.io.BufferedReader} with improved performance reading files with long lines. 
- * + * * This class is almost identical to BufferedReader, but it retains a single line buffer for accumulating characters in a line, and allows * its size to grow. Conversely, {@link java.io.BufferedReader} assumes each new line will be approximately 80 characters and its * performance suffers when that is not the case. - * + * * Unlike {@link java.io.BufferedReader}, this class is not thread safe. * * @author mccowan @@ -72,12 +71,11 @@ public class LongLineBufferedReader extends Reader { * * @param in A Reader * @param sz Input-buffer size - * @throws IllegalArgumentException If sz is <= 0 + * @throws IllegalArgumentException If sz is {@code <= 0} */ public LongLineBufferedReader(Reader in, int sz) { super(in); - if (sz <= 0) - throw new IllegalArgumentException("Buffer size <= 0"); + if (sz <= 0) throw new IllegalArgumentException("Buffer size <= 0"); this.in = in; cb = new char[sz]; nextChar = nChars = 0; @@ -97,8 +95,7 @@ public LongLineBufferedReader(Reader in) { * Checks to make sure that the stream has not been closed */ private void ensureOpen() throws IOException { - if (in == null) - throw new IOException("Stream closed"); + if (in == null) throw new IOException("Stream closed"); } /** @@ -107,24 +104,24 @@ private void ensureOpen() throws IOException { private void fill() throws IOException { int dst; if (markedChar <= UNMARKED) { - /* No mark */ + /* No mark */ dst = 0; } else { - /* Marked */ + /* Marked */ int delta = nextChar - markedChar; if (delta >= readAheadLimit) { - /* Gone past read-ahead limit: Invalidate mark */ + /* Gone past read-ahead limit: Invalidate mark */ markedChar = INVALIDATED; readAheadLimit = 0; dst = 0; } else { if (readAheadLimit <= cb.length) { - /* Shuffle in the current buffer */ + /* Shuffle in the current buffer */ System.arraycopy(cb, markedChar, cb, 0, delta); markedChar = 0; dst = delta; } else { - /* Reallocate buffer to accommodate read-ahead limit */ + /* Reallocate buffer to accommodate read-ahead limit */ char 
ncb[] = new char[readAheadLimit]; System.arraycopy(cb, markedChar, ncb, 0, delta); cb = ncb; @@ -160,8 +157,7 @@ public int read() throws IOException { for (; ; ) { if (nextChar >= nChars) { fill(); - if (nextChar >= nChars) - return -1; + if (nextChar >= nChars) return -1; } if (skipLF) { skipLF = false; @@ -181,11 +177,11 @@ public int read() throws IOException { */ private int read1(char[] cbuf, int off, int len) throws IOException { if (nextChar >= nChars) { - /* If the requested length is at least as large as the buffer, and - if there is no mark/reset activity, and if line feeds are not - being skipped, do not bother to copy the characters into the - local buffer. In this way buffered streams will cascade - harmlessly. */ + /* If the requested length is at least as large as the buffer, and + if there is no mark/reset activity, and if line feeds are not + being skipped, do not bother to copy the characters into the + local buffer. In this way buffered streams will cascade + harmlessly. 
*/ if (len >= cb.length && markedChar <= UNMARKED && !skipLF) { return in.read(cbuf, off, len); } @@ -196,10 +192,8 @@ private int read1(char[] cbuf, int off, int len) throws IOException { skipLF = false; if (cb[nextChar] == '\n') { nextChar++; - if (nextChar >= nChars) - fill(); - if (nextChar >= nChars) - return -1; + if (nextChar >= nChars) fill(); + if (nextChar >= nChars) return -1; } } int n = Math.min(len, nChars - nextChar); @@ -255,8 +249,7 @@ private int read1(char[] cbuf, int off, int len) throws IOException { public int read(char cbuf[], int off, int len) throws IOException { synchronized (lock) { ensureOpen(); - if ((off < 0) || (off > cbuf.length) || (len < 0) || - ((off + len) > cbuf.length) || ((off + len) < 0)) { + if ((off < 0) || (off > cbuf.length) || (len < 0) || ((off + len) > cbuf.length) || ((off + len) < 0)) { throw new IndexOutOfBoundsException(); } else if (len == 0) { return 0; @@ -288,7 +281,7 @@ public int read(char cbuf[], int off, int len) throws IOException { String readLine(boolean ignoreLF) throws IOException { int startChar; lineBuffer.setLength(0); - + synchronized (lock) { ensureOpen(); boolean omitLF = ignoreLF || skipLF; @@ -296,21 +289,18 @@ String readLine(boolean ignoreLF) throws IOException { bufferLoop: for (; ; ) { - if (nextChar >= nChars) - fill(); - if (nextChar >= nChars) { /* EOF */ - if (lineBuffer != null && lineBuffer.length() > 0) - return lineBuffer.toString(); - else - return null; + if (nextChar >= nChars) fill(); + if (nextChar >= nChars) { + /* EOF */ + if (lineBuffer != null && lineBuffer.length() > 0) return lineBuffer.toString(); + else return null; } boolean eol = false; char c = 0; int i; /* Skip a leftover '\n', if necessary */ - if (omitLF && (cb[nextChar] == '\n')) - nextChar++; + if (omitLF && (cb[nextChar] == '\n')) nextChar++; skipLF = false; omitLF = false; @@ -373,10 +363,8 @@ public long skip(long n) throws IOException { ensureOpen(); long r = n; while (r > 0) { - if (nextChar >= nChars) - 
fill(); - if (nextChar >= nChars) /* EOF */ - break; + if (nextChar >= nChars) fill(); + if (nextChar >= nChars) /* EOF */ break; if (skipLF) { skipLF = false; if (cb[nextChar] == '\n') { @@ -409,20 +397,19 @@ public boolean ready() throws IOException { synchronized (lock) { ensureOpen(); - /* - * If newline needs to be skipped and the next char to be read - * is a newline character, then just skip it right away. - */ + /* + * If newline needs to be skipped and the next char to be read + * is a newline character, then just skip it right away. + */ if (skipLF) { - /* Note that in.ready() will return true if and only if the next - * read on the stream will not block. - */ + /* Note that in.ready() will return true if and only if the next + * read on the stream will not block. + */ if (nextChar >= nChars && in.ready()) { fill(); } if (nextChar < nChars) { - if (cb[nextChar] == '\n') - nextChar++; + if (cb[nextChar] == '\n') nextChar++; skipLF = false; } } @@ -450,7 +437,7 @@ public boolean markSupported() { * buffer will cause a new buffer to be allocated * whose size is no smaller than limit. * Therefore large values should be used with care. - * @throws IllegalArgumentException If readAheadLimit is < 0 + * @throws IllegalArgumentException If readAheadLimit is {@code < 0} * @throws IOException If an I/O error occurs */ @Override @@ -477,9 +464,7 @@ public void reset() throws IOException { synchronized (lock) { ensureOpen(); if (markedChar < 0) - throw new IOException((markedChar == INVALIDATED) - ? "Mark invalid" - : "Stream not marked"); + throw new IOException((markedChar == INVALIDATED) ? 
"Mark invalid" : "Stream not marked"); nextChar = markedChar; skipLF = markedSkipLF; } @@ -488,8 +473,7 @@ public void reset() throws IOException { @Override public void close() throws IOException { synchronized (lock) { - if (in == null) - return; + if (in == null) return; in.close(); in = null; cb = null; diff --git a/src/main/java/htsjdk/tribble/readers/Positional.java b/src/main/java/htsjdk/tribble/readers/Positional.java index 50f403947f..4c9588fb18 100644 --- a/src/main/java/htsjdk/tribble/readers/Positional.java +++ b/src/main/java/htsjdk/tribble/readers/Positional.java @@ -24,13 +24,12 @@ package htsjdk.tribble.readers; import htsjdk.samtools.util.LocationAware; - import java.io.IOException; /** * Minimal interface for an object at support getting the current position in the stream / writer / file, as well as a handful of other * reader-like features. - * + * * @author depristo */ public interface Positional extends LocationAware { diff --git a/src/main/java/htsjdk/tribble/readers/PositionalBufferedStream.java b/src/main/java/htsjdk/tribble/readers/PositionalBufferedStream.java index 0b77f6aaa3..69e18d75d3 100644 --- a/src/main/java/htsjdk/tribble/readers/PositionalBufferedStream.java +++ b/src/main/java/htsjdk/tribble/readers/PositionalBufferedStream.java @@ -18,7 +18,6 @@ package htsjdk.tribble.readers; import htsjdk.tribble.TribbleException; - import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; @@ -60,7 +59,7 @@ public final long getPosition() { @Override public final int read() throws IOException { final int c = peek(); - if ( c >= 0 ) { + if (c >= 0) { // update position and buffer offset if peek says we aren't yet done position++; nextChar++; @@ -70,18 +69,18 @@ public final int read() throws IOException { @Override public final int read(final byte[] bytes, final int start, final int len) throws IOException { - if ( len == 0 ) // If len is zero, then no bytes are read and 0 is returned - return 0; + if (len == 0) // If 
len is zero, then no bytes are read and 0 is returned + return 0; if (nChars < 0) // If no byte is available because the stream is at end of file, the value -1 is returned - return -1; + return -1; else { int nRead = 0; int remaining = len; - while ( remaining > 0 ) { + while (remaining > 0) { // Try to Refill buffer if at the end of current buffer - if ( nChars == nextChar ) - if ( fill() < 0 ) { // EOF + if (nChars == nextChar) + if (fill() < 0) { // EOF break; } @@ -90,14 +89,14 @@ public final int read(final byte[] bytes, final int start, final int len) throws System.arraycopy(buffer, nextChar, bytes, start + nRead, nCharsToCopy); // update nextChar (pointer into buffer) and keep track of nRead and remaining - nextChar += nCharsToCopy; - nRead += nCharsToCopy; + nextChar += nCharsToCopy; + nRead += nCharsToCopy; remaining -= nCharsToCopy; } // make sure we update our position tracker to reflect having advanced by nRead bytes position += nRead; - + /** Conform to {@link InputStream#read(byte[], int, int)} contract by returning -1 if EOF and no data was read. */ return nRead == 0 ? -1 : nRead; } @@ -118,9 +117,9 @@ public final int peek() throws IOException { // Check for EOF if (nChars < 0) { return -1; - } else if (nextChar == nChars){ - //Try to Refill buffer if at the end of current buffer - if ( fill() < 0 ){ + } else if (nextChar == nChars) { + // Try to Refill buffer if at the end of current buffer + if (fill() < 0) { return -1; } } @@ -143,9 +142,9 @@ public final long skip(final long nBytes) throws IOException { // When the buffer contains enough data that we have less than // its less left to skip we increase nextChar by the remaining // amount - while ( remainingToSkip > 0 && ! 
isDone() ) { + while (remainingToSkip > 0 && !isDone()) { final long bytesLeftInBuffer = nChars - nextChar; - if ( remainingToSkip > bytesLeftInBuffer ) { + if (remainingToSkip > bytesLeftInBuffer) { // we need to refill the buffer and continue our skipping remainingToSkip -= bytesLeftInBuffer; fill(); @@ -171,7 +170,7 @@ public final void close() { } } - private final static int byteToInt(byte b) { + private static final int byteToInt(byte b) { return b & 0xFF; } @@ -183,20 +182,16 @@ public static void main(String[] args) throws Exception { System.out.printf("Testing %s%n", args[0]); for (int i = 0; i < iterations; i++) { - if ( includeInputStream ) { + if (includeInputStream) { final InputStream is = new FileInputStream(testFile); - if ( doReadFileInChunks ) - readFileInChunks("InputStream", is); - else - readFileByLine("InputStream", is); + if (doReadFileInChunks) readFileInChunks("InputStream", is); + else readFileByLine("InputStream", is); is.close(); } final PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(testFile)); - if ( doReadFileInChunks ) - readFileInChunks("PositionalBufferedStream", pbs); - else - readFileByLine("PositionalBufferedStream", pbs); + if (doReadFileInChunks) readFileInChunks("PositionalBufferedStream", pbs); + else readFileByLine("PositionalBufferedStream", pbs); pbs.close(); } } @@ -227,10 +222,8 @@ private static void readFileInChunks(final String name, final InputStream is) th is.close(); } - private static final void printStatus(final String name, long lineCount, double rate, long dt) { System.out.printf("%30s: %d lines read. Rate = %.2e lines per second. 
DT = %d%n", name, lineCount, rate, dt); System.out.flush(); } } - diff --git a/src/main/java/htsjdk/tribble/readers/SynchronousLineReader.java b/src/main/java/htsjdk/tribble/readers/SynchronousLineReader.java index 32948ebf4e..3fd628efcf 100644 --- a/src/main/java/htsjdk/tribble/readers/SynchronousLineReader.java +++ b/src/main/java/htsjdk/tribble/readers/SynchronousLineReader.java @@ -25,7 +25,6 @@ import htsjdk.samtools.util.CloserUtil; import htsjdk.samtools.util.RuntimeIOException; - import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; @@ -34,14 +33,14 @@ /** * Implementation of {@link LineReader} that reads lines directly from the underlying stream or reader. */ -public final class SynchronousLineReader implements LineReader{ +public final class SynchronousLineReader implements LineReader { private final LongLineBufferedReader longLineBufferedReader; - public SynchronousLineReader(final InputStream stream){ + public SynchronousLineReader(final InputStream stream) { this(new InputStreamReader(stream)); } - public SynchronousLineReader(final Reader reader){ + public SynchronousLineReader(final Reader reader) { this.longLineBufferedReader = new LongLineBufferedReader(reader); } @@ -58,9 +57,9 @@ public String readLine() { public void close() { CloserUtil.close(longLineBufferedReader); } - + @Override public String toString() { return "SynchronousLineReader"; } -} \ No newline at end of file +} diff --git a/src/main/java/htsjdk/tribble/readers/TabixIteratorLineReader.java b/src/main/java/htsjdk/tribble/readers/TabixIteratorLineReader.java index cfb5b0fb99..92a93602d5 100644 --- a/src/main/java/htsjdk/tribble/readers/TabixIteratorLineReader.java +++ b/src/main/java/htsjdk/tribble/readers/TabixIteratorLineReader.java @@ -24,7 +24,6 @@ package htsjdk.tribble.readers; import htsjdk.samtools.util.RuntimeIOException; - import java.io.IOException; /** @@ -35,7 +34,6 @@ public class TabixIteratorLineReader implements LineReader { 
TabixReader.Iterator iterator; - public TabixIteratorLineReader(TabixReader.Iterator iterator) { this.iterator = iterator; } @@ -53,7 +51,7 @@ public String readLine() { public void close() { // Ignore - } - + @Override public String toString() { return "TabixIteratorLineReader"; diff --git a/src/main/java/htsjdk/tribble/readers/TabixReader.java b/src/main/java/htsjdk/tribble/readers/TabixReader.java index 0d474a6ab7..8c2b08386d 100644 --- a/src/main/java/htsjdk/tribble/readers/TabixReader.java +++ b/src/main/java/htsjdk/tribble/readers/TabixReader.java @@ -29,7 +29,6 @@ import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.FileExtensions; import htsjdk.tribble.util.ParsingUtils; - import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; @@ -56,14 +55,14 @@ public class TabixReader implements AutoCloseable { private int mBc; private int mEc; private int mMeta; - - //private int mSkip; (not used) + + // private int mSkip; (not used) private String[] mSeq; private Map mChr2tid; private static int MAX_BIN = 37450; - //private static int TAD_MIN_CHUNK_GAP = 32768; (not used) + // private static int TAD_MIN_CHUNK_GAP = 32768; (not used) private static int TAD_LIDX_SHIFT = 14; /** default buffer size for readLine() */ private static final int DEFAULT_BUFFER_SIZE = 1000; @@ -106,7 +105,11 @@ private static boolean less64(final long u, final long v) { // unsigned 64-bit c * @param filePath path to the data file/uri */ public TabixReader(final String filePath) throws IOException { - this(filePath, null, SeekableStreamFactory.getInstance().getBufferedStream(SeekableStreamFactory.getInstance().getStreamFor(filePath))); + this( + filePath, + null, + SeekableStreamFactory.getInstance() + .getBufferedStream(SeekableStreamFactory.getInstance().getStreamFor(filePath))); } /** @@ -114,7 +117,11 @@ public TabixReader(final String filePath) throws IOException { * @param indexPath Full path to the index file. 
Auto-generated if null */ public TabixReader(final String filePath, final String indexPath) throws IOException { - this(filePath, indexPath, SeekableStreamFactory.getInstance().getBufferedStream(SeekableStreamFactory.getInstance().getStreamFor(filePath))); + this( + filePath, + indexPath, + SeekableStreamFactory.getInstance() + .getBufferedStream(SeekableStreamFactory.getInstance().getStreamFor(filePath))); } /** @@ -123,13 +130,20 @@ public TabixReader(final String filePath, final String indexPath) throws IOExcep * @param wrapper a wrapper to apply to the raw byte stream of the data file if is a uri representing a {@link java.nio.file.Path} * @param indexWrapper a wrapper to apply to the raw byte stream of the index file if it is a uri representing a {@link java.nio.file.Path} */ - public TabixReader(final String filePath, final String indexPath, - final Function wrapper, - final Function indexWrapper) throws IOException { - this(filePath, indexPath, SeekableStreamFactory.getInstance().getBufferedStream(SeekableStreamFactory.getInstance().getStreamFor(filePath, wrapper)), indexWrapper); + public TabixReader( + final String filePath, + final String indexPath, + final Function wrapper, + final Function indexWrapper) + throws IOException { + this( + filePath, + indexPath, + SeekableStreamFactory.getInstance() + .getBufferedStream(SeekableStreamFactory.getInstance().getStreamFor(filePath, wrapper)), + indexWrapper); } - /** * @param filePath Path to the data file (used for error messages only) * @param stream Seekable stream from which the data is read @@ -153,11 +167,16 @@ public TabixReader(final String filePath, final String indexPath, SeekableStream * @param indexWrapper a wrapper to apply to the raw byte stream of the index file if it is a uri representing a {@link java.nio.file.Path} * @param stream Seekable stream from which the data is read */ - public TabixReader(final String filePath, final String indexPath, SeekableStream stream, Function indexWrapper) 
throws IOException { + public TabixReader( + final String filePath, + final String indexPath, + SeekableStream stream, + Function indexWrapper) + throws IOException { mFilePath = filePath; mFp = new BlockCompressedInputStream(stream); mIndexWrapper = indexWrapper; - if(indexPath == null){ + if (indexPath == null) { mIndexPath = ParsingUtils.appendToPath(filePath, FileExtensions.TABIX_INDEX); } else { mIndexPath = indexPath; @@ -166,10 +185,9 @@ public TabixReader(final String filePath, final String indexPath, SeekableStream } /** return the source (filename/URL) of that reader */ - public String getSource() - { + public String getSource() { return this.mFilePath; - } + } private static int reg2bins(final int beg, final int _end, final int[] list) { int i = 0, k, end = _end; @@ -201,9 +219,9 @@ public static String readLine(final InputStream is) throws IOException { return readLine(is, DEFAULT_BUFFER_SIZE); } - /** - * reads a line with a defined buffer-size - * + /** + * reads a line with a defined buffer-size + * * @param is the input stream * @param bufferCapacity the buffer size, must be greater than 0 * @return the line or null if there is no more input @@ -212,14 +230,11 @@ public static String readLine(final InputStream is) throws IOException { private static String readLine(final InputStream is, final int bufferCapacity) throws IOException { final StringBuffer buf = new StringBuffer(bufferCapacity); int c; - while ((c = is.read()) >= 0 && c != '\n') - buf.append((char) c); + while ((c = is.read()) >= 0 && c != '\n') buf.append((char) c); if (c < 0) return null; return buf.toString(); } - - /** * Read the Tabix index from a file * @@ -227,18 +242,18 @@ private static String readLine(final InputStream is, final int bufferCapacity) t */ private void readIndex(final SeekableStream fp) throws IOException { if (fp == null) return; - final BlockCompressedInputStream is = new BlockCompressedInputStream(fp); + final BlockCompressedInputStream is = new 
BlockCompressedInputStream(fp); byte[] buf = new byte[4]; is.read(buf, 0, 4); // read "TBI\1" mSeq = new String[readInt(is)]; // # sequences - mChr2tid = new HashMap( this.mSeq.length ); + mChr2tid = new HashMap(this.mSeq.length); mPreset = readInt(is); mSc = readInt(is); mBc = readInt(is); mEc = readInt(is); mMeta = readInt(is); - readInt(is);//unused + readInt(is); // unused // read sequence dictionary int i, j, k, l = readInt(is); buf = new byte[l]; @@ -272,8 +287,7 @@ private void readIndex(final SeekableStream fp) throws IOException { } // the linear index mIndex[i].l = new long[readInt(is)]; - for (k = 0; k < mIndex[i].l.length; ++k) - mIndex[i].l[k] = readLong(is); + for (k = 0; k < mIndex[i].l.length; ++k) mIndex[i].l[k] = readLong(is); } // close is.close(); @@ -296,15 +310,14 @@ public String readLine() throws IOException { /** return chromosome ID or -1 if it is unknown */ public int chr2tid(final String chr) { - final Integer tid = this.mChr2tid.get(chr); - return tid==null?-1:tid; + final Integer tid = this.mChr2tid.get(chr); + return tid == null ? -1 : tid; } /** return the chromosomes in that tabix file */ - public Set getChromosomes() - { + public Set getChromosomes() { return Collections.unmodifiableSet(this.mChr2tid.keySet()); - } + } /** * Parse a region in the format of "chr1", "chr1:100" or "chr1:100-1000" @@ -341,8 +354,7 @@ private TIntv getIntv(final String s) { if (intv.end < 1) intv.end = 1; } else { // FIXME: SAM supports are not tested yet if ((mPreset & 0xffff) == 0) { // generic - if (col == mEc) - intv.end = Integer.parseInt(end != -1 ? s.substring(beg, end) : s.substring(beg)); + if (col == mEc) intv.end = Integer.parseInt(end != -1 ? 
s.substring(beg, end) : s.substring(beg)); } else if ((mPreset & 0xffff) == 1) { // SAM if (col == 6) { // CIGAR int l = 0, i, j; @@ -350,8 +362,7 @@ private TIntv getIntv(final String s) { for (i = j = 0; i < cigar.length(); ++i) { if (cigar.charAt(i) > '9') { int op = cigar.charAt(i); - if (op == 'M' || op == 'D' || op == 'N') - l += Integer.parseInt(cigar.substring(j, i)); + if (op == 'M' || op == 'D' || op == 'N') l += Integer.parseInt(cigar.substring(j, i)); j = i + 1; } } @@ -382,24 +393,23 @@ else if (i > 0) { return intv; } - public interface Iterator - { + public interface Iterator { /** return null when there is no more data to read */ public String next() throws IOException; - } + } /** iterator returned instead of null when there is no more data */ - private static final Iterator EOF_ITERATOR=new Iterator() { + private static final Iterator EOF_ITERATOR = new Iterator() { @Override public String next() throws IOException { return null; - } - }; + } + }; /** default implementation of Iterator */ private class IteratorImpl implements Iterator { private int i; - //private int n_seeks; + // private int n_seeks; private int tid, beg, end; private TPair64[] off; private long curr_off; @@ -407,7 +417,7 @@ private class IteratorImpl implements Iterator { private IteratorImpl(final int _tid, final int _beg, final int _end, final TPair64[] _off) { i = -1; - //n_seeks = 0; + // n_seeks = 0; curr_off = 0; iseof = false; off = _off; @@ -419,14 +429,14 @@ private IteratorImpl(final int _tid, final int _beg, final int _end, final TPair @Override public String next() throws IOException { if (iseof) return null; - for (; ;) { + for (; ; ) { if (curr_off == 0 || !less64(curr_off, off[i].v)) { // then jump to the next chunk if (i == off.length - 1) break; // no more chunks if (i >= 0) assert (curr_off == off[i].v); // otherwise bug if (i < 0 || off[i].v != off[i + 1].u) { // not adjacent chunks; then seek mFp.seek(off[i + 1].u); curr_off = mFp.getFilePointer(); - 
//++n_seeks; + // ++n_seeks; } ++i; } @@ -463,16 +473,14 @@ public Iterator query(final int tid, final int beg, final int end) { min_off = (beg >> TAD_LIDX_SHIFT >= idx.l.length) ? idx.l[idx.l.length - 1] : idx.l[beg >> TAD_LIDX_SHIFT]; else min_off = 0; for (i = n_off = 0; i < n_bins; ++i) { - if ((chunks = idx.b.get(bins[i])) != null) - n_off += chunks.length; + if ((chunks = idx.b.get(bins[i])) != null) n_off += chunks.length; } if (n_off == 0) return EOF_ITERATOR; off = new TPair64[n_off]; for (i = n_off = 0; i < n_bins; ++i) if ((chunks = idx.b.get(bins[i])) != null) for (int j = 0; j < chunks.length; ++j) - if (less64(min_off, chunks[j].v)) - off[n_off++] = new TPair64(chunks[j]); + if (less64(min_off, chunks[j].v)) off[n_off++] = new TPair64(chunks[j]); Arrays.sort(off, 0, n_off); // resolve completely contained adjacent blocks for (i = 1, l = 0; i < n_off; ++i) { @@ -484,8 +492,7 @@ public Iterator query(final int tid, final int beg, final int end) { } n_off = l + 1; // resolve overlaps between adjacent blocks; this may happen due to the merge in indexing - for (i = 1; i < n_off; ++i) - if (!less64(off[i - 1].v, off[i].u)) off[i - 1].v = off[i].u; + for (i = 1; i < n_off; ++i) if (!less64(off[i - 1].v, off[i].u)) off[i - 1].v = off[i].u; // merge adjacent blocks for (i = 1, l = 0; i < n_off; ++i) { if (off[l].v >> 16 == off[i].u >> 16) off[l].v = off[i].v; @@ -501,8 +508,7 @@ public Iterator query(final int tid, final int beg, final int end) { for (i = 0; i < n_off; ++i) { if (off[i] != null) ret[i] = new TPair64(off[i].u, off[i].v); // in C, this is inefficient } - if (ret.length == 0 || (ret.length == 1 && ret[0] == null)) - return EOF_ITERATOR; + if (ret.length == 0 || (ret.length == 1 && ret[0] == null)) return EOF_ITERATOR; return new TabixReader.IteratorImpl(tid, beg, end, ret); } @@ -518,22 +524,22 @@ public Iterator query(final String reg) { } /** - * Get an iterator for an interval specified by the sequence id and begin and end coordinates - * @see 
#parseReg(String) - * @param reg a chromosome - * @param start start interval - * @param end end interval - * @return a tabix iterator over the specified interval - */ - public Iterator query(final String reg, int start, int end) { - int tid = this.chr2tid(reg); - return query(tid, start, end); - } + * Get an iterator for an interval specified by the sequence id and begin and end coordinates + * @see #parseReg(String) + * @param reg a chromosome + * @param start start interval + * @param end end interval + * @return a tabix iterator over the specified interval + */ + public Iterator query(final String reg, int start, int end) { + int tid = this.chr2tid(reg); + return query(tid, start, end); + } // ADDED BY JTR - @Override - public void close() { - if(mFp != null) { + @Override + public void close() { + if (mFp != null) { try { mFp.close(); } catch (IOException e) { @@ -542,8 +548,8 @@ public void close() { } } - @Override - public String toString() { - return "TabixReader: filename:"+getSource(); - } + @Override + public String toString() { + return "TabixReader: filename:" + getSource(); + } } diff --git a/src/main/java/htsjdk/tribble/util/FTPHelper.java b/src/main/java/htsjdk/tribble/util/FTPHelper.java index 19b5f77bf1..b01f5019af 100644 --- a/src/main/java/htsjdk/tribble/util/FTPHelper.java +++ b/src/main/java/htsjdk/tribble/util/FTPHelper.java @@ -3,7 +3,6 @@ import htsjdk.samtools.util.ftp.FTPClient; import htsjdk.samtools.util.ftp.FTPStream; import htsjdk.samtools.util.ftp.FTPUtils; - import java.io.IOException; import java.io.InputStream; import java.net.URISyntaxException; @@ -19,7 +18,8 @@ public class FTPHelper implements URLHelper { public FTPHelper(URL url) { if (!url.getProtocol().toLowerCase().equals("ftp")) { - throw new IllegalArgumentException("FTPHelper can only be used with ftp protocol, not " + url.getProtocol()); + throw new IllegalArgumentException( + "FTPHelper can only be used with ftp protocol, not " + url.getProtocol()); } this.url = url; 
} diff --git a/src/main/java/htsjdk/tribble/util/HTTPHelper.java b/src/main/java/htsjdk/tribble/util/HTTPHelper.java index 801b19c505..6a939d6de3 100644 --- a/src/main/java/htsjdk/tribble/util/HTTPHelper.java +++ b/src/main/java/htsjdk/tribble/util/HTTPHelper.java @@ -85,14 +85,12 @@ public long getContentLength() throws IOException { } } - @Override public InputStream openInputStream() throws IOException { HttpURLConnection connection = openConnection(); return new WrapperInputStream(connection, connection.getInputStream()); } - /** * Open an InputStream to stream a slice (range) of the resource. The host server must support * range byte requests and return a 206 response code (partial response). If it does not an IOException will @@ -160,6 +158,4 @@ public void close() throws IOException { connection.disconnect(); } } - - } diff --git a/src/main/java/htsjdk/tribble/util/LittleEndianInputStream.java b/src/main/java/htsjdk/tribble/util/LittleEndianInputStream.java index 350f47c830..83e784d241 100644 --- a/src/main/java/htsjdk/tribble/util/LittleEndianInputStream.java +++ b/src/main/java/htsjdk/tribble/util/LittleEndianInputStream.java @@ -1,14 +1,14 @@ /* -* Adapted from example code in -* Title: Hardcore Java -* Title: Java I/O -* Second Edition: May 2006 -* ISBN 10: 0-596-52750-0 -* ISBN 13: 9780596527501 -* -* http://www.javafaq.nu/java-example-code-1078.html -* -*/ + * Adapted from example code in + * Title: Hardcore Java + * Title: Java I/O + * Second Edition: May 2006 + * ISBN 10: 0-596-52750-0 + * ISBN 13: 9780596527501 + * + * http://www.javafaq.nu/java-example-code-1078.html + * + */ package htsjdk.tribble.util; import java.io.ByteArrayOutputStream; @@ -17,7 +17,6 @@ import java.io.IOException; import java.io.InputStream; - /** * Input stream with methods to convert byte arrays to numeric values using "little endian" order. *

    @@ -30,7 +29,6 @@ public class LittleEndianInputStream extends FilterInputStream { public LittleEndianInputStream(InputStream in) { super(in); buffer = new byte[8]; - } /** @@ -56,7 +54,6 @@ public short readShort() throws IOException { return (short) (((byte2 << 24) >>> 16) + ((byte1 << 24) >>> 24)); } - public int readInt() throws IOException { int byte1 = in.read(); int byte2 = in.read(); @@ -65,17 +62,12 @@ public int readInt() throws IOException { if (byte4 < 0) { throw new EOFException(); } - return (byte4 << 24) - + ((byte3 << 24) >>> 8) - + ((byte2 << 24) >>> 16) - + ((byte1 << 24) >>> 24); - + return (byte4 << 24) + ((byte3 << 24) >>> 8) + ((byte2 << 24) >>> 16) + ((byte1 << 24) >>> 24); } /** * */ - public long readLong() throws IOException { readFully(buffer); long byte1 = (long) buffer[0]; @@ -110,12 +102,11 @@ public final float readFloat() throws IOException { * @return * @throws IOException */ - public String readString() throws IOException { ByteArrayOutputStream bis = new ByteArrayOutputStream(100); byte b; while ((b = (byte) in.read()) != 0) { - if(b < 0) { + if (b < 0) { throw new EOFException(); } bis.write(b); @@ -123,22 +114,17 @@ public String readString() throws IOException { return new String(bis.toByteArray()); } - /** * Keep reading until the input buffer is filled. 
*/ private void readFully(byte b[]) throws IOException { int len = b.length; - if (len < 0) - throw new IndexOutOfBoundsException(); + if (len < 0) throw new IndexOutOfBoundsException(); int n = 0; while (n < len) { int count = read(b, n, len - n); - if (count < 0) - throw new EOFException(); + if (count < 0) throw new EOFException(); n += count; } } - - } diff --git a/src/main/java/htsjdk/tribble/util/LittleEndianOutputStream.java b/src/main/java/htsjdk/tribble/util/LittleEndianOutputStream.java index eab2f87856..16c42526a6 100644 --- a/src/main/java/htsjdk/tribble/util/LittleEndianOutputStream.java +++ b/src/main/java/htsjdk/tribble/util/LittleEndianOutputStream.java @@ -1,14 +1,14 @@ /* -* Adapted from example code in -* Title: Hardcore Java -* Title: Java I/O -* Second Edition: May 2006 -* ISBN 10: 0-596-52750-0 -* ISBN 13: 9780596527501 -* -* http://www.javafaq.nu/java-example-code-1078.html -* -*/ + * Adapted from example code in + * Title: Hardcore Java + * Title: Java I/O + * Second Edition: May 2006 + * ISBN 10: 0-596-52750-0 + * ISBN 13: 9780596527501 + * + * http://www.javafaq.nu/java-example-code-1078.html + * + */ package htsjdk.tribble.util; @@ -16,7 +16,6 @@ import java.io.IOException; import java.io.OutputStream; - public final class LittleEndianOutputStream extends FilterOutputStream { protected long written; @@ -32,8 +31,7 @@ public void write(int b) throws IOException { } @Override - public void write(byte[] data, int offset, int length) - throws IOException { + public void write(byte[] data, int offset, int length) throws IOException { out.write(data, offset, length); written += length; } @@ -67,7 +65,6 @@ public void writeInt(int i) throws IOException { out.write((i >>> 16) & 0xFF); out.write((i >>> 24) & 0xFF); written += 4; - } public void writeLong(long l) throws IOException { @@ -81,7 +78,6 @@ public void writeLong(long l) throws IOException { out.write((int) (l >>> 48) & 0xFF); out.write((int) (l >>> 56) & 0xFF); written += 8; - } public 
final void writeFloat(float f) throws IOException { @@ -119,4 +115,4 @@ public long getWrittenCount() { public void setWrittenCount(long count) { this.written = count; } -}// end LittleEndianOutputStream +} // end LittleEndianOutputStream diff --git a/src/main/java/htsjdk/tribble/util/MathUtils.java b/src/main/java/htsjdk/tribble/util/MathUtils.java index 9b4d8cec5b..7a09f46f8f 100644 --- a/src/main/java/htsjdk/tribble/util/MathUtils.java +++ b/src/main/java/htsjdk/tribble/util/MathUtils.java @@ -56,11 +56,24 @@ public void push(double x) { } } - public void clear() { recordCount = 0; } - public final long numDataValues() { return recordCount; } - public final double mean() { return (recordCount > 0) ? newMean : 0.0; } - public double variance() { return ((recordCount > 1) ? newStdDev / (recordCount - 1) : 0.0); } - public double standardDeviation() { return Math.sqrt(variance()); } - } + public void clear() { + recordCount = 0; + } + + public final long numDataValues() { + return recordCount; + } + + public final double mean() { + return (recordCount > 0) ? newMean : 0.0; + } + + public double variance() { + return ((recordCount > 1) ? 
newStdDev / (recordCount - 1) : 0.0); + } + public double standardDeviation() { + return Math.sqrt(variance()); + } + } } diff --git a/src/main/java/htsjdk/tribble/util/ParsingUtils.java b/src/main/java/htsjdk/tribble/util/ParsingUtils.java index 7eaefb730d..fafbb7eae2 100644 --- a/src/main/java/htsjdk/tribble/util/ParsingUtils.java +++ b/src/main/java/htsjdk/tribble/util/ParsingUtils.java @@ -28,15 +28,12 @@ import htsjdk.samtools.seekablestream.SeekablePathStream; import htsjdk.samtools.seekablestream.SeekableStreamFactory; import htsjdk.samtools.util.IOUtil; - import java.awt.Color; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; -import java.net.URI; import java.net.URL; -import java.net.URLEncoder; import java.nio.channels.SeekableByteChannel; import java.nio.file.Files; import java.util.*; @@ -79,8 +76,7 @@ public class ParsingUtils { * @return an input stream from the given path * @throws IOException */ - public static InputStream openInputStream(String path) - throws IOException { + public static InputStream openInputStream(String path) throws IOException { return openInputStream(path, null); } @@ -96,19 +92,19 @@ public static InputStream openInputStream(String path) * @return An inputStream appropriately created from uri and conditionally wrapped with wrapper (only in certain cases) * @throws IOException when stream cannot be opened against uri */ - public static InputStream openInputStream(final String uri, final Function wrapper) - throws IOException { + public static InputStream openInputStream( + final String uri, final Function wrapper) throws IOException { final IOPath path = new HtsPath(uri); - if(path.hasFileSystemProvider()){ - if(path.isPath()) { + if (path.hasFileSystemProvider()) { + if (path.isPath()) { return path.getScheme().equals("file") ? 
Files.newInputStream(path.toPath()) : new SeekablePathStream(path.toPath(), wrapper); } else { - throw new IOException("FileSystemProvider for path " + path.getRawInputString() + " exits but failed to " + - " create path. \n" + path.getToPathFailureReason()); + throw new IOException("FileSystemProvider for path " + path.getRawInputString() + + " exits but failed to " + " create path. \n" + path.getToPathFailureReason()); } - } else if( SeekableStreamFactory.canBeHandledByLegacyUrlSupport(uri)){ + } else if (SeekableStreamFactory.canBeHandledByLegacyUrlSupport(uri)) { return getURLHelper(new URL(uri)).openInputStream(); } else { throw new IOException("No FileSystemProvider available to handle path: " + path.getRawInputString()); @@ -187,7 +183,6 @@ public static String join(String separator, String[] strings, int start, int end return ret.toString(); } - /** * Split the string into tokens separated by the given delimiter. This looks * suspiciously like what String.split should do. It is here because @@ -204,16 +199,13 @@ public static List split(String input, char delim) { if (input.isEmpty()) return Arrays.asList(""); final ArrayList output = new ArrayList<>(1 + input.length() / 2); int from = -1, to; - for (to = input.indexOf(delim); - to >= 0; - from = to, to = input.indexOf(delim, from+1)) { - output.add(input.substring(from+1, to)); + for (to = input.indexOf(delim); to >= 0; from = to, to = input.indexOf(delim, from + 1)) { + output.add(input.substring(from + 1, to)); } - output.add(input.substring(from+1)); + output.add(input.substring(from + 1)); return output; } - /** * Split the string into tokesn separated by the given delimiter. Profiling has * revealed that the standard string.split() method typically takes > 1/2 @@ -280,23 +272,19 @@ else if (nTokens < maxTokens) { return nTokens; } - // trim a string for the given character (i.e. 
not just whitespace) public static String trim(String str, char ch) { char[] array = str.toCharArray(); int start = 0; - while (start < array.length && array[start] == ch) - start++; + while (start < array.length && array[start] == ch) start++; int end = array.length - 1; - while (end > start && array[end] == ch) - end--; + while (end > start && array[end] == ch) end--; return str.substring(start, end + 1); } - /** * Split the string into tokens separated by tab or space(s). This method * was added so support wig and bed files, which apparently accept space delimiters. @@ -316,7 +304,8 @@ public static int splitWhitespace(String aString, String[] tokens) { int spaceEnd = aString.indexOf(' '); int end = tabEnd < 0 ? spaceEnd : spaceEnd < 0 ? tabEnd : Math.min(spaceEnd, tabEnd); while ((end > 0) && (nTokens < maxTokens)) { - //tokens[nTokens++] = new String(aString.toCharArray(), start, end-start); // aString.substring(start, end); + // tokens[nTokens++] = new String(aString.toCharArray(), start, end-start); // aString.substring(start, + // end); tokens[nTokens++] = aString.substring(start, end); start = end + 1; @@ -328,7 +317,6 @@ public static int splitWhitespace(String aString, String[] tokens) { tabEnd = aString.indexOf('\t', start); spaceEnd = aString.indexOf(' ', start); end = tabEnd < 0 ? spaceEnd : spaceEnd < 0 ? tabEnd : Math.min(spaceEnd, tabEnd); - } // Add the trailing string @@ -341,14 +329,12 @@ public static int splitWhitespace(String aString, String[] tokens) { public static > boolean isSorted(Iterable iterable) { Iterator iter = iterable.iterator(); - if (!iter.hasNext()) - return true; + if (!iter.hasNext()) return true; T t = iter.next(); while (iter.hasNext()) { T t2 = iter.next(); - if (t.compareTo(t2) > 0) - return false; + if (t.compareTo(t2) > 0) return false; t = t2; } @@ -389,12 +375,11 @@ public static Color parseColor(String string) { return c; } catch (NumberFormatException numberFormatException) { - //TODO Throw this exception? 
+ // TODO Throw this exception? return Color.black; } } - private static Color hexToColor(String string) { if (string.length() == 6) { int red = Integer.parseInt(string.substring(0, 2), 16); @@ -404,10 +389,9 @@ private static Color hexToColor(String string) { } else { return null; } - } - public static boolean resourceExists(String resource) throws IOException{ + public static boolean resourceExists(String resource) throws IOException { boolean remoteFile = SeekableStreamFactory.isBeingHandledByLegacyUrlSupport(resource); if (remoteFile) { URL url; @@ -428,13 +412,13 @@ public static boolean resourceExists(String resource) throws IOException{ /** * Return a URLHelper from the current URLHelperFactory - * @see #setURLHelperFactory(URLHelperFactory) + * @see #setURLHelperFactory(URLHelperFactory) * * @param url * @return */ public static URLHelper getURLHelper(URL url) { - return urlHelperFactory.getHelper(url); + return urlHelperFactory.getHelper(url); } /** @@ -443,7 +427,7 @@ public static URLHelper getURLHelper(URL url) { * @param factory */ public static void setURLHelperFactory(URLHelperFactory factory) { - if(factory == null) { + if (factory == null) { throw new NullPointerException("Null URLHelperFactory"); } urlHelperFactory = factory; @@ -466,10 +450,10 @@ public static URLHelperFactory getURLHelperFactory() { public static String appendToPath(String filepath, String indexExtension) { String tabxIndex = null; URL url = null; - try{ + try { url = new URL(filepath); - }catch (MalformedURLException e){ - //pass + } catch (MalformedURLException e) { + // pass } if (url != null) { String path = url.getPath(); diff --git a/src/main/java/htsjdk/tribble/util/TabixUtils.java b/src/main/java/htsjdk/tribble/util/TabixUtils.java index de5e466052..1c9fe68452 100644 --- a/src/main/java/htsjdk/tribble/util/TabixUtils.java +++ b/src/main/java/htsjdk/tribble/util/TabixUtils.java @@ -29,7 +29,6 @@ import htsjdk.samtools.util.FileExtensions; import 
htsjdk.tribble.TribbleException; import htsjdk.tribble.readers.TabixReader; - import java.io.File; import java.util.ArrayList; import java.util.HashMap; @@ -70,12 +69,10 @@ public static class TIndex { public long[] l; // linear index } - public static class TIntv { public int tid, beg, end; } - public static boolean less64(final long u, final long v) { // unsigned 64-bit comparison return (u < v) ^ (u < 0) ^ (v < 0); } diff --git a/src/main/java/htsjdk/tribble/util/URLHelper.java b/src/main/java/htsjdk/tribble/util/URLHelper.java index 7e545acb53..a17928edfe 100644 --- a/src/main/java/htsjdk/tribble/util/URLHelper.java +++ b/src/main/java/htsjdk/tribble/util/URLHelper.java @@ -62,9 +62,7 @@ public interface URLHelper { * @return * @throws IOException */ - InputStream openInputStreamForRange(long start, long end) throws IOException; public boolean exists() throws IOException; - } diff --git a/src/main/java/htsjdk/tribble/util/URLHelperFactory.java b/src/main/java/htsjdk/tribble/util/URLHelperFactory.java index 98ba8d19ca..56d93f3dfd 100644 --- a/src/main/java/htsjdk/tribble/util/URLHelperFactory.java +++ b/src/main/java/htsjdk/tribble/util/URLHelperFactory.java @@ -13,5 +13,4 @@ public interface URLHelperFactory { * @return a {@link URLHelper} object for the given URL */ URLHelper getHelper(URL url); - } diff --git a/src/main/java/htsjdk/tribble/util/popgen/HardyWeinbergCalculation.java b/src/main/java/htsjdk/tribble/util/popgen/HardyWeinbergCalculation.java index 8cee87740e..dd0efe5b13 100755 --- a/src/main/java/htsjdk/tribble/util/popgen/HardyWeinbergCalculation.java +++ b/src/main/java/htsjdk/tribble/util/popgen/HardyWeinbergCalculation.java @@ -26,124 +26,121 @@ package htsjdk.tribble.util.popgen; /** -* The Broad Institute -* SOFTWARE COPYRIGHT NOTICE AGREEMENT -* This software and its documentation are copyright 2004 by the -* Broad Institute/Massachusetts Institute of Technology. All rights are reserved. 
-* -* This software is supplied without any warranty or guaranteed support whatsoever. Neither -* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. -*/ + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2004 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + */ /** -* This class calculates a HardyWeinberg p-value given three values representing -* the observed frequences of homozygous and heterozygous genotypes in the -* test population. -* -* @author Bob Handsaker -*/ + * This class calculates a HardyWeinberg p-value given three values representing + * the observed frequences of homozygous and heterozygous genotypes in the + * test population. + * + * @author Bob Handsaker + */ public final class HardyWeinbergCalculation { - /** - * This class is not instantiable. - */ - private HardyWeinbergCalculation() { - } - - /** - * Calculates exact two-sided hardy-weinberg p-value. Parameters - * are number of genotypes, number of rare alleles observed and - * number of heterozygotes observed. 
- * - * (c) 2003 Jan Wigginton, Goncalo Abecasis (goncalo@umich.edu) - */ - public static double hwCalculate(int obsAA, int obsAB, int obsBB) { - int diplotypes = obsAA + obsAB + obsBB; - int rare = (obsAA * 2) + obsAB; - int hets = obsAB; - - //make sure "rare" allele is really the rare allele - if (rare > diplotypes) { - rare = (2 * diplotypes) - rare; - } - - //make sure numbers aren't screwy - if (hets > rare) { - return -1; - } - - double[] tailProbs = new double[rare + 1]; - - for (int z = 0; z < tailProbs.length; z++) { - tailProbs[z] = 0; - } - - //start at midpoint - //cast to long and back to int to avoid overflow at large numbers - int mid = (int) (((long) rare * ((2 * diplotypes) - rare)) / (2 * diplotypes)); - - //check to ensure that midpoint and rare alleles have same parity - if (((rare & 1) ^ (mid & 1)) != 0) { - mid++; - } - - int het = mid; - int hom_r = (rare - mid) / 2; - int hom_c = diplotypes - het - hom_r; - - //Calculate probability for each possible observed heterozygote - //count up to a scaling constant, to avoid underflow and overflow - tailProbs[mid] = 1.0; - - double sum = tailProbs[mid]; - - for (het = mid; het > 1; het -= 2) { - tailProbs[het - 2] = (tailProbs[het] * het * (het - 1.0)) / (4.0 * (hom_r + 1.0) * (hom_c + - 1.0)); - sum += tailProbs[het - 2]; - - //2 fewer hets for next iteration -> add one rare and one common homozygote - hom_r++; - hom_c++; - } - - het = mid; - hom_r = (rare - mid) / 2; - hom_c = diplotypes - het - hom_r; - - for (het = mid; het <= (rare - 2); het += 2) { - tailProbs[het + 2] = (tailProbs[het] * 4.0 * hom_r * hom_c) / ((het + 2.0) * (het + - 1.0)); - sum += tailProbs[het + 2]; - - //2 more hets for next iteration -> subtract one rare and one common homozygote - hom_r--; - hom_c--; - } - - for (int z = 0; z < tailProbs.length; z++) { - tailProbs[z] /= sum; - } - - double top = tailProbs[hets]; - - for (int i = hets + 1; i <= rare; i++) { - top += tailProbs[i]; - } - - double otherSide = 
tailProbs[hets]; - - for (int i = hets - 1; i >= 0; i--) { - otherSide += tailProbs[i]; - } - - if ((top > 0.5) && (otherSide > 0.5)) { - return 1.0; - } - - if (top < otherSide) { - return top * 2; - } - - return otherSide * 2; - } -} \ No newline at end of file + /** + * This class is not instantiable. + */ + private HardyWeinbergCalculation() {} + + /** + * Calculates exact two-sided hardy-weinberg p-value. Parameters + * are number of genotypes, number of rare alleles observed and + * number of heterozygotes observed. + * + * (c) 2003 Jan Wigginton, Goncalo Abecasis (goncalo@umich.edu) + */ + public static double hwCalculate(int obsAA, int obsAB, int obsBB) { + int diplotypes = obsAA + obsAB + obsBB; + int rare = (obsAA * 2) + obsAB; + int hets = obsAB; + + // make sure "rare" allele is really the rare allele + if (rare > diplotypes) { + rare = (2 * diplotypes) - rare; + } + + // make sure numbers aren't screwy + if (hets > rare) { + return -1; + } + + double[] tailProbs = new double[rare + 1]; + + for (int z = 0; z < tailProbs.length; z++) { + tailProbs[z] = 0; + } + + // start at midpoint + // cast to long and back to int to avoid overflow at large numbers + int mid = (int) (((long) rare * ((2 * diplotypes) - rare)) / (2 * diplotypes)); + + // check to ensure that midpoint and rare alleles have same parity + if (((rare & 1) ^ (mid & 1)) != 0) { + mid++; + } + + int het = mid; + int hom_r = (rare - mid) / 2; + int hom_c = diplotypes - het - hom_r; + + // Calculate probability for each possible observed heterozygote + // count up to a scaling constant, to avoid underflow and overflow + tailProbs[mid] = 1.0; + + double sum = tailProbs[mid]; + + for (het = mid; het > 1; het -= 2) { + tailProbs[het - 2] = (tailProbs[het] * het * (het - 1.0)) / (4.0 * (hom_r + 1.0) * (hom_c + 1.0)); + sum += tailProbs[het - 2]; + + // 2 fewer hets for next iteration -> add one rare and one common homozygote + hom_r++; + hom_c++; + } + + het = mid; + hom_r = (rare - mid) / 2; + 
hom_c = diplotypes - het - hom_r; + + for (het = mid; het <= (rare - 2); het += 2) { + tailProbs[het + 2] = (tailProbs[het] * 4.0 * hom_r * hom_c) / ((het + 2.0) * (het + 1.0)); + sum += tailProbs[het + 2]; + + // 2 more hets for next iteration -> subtract one rare and one common homozygote + hom_r--; + hom_c--; + } + + for (int z = 0; z < tailProbs.length; z++) { + tailProbs[z] /= sum; + } + + double top = tailProbs[hets]; + + for (int i = hets + 1; i <= rare; i++) { + top += tailProbs[i]; + } + + double otherSide = tailProbs[hets]; + + for (int i = hets - 1; i >= 0; i--) { + otherSide += tailProbs[i]; + } + + if ((top > 0.5) && (otherSide > 0.5)) { + return 1.0; + } + + if (top < otherSide) { + return top * 2; + } + + return otherSide * 2; + } +} diff --git a/src/main/java/htsjdk/utils/ClassFinder.java b/src/main/java/htsjdk/utils/ClassFinder.java index 881065ce1c..0e8fadd4ce 100644 --- a/src/main/java/htsjdk/utils/ClassFinder.java +++ b/src/main/java/htsjdk/utils/ClassFinder.java @@ -25,7 +25,6 @@ */ import htsjdk.samtools.util.Log; - import java.io.File; import java.io.IOException; import java.lang.reflect.Modifier; @@ -35,13 +34,11 @@ import java.net.URLClassLoader; import java.util.Enumeration; import java.util.HashSet; -import java.util.LinkedHashSet; import java.util.Set; import java.util.stream.Collectors; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; - /** * Utility class that can scan for classes in the classpath and find all the ones * annotated with a particular annotation. 
@@ -94,8 +91,7 @@ public void find(String packageName, final Class parentType) { try { urls = loader.getResources(packageName); - } - catch (IOException ioe) { + } catch (IOException ioe) { log.warn("Could not read package: " + packageName, ioe); return; } @@ -119,16 +115,14 @@ public void find(String packageName, final Class parentType) { continue; } - //Log.info("Looking for classes in location: " + urlPath); + // Log.info("Looking for classes in location: " + urlPath); final File file = new File(urlPath); - if ( file.isDirectory() ) { + if (file.isDirectory()) { scanDir(file, packageName); - } - else { + } else { scanJar(file, packageName); } - } - catch (IOException ioe) { + } catch (IOException ioe) { log.warn("could not read entries", ioe); } } @@ -142,7 +136,7 @@ public void find(String packageName, final Class parentType) { protected void scanJar(final File file, final String packagePath) throws IOException { final ZipFile zip = new ZipFile(file); final Enumeration entries = zip.entries(); - while ( entries.hasMoreElements() ) { + while (entries.hasMoreElements()) { final ZipEntry entry = entries.nextElement(); final String name = entry.getName(); if (name.startsWith(packagePath)) { @@ -157,12 +151,11 @@ protected void scanJar(final File file, final String packagePath) throws IOExcep * @param path the package path acculmulated so far (e.g. edu/mit/broad) */ protected void scanDir(final File file, final String path) { - for ( final File child: file.listFiles() ) { - final String newPath = (path==null ? child.getName() : path + '/' + child.getName() ); - if ( child.isDirectory() ) { + for (final File child : file.listFiles()) { + final String newPath = (path == null ? 
child.getName() : path + '/' + child.getName()); + if (child.isDirectory()) { scanDir(child, newPath); - } - else { + } else { handleItem(newPath); } } @@ -182,8 +175,7 @@ protected void handleItem(final String name) { if (parentType.isAssignableFrom(type)) { this.classes.add(type); } - } - catch (Throwable t) { + } catch (Throwable t) { log.debug("could not load class: " + classname, t); } } @@ -194,16 +186,13 @@ public Set> getClasses() { return this.classes; } - /** * Fetches the set of classes discovered so far, subsetted down to concrete (non-abstract/interface) classes only * * @return subset of classes discovered so far including only concrete (non-abstract/interface) classes */ public Set> getConcreteClasses() { - return getClasses().stream() - .filter(ClassFinder::isConcrete) - .collect(Collectors.toSet()); + return getClasses().stream().filter(ClassFinder::isConcrete).collect(Collectors.toSet()); } /** @@ -212,7 +201,7 @@ public Set> getConcreteClasses() { * @param clazz class to check * @return true if the class is neither abstract nor an interface, otherwise false */ - public static boolean isConcrete( final Class clazz ) { - return ! Modifier.isAbstract(clazz.getModifiers()) && ! 
Modifier.isInterface(clazz.getModifiers()); + public static boolean isConcrete(final Class clazz) { + return !Modifier.isAbstract(clazz.getModifiers()) && !Modifier.isInterface(clazz.getModifiers()); } } diff --git a/src/main/java/htsjdk/utils/ValidationUtils.java b/src/main/java/htsjdk/utils/ValidationUtils.java index 911e758489..5b57814ed2 100644 --- a/src/main/java/htsjdk/utils/ValidationUtils.java +++ b/src/main/java/htsjdk/utils/ValidationUtils.java @@ -89,7 +89,6 @@ public static > T nonEmpty(final T collection, final Str throw new IllegalArgumentException(nameOfObject + " cannot be empty"); } return collection; - } /** @@ -118,7 +117,6 @@ public static String nonEmpty(final String string, final String nameOfObject) { throw new IllegalArgumentException("The string is empty: " + nameOfObject); } return string; - } /** diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java index 97e8ce959d..6f2f81a935 100644 --- a/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java +++ b/src/main/java/htsjdk/variant/bcf2/BCF2Codec.java @@ -1,27 +1,27 @@ /* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ package htsjdk.variant.bcf2; @@ -44,7 +44,6 @@ import htsjdk.variant.vcf.VCFContigHeaderLine; import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLineType; - import java.io.*; import java.nio.file.Files; import java.util.ArrayList; @@ -56,13 +55,13 @@ * Decode BCF2 files */ public class BCF2Codec extends BinaryFeatureCodec { - protected final static int ALLOWED_MAJOR_VERSION = 2; - protected final static int ALLOWED_MINOR_VERSION = 1; + protected static final int ALLOWED_MAJOR_VERSION = 2; + protected static final int ALLOWED_MINOR_VERSION = 1; public static final BCFVersion ALLOWED_BCF_VERSION = new BCFVersion(ALLOWED_MAJOR_VERSION, ALLOWED_MINOR_VERSION); /** sizeof a BCF header (+ min/max version). Used when trying to detect when a streams starts with a bcf header */ - public static final int SIZEOF_BCF_HEADER = BCFVersion.MAGIC_HEADER_START.length + 2*Byte.BYTES; - + public static final int SIZEOF_BCF_HEADER = BCFVersion.MAGIC_HEADER_START.length + 2 * Byte.BYTES; + private BCFVersion bcfVersion = null; private VCFHeader header = null; @@ -87,7 +86,7 @@ public class BCF2Codec extends BinaryFeatureCodec { /** * Provides some sanity checking on the header */ - private final static int MAX_HEADER_SIZE = 0x08000000; + private static final int MAX_HEADER_SIZE = 0x08000000; /** * Genotype field decoders that are initialized when the header is read @@ -106,7 +105,6 @@ public class BCF2Codec extends BinaryFeatureCodec { private int recordNo = 0; private int pos = 0; - // ---------------------------------------------------------------------- // // Feature codec interface functions @@ -114,12 +112,12 @@ public class BCF2Codec extends BinaryFeatureCodec { // ---------------------------------------------------------------------- @Override - public Feature decodeLoc( final PositionalBufferedStream inputStream ) { + public Feature decodeLoc(final PositionalBufferedStream inputStream) { return decode(inputStream); } @Override - public VariantContext 
decode( final PositionalBufferedStream inputStream ) { + public VariantContext decode(final PositionalBufferedStream inputStream) { try { recordNo++; final VariantContextBuilder builder = new VariantContextBuilder(); @@ -134,7 +132,7 @@ public VariantContext decode( final PositionalBufferedStream inputStream ) { decoder.readNextBlock(genotypeBlockSize, inputStream); createLazyGenotypesDecoder(info, builder); return builder.fullyDecoded(true).make(); - } catch ( IOException e ) { + } catch (IOException e) { throw new TribbleException("Failed to read BCF file", e); } } @@ -157,37 +155,38 @@ public Class getFeatureType() { * with {@code supportedVersion} */ protected void validateVersionCompatibility(final BCFVersion supportedVersion, final BCFVersion actualVersion) { - if ( actualVersion.getMajorVersion() != ALLOWED_MAJOR_VERSION ) { + if (actualVersion.getMajorVersion() != ALLOWED_MAJOR_VERSION) { error("BCF2Codec can only process BCF2 files, this file has major version " + bcfVersion.getMajorVersion()); } // require the minor version to be an exact match and reject minor versions form the future - if ( actualVersion.getMinorVersion() != ALLOWED_MINOR_VERSION ) { - error("BCF2Codec can only process BCF2 files with minor version = " + ALLOWED_MINOR_VERSION + " but this file has minor version " + bcfVersion.getMinorVersion()); + if (actualVersion.getMinorVersion() != ALLOWED_MINOR_VERSION) { + error("BCF2Codec can only process BCF2 files with minor version = " + ALLOWED_MINOR_VERSION + + " but this file has minor version " + bcfVersion.getMinorVersion()); } } @Override - public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream ) { + public FeatureCodecHeader readHeader(final PositionalBufferedStream inputStream) { try { // note that this reads the magic as well, and so does double duty bcfVersion = BCFVersion.readBCFVersion(inputStream); - if ( bcfVersion == null ) { + if (bcfVersion == null) { error("Input stream does not contain a BCF encoded 
file; BCF magic header info not found"); } validateVersionCompatibility(BCF2Codec.ALLOWED_BCF_VERSION, bcfVersion); - if ( GeneralUtils.DEBUG_MODE_ENABLED ) { + if (GeneralUtils.DEBUG_MODE_ENABLED) { System.err.println("Parsing data stream with BCF version " + bcfVersion); } final int headerSizeInBytes = BCF2Type.INT32.read(inputStream); - if ( headerSizeInBytes <= 0 || headerSizeInBytes > MAX_HEADER_SIZE) // no bigger than 8 MB - error("BCF2 header has invalid length: " + headerSizeInBytes + " must be >= 0 and < "+ MAX_HEADER_SIZE); + if (headerSizeInBytes <= 0 || headerSizeInBytes > MAX_HEADER_SIZE) // no bigger than 8 MB + error("BCF2 header has invalid length: " + headerSizeInBytes + " must be >= 0 and < " + MAX_HEADER_SIZE); final byte[] headerBytes = new byte[headerSizeInBytes]; - if ( inputStream.read(headerBytes) != headerSizeInBytes ) + if (inputStream.read(headerBytes) != headerSizeInBytes) error("Couldn't read all of the bytes specified in the header length = " + headerSizeInBytes); final PositionalBufferedStream bps = new PositionalBufferedStream(new ByteArrayInputStream(headerBytes)); @@ -195,15 +194,15 @@ public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream final VCFCodec headerParser = new VCFCodec(); this.header = (VCFHeader) headerParser.readActualHeader(lineIterator); bps.close(); - } catch ( IOException e ) { + } catch (IOException e) { throw new TribbleException("I/O error while reading BCF2 header"); } // create the config offsets - if ( ! 
header.getContigLines().isEmpty() ) { + if (!header.getContigLines().isEmpty()) { contigNames.clear(); - for ( final VCFContigHeaderLine contig : header.getContigLines()) { - if ( contig.getID() == null || contig.getID().equals("") ) + for (final VCFContigHeaderLine contig : header.getContigLines()) { + if (contig.getID() == null || contig.getID().equals("")) error("found a contig with an invalid ID " + contig); contigNames.add(contig.getID()); } @@ -220,7 +219,7 @@ public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream // create and initialize the genotype builder array final int nSamples = header.getNGenotypeSamples(); builders = new GenotypeBuilder[nSamples]; - for ( int i = 0; i < nSamples; i++ ) { + for (int i = 0; i < nSamples; i++) { builders[i] = new GenotypeBuilder(header.getGenotypeSamples().get(i)); } @@ -229,11 +228,11 @@ public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream } @Override - public boolean canDecode( final String path ) { - try (InputStream fis = Files.newInputStream(IOUtil.getPath(path)) ){ + public boolean canDecode(final String path) { + try (InputStream fis = Files.newInputStream(IOUtil.getPath(path))) { final BCFVersion version = BCFVersion.readBCFVersion(fis); return version != null && version.getMajorVersion() == ALLOWED_MAJOR_VERSION; - } catch ( final IOException e ) { + } catch (final IOException e) { return false; } } @@ -264,8 +263,8 @@ private final void decodeSiteLoc(final VariantContextBuilder builder) throws IOE this.pos = decoder.decodeInt(BCF2Type.INT32) + 1; // GATK is one based, BCF2 is zero-based final int refLength = decoder.decodeInt(BCF2Type.INT32); - builder.start((long)pos); - builder.stop((long)(pos + refLength - 1)); // minus one because GATK has closed intervals but BCF2 is open + builder.start((long) pos); + builder.stop((long) (pos + refLength - 1)); // minus one because GATK has closed intervals but BCF2 is open } /** @@ -276,8 +275,8 @@ private final void 
decodeSiteLoc(final VariantContextBuilder builder) throws IOE */ private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) throws IOException { final Object qual = decoder.decodeSingleValue(BCF2Type.FLOAT); - if ( qual != null ) { - builder.log10PError(((Double)qual) / -10.0); + if (qual != null) { + builder.log10PError(((Double) qual) / -10.0); } final int nAlleleInfo = decoder.decodeInt(BCF2Type.INT32); @@ -287,10 +286,11 @@ private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextB final int nFormatFields = nFormatSamples >> 24; final int nSamples = nFormatSamples & 0x00FFFFF; - if ( header.getNGenotypeSamples() != nSamples ) - error("Reading BCF2 files with different numbers of samples per record " + - "is not currently supported. Saw " + header.getNGenotypeSamples() + - " samples in header but have a record with " + nSamples + " samples"); + if (header.getNGenotypeSamples() != nSamples) + error("Reading BCF2 files with different numbers of samples per record " + + "is not currently supported. Saw " + + header.getNGenotypeSamples() + " samples in header but have a record with " + + nSamples + " samples"); decodeID(builder); final List alleles = decodeAlleles(builder, pos, nAlleles); @@ -298,12 +298,11 @@ private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextB decodeInfo(builder, nInfo); final SitesInfoForDecoding info = new SitesInfoForDecoding(nFormatFields, nSamples, alleles); - if ( ! 
info.isValid() ) - error("Sites info is malformed: " + info); + if (!info.isValid()) error("Sites info is malformed: " + info); return info; } - protected final static class SitesInfoForDecoding { + protected static final class SitesInfoForDecoding { final int nFormatFields; final int nSamples; final List alleles; @@ -315,9 +314,11 @@ private SitesInfoForDecoding(final int nFormatFields, final int nSamples, final } public boolean isValid() { - return nFormatFields >= 0 && - nSamples >= 0 && - alleles != null && ! alleles.isEmpty() && alleles.get(0).isReference(); + return nFormatFields >= 0 + && nSamples >= 0 + && alleles != null + && !alleles.isEmpty() + && alleles.get(0).isReference(); } @Override @@ -330,13 +331,11 @@ public String toString() { * Decode the id field in this BCF2 file and store it in the builder * @param builder */ - private void decodeID( final VariantContextBuilder builder ) throws IOException { - final String id = (String)decoder.decodeTypedValue(); + private void decodeID(final VariantContextBuilder builder) throws IOException { + final String id = (String) decoder.decodeTypedValue(); - if ( id == null ) - builder.noID(); - else - builder.id(id); + if (id == null) builder.noID(); + else builder.id(id); } /** @@ -346,17 +345,19 @@ private void decodeID( final VariantContextBuilder builder ) throws IOException * @param nAlleles * @return the alleles */ - private List decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) throws IOException { - // TODO -- probably need inline decoder for efficiency here (no sense in going bytes -> string -> vector -> bytes + private List decodeAlleles(final VariantContextBuilder builder, final int pos, final int nAlleles) + throws IOException { + // TODO -- probably need inline decoder for efficiency here (no sense in going bytes -> string -> vector -> + // bytes List alleles = new ArrayList(nAlleles); String ref = null; - for ( int i = 0; i < nAlleles; i++ ) { - final String 
alleleBases = (String)decoder.decodeTypedValue(); + for (int i = 0; i < nAlleles; i++) { + final String alleleBases = (String) decoder.decodeTypedValue(); final boolean isRef = i == 0; final Allele allele = Allele.create(alleleBases, isRef); - if ( isRef ) ref = alleleBases; + if (isRef) ref = alleleBases; alleles.add(allele); } @@ -373,22 +374,18 @@ private List decodeAlleles( final VariantContextBuilder builder, final i * Decode the filter field of this BCF2 file and store the result in the builder * @param builder */ - private void decodeFilter( final VariantContextBuilder builder ) throws IOException { + private void decodeFilter(final VariantContextBuilder builder) throws IOException { final Object value = decoder.decodeTypedValue(); - if ( value == null ) - builder.unfiltered(); + if (value == null) builder.unfiltered(); else { - if ( value instanceof Integer ) { + if (value instanceof Integer) { // fast path for single integer result - final String filterString = getDictionaryString((Integer)value); - if ( VCFConstants.PASSES_FILTERS_v4.equals(filterString)) - builder.passFilters(); - else - builder.filter(filterString); + final String filterString = getDictionaryString((Integer) value); + if (VCFConstants.PASSES_FILTERS_v4.equals(filterString)) builder.passFilters(); + else builder.filter(filterString); } else { - for ( final int offset : (List)value ) - builder.filter(getDictionaryString(offset)); + for (final int offset : (List) value) builder.filter(getDictionaryString(offset)); } } } @@ -399,17 +396,17 @@ private void decodeFilter( final VariantContextBuilder builder ) throws IOExcept * @param builder * @param numInfoFields */ - private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) throws IOException { - if ( numInfoFields == 0 ) + private void decodeInfo(final VariantContextBuilder builder, final int numInfoFields) throws IOException { + if (numInfoFields == 0) // fast path, don't bother doing any work if there are no 
fields return; final Map infoFieldEntries = new HashMap(numInfoFields); - for ( int i = 0; i < numInfoFields; i++ ) { + for (int i = 0; i < numInfoFields; i++) { final String key = getDictionaryString(); Object value = decoder.decodeTypedValue(); final VCFCompoundHeaderLine metaData = VariantContextUtils.getMetaDataForField(header, key); - if ( metaData.getType() == VCFHeaderLineType.Flag ) value = true; // special case for flags + if (metaData.getType() == VCFHeaderLineType.Flag) value = true; // special case for flags infoFieldEntries.put(key, value); } @@ -429,27 +426,26 @@ private void decodeInfo( final VariantContextBuilder builder, final int numInfoF * @param siteInfo * @param builder */ - private void createLazyGenotypesDecoder( final SitesInfoForDecoding siteInfo, - final VariantContextBuilder builder ) { + private void createLazyGenotypesDecoder(final SitesInfoForDecoding siteInfo, final VariantContextBuilder builder) { if (siteInfo.nSamples > 0) { - final LazyGenotypesContext.LazyParser lazyParser = - new BCF2LazyGenotypesDecoder(this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields, builders); + final LazyGenotypesContext.LazyParser lazyParser = new BCF2LazyGenotypesDecoder( + this, siteInfo.alleles, siteInfo.nSamples, siteInfo.nFormatFields, builders); final LazyData lazyData = new LazyData(header, siteInfo.nFormatFields, decoder.getRecordBytes()); - final LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser, lazyData, header.getNGenotypeSamples()); + final LazyGenotypesContext lazy = + new LazyGenotypesContext(lazyParser, lazyData, header.getNGenotypeSamples()); // did we resort the sample names? 
If so, we need to load the genotype data - if ( !header.samplesWereAlreadySorted() ) - lazy.decode(); + if (!header.samplesWereAlreadySorted()) lazy.decode(); builder.genotypesNoValidation(lazy); } } public static class LazyData { - final public VCFHeader header; - final public int nGenotypeFields; - final public byte[] bytes; + public final VCFHeader header; + public final int nGenotypeFields; + public final byte[] bytes; public LazyData(final VCFHeader header, final int nGenotypeFields, final byte[] bytes) { this.header = header; @@ -473,7 +469,7 @@ protected final String getDictionaryString(final int offset) { * @param contigOffset * @return */ - private final String lookupContigName( final int contigOffset ) { + private final String lookupContigName(final int contigOffset) { return contigNames.get(contigOffset); } @@ -481,8 +477,7 @@ private final ArrayList parseDictionary(final VCFHeader header) { final ArrayList dict = BCF2Utils.makeDictionary(header); // if we got here we never found a dictionary, or there are no elements in the dictionary - if ( dict.isEmpty() ) - error("Dictionary header element was absent or empty"); + if (dict.isEmpty()) error("Dictionary header element was absent or empty"); return dict; } @@ -501,10 +496,10 @@ protected BCF2GenotypeFieldDecoders.Decoder getGenotypeFieldDecoder(final String protected void error(final String message) throws RuntimeException { throw new TribbleException(String.format("%s, at record %d with position %d:", message, recordNo, pos)); } - + /** try to read a BCFVersion from an uncompressed BufferedInputStream. * The buffer must be large enough to contain {@link #SIZEOF_BCF_HEADER} - * + * * @param uncompressedBufferedInput the uncompressed input stream * @return the BCFVersion if it can be decoded, or null if not found. 
* @throws IOException @@ -515,5 +510,4 @@ public static BCFVersion tryReadBCFVersion(final BufferedInputStream uncompresse uncompressedBufferedInput.reset(); return bcfVersion; } - } diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Decoder.java b/src/main/java/htsjdk/variant/bcf2/BCF2Decoder.java index 0dd166eef6..bc41e4872f 100644 --- a/src/main/java/htsjdk/variant/bcf2/BCF2Decoder.java +++ b/src/main/java/htsjdk/variant/bcf2/BCF2Decoder.java @@ -1,33 +1,32 @@ /* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ package htsjdk.variant.bcf2; import htsjdk.tribble.TribbleException; import htsjdk.variant.utils.GeneralUtils; - import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; @@ -63,7 +62,7 @@ protected BCF2Decoder(final byte[] recordBytes) { * @param stream */ public void readNextBlock(final int blockSizeInBytes, final InputStream stream) { - if ( blockSizeInBytes < 0 ) throw new TribbleException("Invalid block size " + blockSizeInBytes); + if (blockSizeInBytes < 0) throw new TribbleException("Invalid block size " + blockSizeInBytes); setRecordBytes(readRecordBytes(blockSizeInBytes, stream)); } @@ -74,9 +73,9 @@ public void readNextBlock(final int blockSizeInBytes, final InputStream stream) */ public void skipNextBlock(final int blockSizeInBytes, final InputStream stream) { try { - final int bytesRead = (int)stream.skip(blockSizeInBytes); + final int bytesRead = (int) stream.skip(blockSizeInBytes); validateReadBytes(bytesRead, 1, blockSizeInBytes); - } catch ( IOException e ) { + } catch (IOException e) { throw new TribbleException("I/O error while reading BCF2 file", e); } this.recordBytes = null; @@ -131,20 +130,20 @@ public final Object decodeTypedValue(final byte typeDescriptor) throws IOExcepti } public final Object decodeTypedValue(final byte typeDescriptor, final int size) throws IOException { - if ( size == 0 ) { + if (size == 0) { // missing value => null in java return null; } else { final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); - if ( type == BCF2Type.CHAR ) { // special case string decoding for efficiency + if (type == BCF2Type.CHAR) { // special case string decoding for efficiency return decodeLiteralString(size); - } else if ( size == 1 ) { + } else if (size == 1) { return decodeSingleValue(type); } else { final ArrayList ints = new ArrayList(size); - for ( int i = 0; i < size; i++ ) { + for (int i = 0; i < size; i++) { final Object val = decodeSingleValue(type); - if ( val == null ) continue; // 
auto-pruning. We remove trailing nulls + if (val == null) continue; // auto-pruning. We remove trailing nulls ints.add(val); } return ints.isEmpty() ? null : ints; // return null when all of the values are null @@ -156,16 +155,20 @@ public final Object decodeSingleValue(final BCF2Type type) throws IOException { // TODO -- decodeTypedValue should integrate this routine final int value = decodeInt(type); - if ( value == type.getMissingBytes() ) - return null; + if (value == type.getMissingBytes()) return null; else { switch (type) { case INT8: case INT16: - case INT32: return value; - case FLOAT: return rawFloatToFloat(value); - case CHAR: return value & 0xFF; // TODO -- I cannot imagine why we'd get here, as string needs to be special cased - default: throw new TribbleException("BCF2 codec doesn't know how to decode type " + type ); + case INT32: + return value; + case FLOAT: + return rawFloatToFloat(value); + case CHAR: + return value + & 0xFF; // TODO -- I cannot imagine why we'd get here, as string needs to be special cased + default: + throw new TribbleException("BCF2 codec doesn't know how to decode type " + type); } } } @@ -185,22 +188,20 @@ private final Object decodeLiteralString(final int size) { recordStream.read(bytes); int goodLength = 0; - for ( ; goodLength < bytes.length ; goodLength++ ) - if ( bytes[goodLength] == 0 ) break; + for (; goodLength < bytes.length; goodLength++) if (bytes[goodLength] == 0) break; - if ( goodLength == 0 ) - return null; + if (goodLength == 0) return null; else { final String s = new String(bytes, 0, goodLength); return BCF2Utils.isCollapsedString(s) ? 
BCF2Utils.explodeStringList(s) : s; } - } catch ( IOException e ) { + } catch (IOException e) { throw new TribbleException("readByte failure", e); } } public final int decodeNumberOfElements(final byte typeDescriptor) throws IOException { - if ( BCF2Utils.sizeIsOverflow(typeDescriptor) ) + if (BCF2Utils.sizeIsOverflow(typeDescriptor)) // -1 ensures we explode immediately with a bad size if the result is missing return decodeInt(readTypeDescriptor(), -1); else @@ -245,26 +246,27 @@ public final int decodeInt(final BCF2Type type) throws IOException { * @return see description */ public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) throws IOException { - if ( size == 0 ) { + if (size == 0) { return null; } else { - if ( maybeDest != null && maybeDest.length < size ) - maybeDest = null; // by nulling this out we ensure that we do fresh allocations as maybeDest is too small + if (maybeDest != null && maybeDest.length < size) + maybeDest = + null; // by nulling this out we ensure that we do fresh allocations as maybeDest is too small final int val1 = decodeInt(type); - if ( val1 == type.getMissingBytes() ) { + if (val1 == type.getMissingBytes()) { // fast path for first element being missing - for ( int i = 1; i < size; i++ ) decodeInt(type); + for (int i = 1; i < size; i++) decodeInt(type); return null; } else { // we know we will have at least 1 element, so making the int[] is worth it final int[] ints = maybeDest == null ? new int[size] : maybeDest; ints[0] = val1; // we already read the first one - for ( int i = 1; i < size; i++ ) { + for (int i = 1; i < size; i++) { ints[i] = decodeInt(type); - if ( ints[i] == type.getMissingBytes() ) { + if (ints[i] == type.getMissingBytes()) { // read the rest of the missing values, dropping them - for ( int j = i + 1; j < size; j++ ) decodeInt(type); + for (int j = i + 1; j < size; j++) decodeInt(type); // deal with auto-pruning by returning an int[] containing // only the non-MISSING values. 
We do this by copying the first // i elements, as i itself is missing @@ -282,7 +284,7 @@ public final int[] decodeIntArray(final byte typeDescriptor, final int size) thr } private double rawFloatToFloat(final int rawFloat) { - return (double)Float.intBitsToFloat(rawFloat); + return (double) Float.intBitsToFloat(rawFloat); } // ---------------------------------------------------------------------- @@ -319,20 +321,19 @@ private static byte[] readRecordBytes(final int blockSizeInBytes, final InputStr int nReadAttempts = 0; // keep track of how many times we've read // because we might not read enough bytes from the file in a single go, do it in a loop until we get EOF - while ( bytesRead < blockSizeInBytes ) { + while (bytesRead < blockSizeInBytes) { final int read1 = inputStream.read(record, bytesRead, blockSizeInBytes - bytesRead); - if ( read1 == -1 ) - validateReadBytes(bytesRead, nReadAttempts, blockSizeInBytes); - else - bytesRead += read1; + if (read1 == -1) validateReadBytes(bytesRead, nReadAttempts, blockSizeInBytes); + else bytesRead += read1; } - if ( GeneralUtils.DEBUG_MODE_ENABLED && nReadAttempts > 1 ) { // TODO -- remove me - System.err.println("Required multiple read attempts to actually get the entire BCF2 block, unexpected behavior"); + if (GeneralUtils.DEBUG_MODE_ENABLED && nReadAttempts > 1) { // TODO -- remove me + System.err.println( + "Required multiple read attempts to actually get the entire BCF2 block, unexpected behavior"); } validateReadBytes(bytesRead, nReadAttempts, blockSizeInBytes); - } catch ( IOException e ) { + } catch (IOException e) { throw new TribbleException("I/O error while reading BCF2 file", e); } @@ -349,10 +350,10 @@ private static byte[] readRecordBytes(final int blockSizeInBytes, final InputStr private static void validateReadBytes(final int actuallyRead, final int nReadAttempts, final int expected) { assert expected >= 0; - if ( actuallyRead < expected ) { - throw new TribbleException( - String.format("Failed to read 
next complete record: expected %d bytes but read only %d after %d iterations", - expected, actuallyRead, nReadAttempts)); + if (actuallyRead < expected) { + throw new TribbleException(String.format( + "Failed to read next complete record: expected %d bytes but read only %d after %d iterations", + expected, actuallyRead, nReadAttempts)); } } diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2GenotypeFieldDecoders.java b/src/main/java/htsjdk/variant/bcf2/BCF2GenotypeFieldDecoders.java index c406b6602d..edeaaea326 100644 --- a/src/main/java/htsjdk/variant/bcf2/BCF2GenotypeFieldDecoders.java +++ b/src/main/java/htsjdk/variant/bcf2/BCF2GenotypeFieldDecoders.java @@ -1,27 +1,27 @@ /* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ package htsjdk.variant.bcf2; @@ -29,7 +29,6 @@ import htsjdk.variant.variantcontext.GenotypeBuilder; import htsjdk.variant.vcf.VCFConstants; import htsjdk.variant.vcf.VCFHeader; - import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -46,8 +45,8 @@ * @since 6/12 */ public class BCF2GenotypeFieldDecoders { - private final static boolean ENABLE_FASTPATH_GT = true; - private final static int MIN_SAMPLES_FOR_FASTPATH_GENOTYPES = 0; // TODO -- update to reasonable number + private static final boolean ENABLE_FASTPATH_GT = true; + private static final int MIN_SAMPLES_FOR_FASTPATH_GENOTYPES = 0; // TODO -- update to reasonable number // initialized once per writer to allow parallel writers to work private final HashMap genotypeFieldDecoder = new HashMap(); @@ -57,7 +56,8 @@ public BCF2GenotypeFieldDecoders(final VCFHeader header) { // TODO -- fill in appropriate decoders for each FORMAT field in the header genotypeFieldDecoder.put(VCFConstants.GENOTYPE_KEY, new GTDecoder()); - // currently the generic decoder handles FILTER values properly, in so far as we don't tolerate multiple filter field values per genotype + // currently the generic decoder handles FILTER values properly, in so far as we don't tolerate multiple filter + // field values per genotype genotypeFieldDecoder.put(VCFConstants.GENOTYPE_FILTER_KEY, new FTDecoder()); genotypeFieldDecoder.put(VCFConstants.DEPTH_KEY, new DPDecoder()); genotypeFieldDecoder.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, new ADDecoder()); @@ -97,18 +97,30 @@ public Decoder getDecoder(final String field) { * the PL field into a int[] rather than the generic List of Integer */ public interface Decoder { - public void decode(final List siteAlleles, - final String field, - final BCF2Decoder decoder, - final byte typeDescriptor, - final int numElements, - final GenotypeBuilder[] gbs) throws IOException; + public void decode( + final List siteAlleles, + final String field, + final BCF2Decoder decoder, + 
final byte typeDescriptor, + final int numElements, + final GenotypeBuilder[] gbs) + throws IOException; } private class GTDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { - if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && numElements == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES ) + public void decode( + final List siteAlleles, + final String field, + final BCF2Decoder decoder, + final byte typeDescriptor, + final int numElements, + final GenotypeBuilder[] gbs) + throws IOException { + if (ENABLE_FASTPATH_GT + && siteAlleles.size() == 2 + && numElements == 2 + && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES) fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs); else { generalDecode(siteAlleles, numElements, decoder, typeDescriptor, gbs); @@ -128,24 +140,26 @@ public void decode(final List siteAlleles, final String field, final BCF * n + 1 options including */ @SuppressWarnings({"unchecked"}) - private final void fastBiallelicDiploidDecode(final List siteAlleles, - final BCF2Decoder decoder, - final byte typeDescriptor, - final GenotypeBuilder[] gbs) throws IOException { + private final void fastBiallelicDiploidDecode( + final List siteAlleles, + final BCF2Decoder decoder, + final byte typeDescriptor, + final GenotypeBuilder[] gbs) + throws IOException { final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); final int nPossibleGenotypes = 3 * 3; final Object allGenotypes[] = new Object[nPossibleGenotypes]; - for ( final GenotypeBuilder gb : gbs ) { + for (final GenotypeBuilder gb : gbs) { final int a1 = decoder.decodeInt(type); final int a2 = decoder.decodeInt(type); - if ( a1 == type.getMissingBytes() ) { + if (a1 == type.getMissingBytes()) { assert a2 == type.getMissingBytes(); // no called sample GT = . 
gb.alleles(null); - } else if ( a2 == type.getMissingBytes() ) { + } else if (a2 == type.getMissingBytes()) { gb.alleles(Arrays.asList(getAlleleFromEncoded(siteAlleles, a1))); } else { // downshift to remove phase @@ -153,8 +167,8 @@ private final void fastBiallelicDiploidDecode(final List siteAlleles, assert offset < allGenotypes.length; // TODO -- how can I get rid of this cast? - List gt = (List)allGenotypes[offset]; - if ( gt == null ) { + List gt = (List) allGenotypes[offset]; + if (gt == null) { final Allele allele1 = getAlleleFromEncoded(siteAlleles, a1); final Allele allele2 = getAlleleFromEncoded(siteAlleles, a2); gt = Arrays.asList(allele1, allele2); @@ -169,19 +183,21 @@ private final void fastBiallelicDiploidDecode(final List siteAlleles, } } - private final void generalDecode(final List siteAlleles, - final int ploidy, - final BCF2Decoder decoder, - final byte typeDescriptor, - final GenotypeBuilder[] gbs) throws IOException { + private final void generalDecode( + final List siteAlleles, + final int ploidy, + final BCF2Decoder decoder, + final byte typeDescriptor, + final GenotypeBuilder[] gbs) + throws IOException { final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); // a single cache for the encoded genotypes, since we don't actually need this vector final int[] tmp = new int[ploidy]; - for ( final GenotypeBuilder gb : gbs ) { + for (final GenotypeBuilder gb : gbs) { final int[] encoded = decoder.decodeIntArray(ploidy, type, tmp); - if ( encoded == null ) + if (encoded == null) // no called sample GT = . gb.alleles(null); else { @@ -192,8 +208,7 @@ private final void generalDecode(final List siteAlleles, // note that the auto-pruning of fields magically handles different // ploidy per sample at a site - for ( final int encode : encoded ) - gt.add(getAlleleFromEncoded(siteAlleles, encode)); + for (final int encode : encoded) gt.add(getAlleleFromEncoded(siteAlleles, encode)); gb.alleles(gt); final boolean phased = ((encoded.length > 1 ? 
encoded[1] : encoded[0]) & 0x01) == 1; @@ -210,8 +225,15 @@ private final Allele getAlleleFromEncoded(final List siteAlleles, final private class DPDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { - for ( final GenotypeBuilder gb : gbs ) { + public void decode( + final List siteAlleles, + final String field, + final BCF2Decoder decoder, + final byte typeDescriptor, + final int numElements, + final GenotypeBuilder[] gbs) + throws IOException { + for (final GenotypeBuilder gb : gbs) { // the -1 is for missing gb.DP(decoder.decodeInt(typeDescriptor, -1)); } @@ -220,8 +242,15 @@ public void decode(final List siteAlleles, final String field, final BCF private class GQDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { - for ( final GenotypeBuilder gb : gbs ) { + public void decode( + final List siteAlleles, + final String field, + final BCF2Decoder decoder, + final byte typeDescriptor, + final int numElements, + final GenotypeBuilder[] gbs) + throws IOException { + for (final GenotypeBuilder gb : gbs) { // the -1 is for missing gb.GQ(decoder.decodeInt(typeDescriptor, -1)); } @@ -230,8 +259,15 @@ public void decode(final List siteAlleles, final String field, final BCF private class ADDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { - for ( final GenotypeBuilder gb : gbs ) { + public void decode( + final List siteAlleles, + final String field, + final BCF2Decoder decoder, + final byte typeDescriptor, + final int numElements, + final 
GenotypeBuilder[] gbs) + throws IOException { + for (final GenotypeBuilder gb : gbs) { gb.AD(decoder.decodeIntArray(typeDescriptor, numElements)); } } @@ -239,8 +275,15 @@ public void decode(final List siteAlleles, final String field, final BCF private class PLDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { - for ( final GenotypeBuilder gb : gbs ) { + public void decode( + final List siteAlleles, + final String field, + final BCF2Decoder decoder, + final byte typeDescriptor, + final int numElements, + final GenotypeBuilder[] gbs) + throws IOException { + for (final GenotypeBuilder gb : gbs) { gb.PL(decoder.decodeIntArray(typeDescriptor, numElements)); } } @@ -248,16 +291,23 @@ public void decode(final List siteAlleles, final String field, final BCF private class GenericDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { - for ( final GenotypeBuilder gb : gbs ) { + public void decode( + final List siteAlleles, + final String field, + final BCF2Decoder decoder, + final byte typeDescriptor, + final int numElements, + final GenotypeBuilder[] gbs) + throws IOException { + for (final GenotypeBuilder gb : gbs) { Object value = decoder.decodeTypedValue(typeDescriptor, numElements); - if ( value != null ) { // don't add missing values - if ( value instanceof List && ((List)value).size() == 1) { + if (value != null) { // don't add missing values + if (value instanceof List && ((List) value).size() == 1) { // todo -- I really hate this, and it suggests that the code isn't completely right // the reason it's here is that it's possible to prune down a vector to a singleton // value and there we have the contract that the 
value comes back as an atomic value // not a vector of size 1 - value = ((List)value).get(0); + value = ((List) value).get(0); } gb.attribute(field, value); } @@ -267,11 +317,18 @@ public void decode(final List siteAlleles, final String field, final BCF private class FTDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { - for ( final GenotypeBuilder gb : gbs ) { + public void decode( + final List siteAlleles, + final String field, + final BCF2Decoder decoder, + final byte typeDescriptor, + final int numElements, + final GenotypeBuilder[] gbs) + throws IOException { + for (final GenotypeBuilder gb : gbs) { Object value = decoder.decodeTypedValue(typeDescriptor, numElements); assert value == null || value instanceof String; - gb.filter((String)value); + gb.filter((String) value); } } } diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2LazyGenotypesDecoder.java b/src/main/java/htsjdk/variant/bcf2/BCF2LazyGenotypesDecoder.java index aadea53dfb..6305a4fe8b 100644 --- a/src/main/java/htsjdk/variant/bcf2/BCF2LazyGenotypesDecoder.java +++ b/src/main/java/htsjdk/variant/bcf2/BCF2LazyGenotypesDecoder.java @@ -1,27 +1,27 @@ /* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. 
-* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ package htsjdk.variant.bcf2; @@ -30,7 +30,6 @@ import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.GenotypeBuilder; import htsjdk.variant.variantcontext.LazyGenotypesContext; - import java.io.IOException; import java.util.ArrayList; import java.util.List; @@ -51,8 +50,12 @@ public class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser private final int nFields; private final GenotypeBuilder[] builders; - BCF2LazyGenotypesDecoder(final BCF2Codec codec, final List alleles, final int nSamples, - final int nFields, final GenotypeBuilder[] builders) { + BCF2LazyGenotypesDecoder( + final BCF2Codec codec, + final List alleles, + final int nSamples, + final int nFields, + final GenotypeBuilder[] builders) { this.codec = codec; this.siteAlleles = alleles; this.nSamples = nSamples; @@ -65,12 +68,11 @@ public LazyGenotypesContext.LazyData parse(final Object data) { try { // load our byte[] data into the decoder - final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes); + final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData) data).bytes); - for ( int i = 0; i < nSamples; i++ ) - builders[i].reset(true); + for (int i = 0; i < nSamples; i++) builders[i].reset(true); - for ( int i = 0; i < nFields; i++ ) { + for (int i = 0; i < nFields; i++) { // get the field name final int offset = (Integer) decoder.decodeTypedValue(); final String field = codec.getDictionaryString(offset); @@ -81,18 +83,20 @@ public LazyGenotypesContext.LazyData parse(final Object data) { final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field); try { fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders); - } catch ( ClassCastException e ) { + } catch (ClassCastException e) { throw new TribbleException("BUG: expected encoding of field " + field + " inconsistent with the value observed in the decoded value"); } } final ArrayList genotypes = new 
ArrayList(nSamples); - for ( final GenotypeBuilder gb : builders ) - genotypes.add(gb.make()); + for (final GenotypeBuilder gb : builders) genotypes.add(gb.make()); - return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset()); - } catch ( IOException e ) { + return new LazyGenotypesContext.LazyData( + genotypes, + codec.getHeader().getSampleNamesInOrder(), + codec.getHeader().getSampleNameToOffset()); + } catch (IOException e) { throw new TribbleException("Unexpected IOException parsing already read genotypes data block", e); } } diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Type.java b/src/main/java/htsjdk/variant/bcf2/BCF2Type.java index 11c8edf6c5..d87f10ea3e 100644 --- a/src/main/java/htsjdk/variant/bcf2/BCF2Type.java +++ b/src/main/java/htsjdk/variant/bcf2/BCF2Type.java @@ -1,27 +1,27 @@ /* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ package htsjdk.variant.bcf2; @@ -39,15 +39,18 @@ public enum BCF2Type { // the actual values themselves MISSING(0, 0, 0x00) { - @Override public int read(final InputStream in) throws IOException { + @Override + public int read(final InputStream in) throws IOException { throw new IllegalArgumentException("Cannot read MISSING type"); } - @Override public void write(final int value, final OutputStream out) throws IOException { + + @Override + public void write(final int value, final OutputStream out) throws IOException { throw new IllegalArgumentException("Cannot write MISSING type"); } }, - INT8 (1, 1, 0xFFFFFF80, -127, 127) { + INT8(1, 1, 0xFFFFFF80, -127, 127) { @Override public int read(final InputStream in) throws IOException { return BCF2Utils.readByte(in); @@ -55,16 +58,16 @@ public int read(final InputStream in) throws IOException { @Override public void write(final int value, final OutputStream out) throws IOException { - out.write(0xFF & value); // TODO -- do we need this operation? + out.write(0xFF & value); // TODO -- do we need this operation? 
} }, - INT16(2, 2, 0xFFFF8000, -32767, 32767) { + INT16(2, 2, 0xFFFF8000, -32767, 32767) { @Override public int read(final InputStream in) throws IOException { final int b2 = BCF2Utils.readByte(in) & 0xFF; final int b1 = BCF2Utils.readByte(in) & 0xFF; - return (short)((b1 << 8) | b2); + return (short) ((b1 << 8) | b2); } @Override @@ -82,7 +85,7 @@ public int read(final InputStream in) throws IOException { final int b3 = BCF2Utils.readByte(in) & 0xFF; final int b2 = BCF2Utils.readByte(in) & 0xFF; final int b1 = BCF2Utils.readByte(in) & 0xFF; - return (int)(b1 << 24 | b2 << 16 | b3 << 8 | b4); + return (int) (b1 << 24 | b2 << 16 | b3 << 8 | b4); } @Override @@ -106,7 +109,7 @@ public void write(final int value, final OutputStream out) throws IOException { } }, - CHAR (7, 1, 0x00000000) { + CHAR(7, 1, 0x00000000) { @Override public int read(final InputStream in) throws IOException { return INT8.read(in); @@ -149,7 +152,9 @@ public int getSizeInBytes() { * The ID according to the BCF2 specification * @return */ - public int getID() { return id; } + public int getID() { + return id; + } /** * Can we encode value v in this type, according to its declared range. 
@@ -159,7 +164,9 @@ public int getSizeInBytes() { * @param v * @return */ - public final boolean withinRange(final long v) { return v >= minValue && v <= maxValue; } + public final boolean withinRange(final long v) { + return v >= minValue && v <= maxValue; + } /** * Return the java object (aka null) that is used to represent a missing value for this @@ -167,7 +174,9 @@ public int getSizeInBytes() { * * @return */ - public Object getMissingJavaValue() { return missingJavaValue; } + public Object getMissingJavaValue() { + return missingJavaValue; + } /** * The bytes (encoded as an int) that are used to represent a missing value @@ -175,12 +184,14 @@ public int getSizeInBytes() { * * @return */ - public int getMissingBytes() { return missingBytes; } + public int getMissingBytes() { + return missingBytes; + } /** * An enum set of the types that might represent Integer values */ - private final static EnumSet INTEGERS = EnumSet.of(INT8, INT16, INT32); + private static final EnumSet INTEGERS = EnumSet.of(INT8, INT16, INT32); /** * @return true if this BCF2Type corresponds to the magic "MISSING" type (0x00) diff --git a/src/main/java/htsjdk/variant/bcf2/BCF2Utils.java b/src/main/java/htsjdk/variant/bcf2/BCF2Utils.java index 39478bf069..d5f34a2c1f 100644 --- a/src/main/java/htsjdk/variant/bcf2/BCF2Utils.java +++ b/src/main/java/htsjdk/variant/bcf2/BCF2Utils.java @@ -1,34 +1,33 @@ /* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or 
substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ package htsjdk.variant.bcf2; import htsjdk.samtools.util.FileExtensions; import htsjdk.tribble.TribbleException; import htsjdk.variant.vcf.*; - import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; @@ -57,14 +56,15 @@ public final class BCF2Utils { public static final int OVERFLOW_ELEMENT_MARKER = 15; public static final int MAX_INLINE_ELEMENTS = 14; - public final static BCF2Type[] INTEGER_TYPES_BY_SIZE = new BCF2Type[]{BCF2Type.INT8, BCF2Type.INT16, BCF2Type.INT32}; - public final static BCF2Type[] ID_TO_ENUM; + public static final BCF2Type[] INTEGER_TYPES_BY_SIZE = + new BCF2Type[] {BCF2Type.INT8, BCF2Type.INT16, BCF2Type.INT32}; + public static final BCF2Type[] ID_TO_ENUM; static { int maxID = -1; - for ( BCF2Type v : BCF2Type.values() ) maxID = Math.max(v.getID(), maxID); - ID_TO_ENUM = new BCF2Type[maxID+1]; - for ( BCF2Type v : BCF2Type.values() ) ID_TO_ENUM[v.getID()] = v; + for (BCF2Type v : BCF2Type.values()) maxID = Math.max(v.getID(), maxID); + ID_TO_ENUM = new BCF2Type[maxID + 1]; + for (BCF2Type v : BCF2Type.values()) ID_TO_ENUM[v.getID()] = v; } private BCF2Utils() {} @@ -91,10 +91,10 @@ public static ArrayList makeDictionary(final VCFHeader header) { dict.add(VCFConstants.PASSES_FILTERS_v4); // set up the strings dictionary - for ( VCFHeaderLine line : header.getMetaDataInInputOrder() ) { - if ( line.shouldBeAddedToDictionary() ) { - final VCFIDHeaderLine idLine = (VCFIDHeaderLine)line; - if ( ! 
seen.contains(idLine.getID())) { + for (VCFHeaderLine line : header.getMetaDataInInputOrder()) { + if (line.shouldBeAddedToDictionary()) { + final VCFIDHeaderLine idLine = (VCFIDHeaderLine) line; + if (!seen.contains(idLine.getID())) { dict.add(idLine.getID()); seen.add(idLine.getID()); } @@ -104,8 +104,8 @@ public static ArrayList makeDictionary(final VCFHeader header) { return dict; } - public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) { - return (byte)((0x0F & nElements) << 4 | (type.getID() & 0x0F)); + public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type) { + return (byte) ((0x0F & nElements) << 4 | (type.getID() & 0x0F)); } public static int decodeSize(final byte typeDescriptor) { @@ -125,7 +125,7 @@ public static boolean sizeIsOverflow(final byte typeDescriptor) { } public static byte readByte(final InputStream stream) throws IOException { - return (byte)(stream.read() & 0xFF); + return (byte) (stream.read() & 0xFF); } /** @@ -137,12 +137,12 @@ public static byte readByte(final InputStream stream) throws IOException { * @return */ public static String collapseStringList(final List strings) { - if ( strings.isEmpty() ) return ""; - else if ( strings.size() == 1 ) return strings.get(0); + if (strings.isEmpty()) return ""; + else if (strings.size() == 1) return strings.get(0); else { final StringBuilder b = new StringBuilder(); - for ( final String s : strings ) { - if ( s != null ) { + for (final String s : strings) { + if (s != null) { assert s.indexOf(",") == -1; // no commas in individual strings b.append(',').append(s); } @@ -184,12 +184,10 @@ public static boolean isCollapsedString(final String s) { */ public static final File shadowBCF(final File vcfFile) { final String path = vcfFile.getAbsolutePath(); - if ( path.contains(FileExtensions.VCF) ) - return new File(path.replace(FileExtensions.VCF, FileExtensions.BCF)); + if (path.contains(FileExtensions.VCF)) return new 
File(path.replace(FileExtensions.VCF, FileExtensions.BCF)); else { - final File bcf = new File( path + FileExtensions.BCF ); - if ( bcf.canRead() ) - return bcf; + final File bcf = new File(path + FileExtensions.BCF); + if (bcf.canRead()) return bcf; else { try { // this is the only way to robustly decide if we could actually write to BCF @@ -197,9 +195,9 @@ public static final File shadowBCF(final File vcfFile) { o.close(); bcf.delete(); return bcf; - } catch ( FileNotFoundException e ) { + } catch (FileNotFoundException e) { return null; - } catch ( IOException e ) { + } catch (IOException e) { return null; } } @@ -207,9 +205,8 @@ public static final File shadowBCF(final File vcfFile) { } public static BCF2Type determineIntegerType(final int value) { - for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) { - if ( potentialType.withinRange(value) ) - return potentialType; + for (final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) { + if (potentialType.withinRange(value)) return potentialType; } throw new TribbleException("Integer cannot be encoded in allowable range of even INT32: " + value); @@ -218,9 +215,9 @@ public static BCF2Type determineIntegerType(final int value) { public static BCF2Type determineIntegerType(final int[] values) { // find the min and max values in the array int max = 0, min = 0; - for ( final int v : values ) { - if ( v > max ) max = v; - if ( v < min ) min = v; + for (final int v : values) { + if (v > max) max = v; + if (v < min) min = v; } final BCF2Type maxType = determineIntegerType(max); @@ -240,23 +237,32 @@ public static BCF2Type determineIntegerType(final int[] values) { * @return */ public static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) { - switch ( t1 ) { - case INT8: return t2; - case INT16: return t2 == BCF2Type.INT32 ? 
t2 : t1; - case INT32: return t1; - default: throw new TribbleException("BUG: unexpected BCF2Type " + t1); + switch (t1) { + case INT8: + return t2; + case INT16: + return t2 == BCF2Type.INT32 ? t2 : t1; + case INT32: + return t1; + default: + throw new TribbleException("BUG: unexpected BCF2Type " + t1); } } public static BCF2Type determineIntegerType(final List values) { BCF2Type maxType = BCF2Type.INT8; - for ( final int value : values ) { + for (final int value : values) { final BCF2Type type1 = determineIntegerType(value); - switch ( type1 ) { - case INT8: break; - case INT16: maxType = BCF2Type.INT16; break; - case INT32: return BCF2Type.INT32; // fast path for largest possible value - default: throw new TribbleException("Unexpected integer type " + type1 ); + switch (type1) { + case INT8: + break; + case INT16: + maxType = BCF2Type.INT16; + break; + case INT32: + return BCF2Type.INT32; // fast path for largest possible value + default: + throw new TribbleException("Unexpected integer type " + type1); } } return maxType; @@ -275,16 +281,14 @@ public static BCF2Type determineIntegerType(final List values) { * @return */ public static List toList(final Class c, final Object o) { - if ( o == null ) return Collections.emptyList(); - else if ( o instanceof List ) return (List)o; - else if ( o.getClass().isArray() ) { + if (o == null) return Collections.emptyList(); + else if (o instanceof List) return (List) o; + else if (o.getClass().isArray()) { final int arraySize = Array.getLength(o); final List list = new ArrayList(arraySize); - for (int i=0; i outputLinesIt = outputHeader.getIDHeaderLines().iterator(); - final Iterator inputLinesIt = genotypesBlockHeader.getIDHeaderLines().iterator(); + final Iterator outputLinesIt = + outputHeader.getIDHeaderLines().iterator(); + final Iterator inputLinesIt = + genotypesBlockHeader.getIDHeaderLines().iterator(); - while ( inputLinesIt.hasNext() ) { - if ( ! 
outputLinesIt.hasNext() ) // missing lines in output - return false; + while (inputLinesIt.hasNext()) { + if (!outputLinesIt.hasNext()) // missing lines in output + return false; final VCFIDHeaderLine outputLine = outputLinesIt.next(); final VCFIDHeaderLine inputLine = inputLinesIt.next(); - if ( ! inputLine.getClass().equals(outputLine.getClass()) || ! inputLine.getID().equals(outputLine.getID()) ) - return false; + if (!inputLine.getClass().equals(outputLine.getClass()) + || !inputLine.getID().equals(outputLine.getID())) return false; } return true; } private static List nullAsEmpty(List l) { - if ( l == null ) - return Collections.emptyList(); - else - return l; + if (l == null) return Collections.emptyList(); + else return l; } } diff --git a/src/main/java/htsjdk/variant/bcf2/BCFVersion.java b/src/main/java/htsjdk/variant/bcf2/BCFVersion.java index b18b83e4aa..ebdda67630 100644 --- a/src/main/java/htsjdk/variant/bcf2/BCFVersion.java +++ b/src/main/java/htsjdk/variant/bcf2/BCFVersion.java @@ -1,27 +1,27 @@ /* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ package htsjdk.variant.bcf2; @@ -78,13 +78,12 @@ public int getMinorVersion() { public static BCFVersion readBCFVersion(final InputStream stream) throws IOException { final byte[] magicBytes = new byte[MAGIC_HEADER_START.length]; stream.read(magicBytes); - if ( Arrays.equals(magicBytes, MAGIC_HEADER_START) ) { + if (Arrays.equals(magicBytes, MAGIC_HEADER_START)) { // we're a BCF file final int majorByte = stream.read(); final int minorByte = stream.read(); - return new BCFVersion( majorByte, minorByte ); - } else - return null; + return new BCFVersion(majorByte, minorByte); + } else return null; } @Override diff --git a/src/main/java/htsjdk/variant/example/PrintVariantsExample.java b/src/main/java/htsjdk/variant/example/PrintVariantsExample.java index 24ecd101c7..f6aecc368e 100755 --- a/src/main/java/htsjdk/variant/example/PrintVariantsExample.java +++ b/src/main/java/htsjdk/variant/example/PrintVariantsExample.java @@ -33,7 +33,6 @@ import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder; import htsjdk.variant.vcf.VCFCodec; import htsjdk.variant.vcf.VCFHeader; - import java.io.File; import java.io.IOException; import java.net.InetAddress; @@ -51,8 +50,7 @@ * - the second argument is optional and is the name of the output file (nothing gets written if this argument is missing) */ public final class PrintVariantsExample { - private PrintVariantsExample() { - } + private PrintVariantsExample() {} private static final Log log = Log.getInstance(PrintVariantsExample.class); @@ -69,8 +67,15 @@ public static void main(final String[] args) throws IOException { log.info("Start with args:" + Arrays.toString(args)); printConfigurationInfo(); - try (final VariantContextWriter writer = outputFile == null ? 
null : new VariantContextWriterBuilder().setOutputFile(outputFile).setOutputFileType(VariantContextWriterBuilder.OutputType.VCF).unsetOption(Options.INDEX_ON_THE_FLY).build(); - final AbstractFeatureReader reader = AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), new VCFCodec(), false)) { + try (final VariantContextWriter writer = outputFile == null + ? null + : new VariantContextWriterBuilder() + .setOutputFile(outputFile) + .setOutputFileType(VariantContextWriterBuilder.OutputType.VCF) + .unsetOption(Options.INDEX_ON_THE_FLY) + .build(); + final AbstractFeatureReader reader = + AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), new VCFCodec(), false)) { log.info(reader.getClass().getSimpleName() + " hasIndex " + reader.hasIndex()); if (writer != null) { @@ -92,12 +97,14 @@ public static void main(final String[] args) throws IOException { } private static void printConfigurationInfo() throws IOException { - log.info("Executing as " + - System.getProperty("user.name") + '@' + InetAddress.getLocalHost().getHostName() + - " on " + System.getProperty("os.name") + ' ' + System.getProperty("os.version") + - ' ' + System.getProperty("os.arch") + "; " + System.getProperty("java.vm.name") + - ' ' + System.getProperty("java.runtime.version")); + log.info("Executing as " + System.getProperty("user.name") + + '@' + InetAddress.getLocalHost().getHostName() + " on " + + System.getProperty("os.name") + ' ' + System.getProperty("os.version") + ' ' + + System.getProperty("os.arch") + "; " + System.getProperty("java.vm.name") + ' ' + + System.getProperty("java.runtime.version")); - log.info(Defaults.allDefaults().entrySet().stream().map(e -> e.getKey() + ':' + e.getValue()).collect(Collectors.joining(" "))); + log.info(Defaults.allDefaults().entrySet().stream() + .map(e -> e.getKey() + ':' + e.getValue()) + .collect(Collectors.joining(" "))); } } diff --git a/src/main/java/htsjdk/variant/utils/BinomialCoefficientUtil.java 
b/src/main/java/htsjdk/variant/utils/BinomialCoefficientUtil.java index ea5b6e7c11..3d6a53a213 100644 --- a/src/main/java/htsjdk/variant/utils/BinomialCoefficientUtil.java +++ b/src/main/java/htsjdk/variant/utils/BinomialCoefficientUtil.java @@ -1,8 +1,5 @@ package htsjdk.variant.utils; -import java.lang.ArithmeticException; -import java.lang.Math; - /** * A modified version of the Apache Math implementation of binomial * coefficient calculation @@ -24,7 +21,6 @@ * Copyright 2010-2012 CS Systèmes d'Information * */ - public class BinomialCoefficientUtil { /** @@ -111,12 +107,14 @@ public static long binomialCoefficient(final int n, final int k) throws Arithmet * @param k Size of the subsets to be counted. * @throws IllegalArgumentException if {@code n < 0} or {@code k > n}. */ - private static void checkBinomial(final int n, final int k) throws IllegalArgumentException{ + private static void checkBinomial(final int n, final int k) throws IllegalArgumentException { if (n < k) { - throw new IllegalArgumentException("The first value (" + n + ") must not be exceeded by the second value (" + k + ") in a binomial coefficient"); + throw new IllegalArgumentException("The first value (" + n + ") must not be exceeded by the second value (" + + k + ") in a binomial coefficient"); } if (n < 0) { - throw new IllegalArgumentException("The first value (" + n + ") in a binomial coefficient must not be negative."); + throw new IllegalArgumentException( + "The first value (" + n + ") in a binomial coefficient must not be negative."); } } @@ -151,10 +149,8 @@ private static void checkBinomial(final int n, final int k) throws IllegalArgume private static int gcd(int p, int q) throws ArithmeticException { int a = p; int b = q; - if (a == 0 || - b == 0) { - if (a == Integer.MIN_VALUE || - b == Integer.MIN_VALUE) { + if (a == 0 || b == 0) { + if (a == Integer.MIN_VALUE || b == Integer.MIN_VALUE) { throw new ArithmeticException("overflow: gcd(" + p + ", " + q + ") is 2^31"); } return 
Math.abs(a + b); @@ -164,7 +160,7 @@ private static int gcd(int p, int q) throws ArithmeticException { long bl = b; boolean useLong = false; if (a < 0) { - if(Integer.MIN_VALUE == a) { + if (Integer.MIN_VALUE == a) { useLong = true; } else { a = -a; @@ -180,7 +176,7 @@ private static int gcd(int p, int q) throws ArithmeticException { bl = -bl; } if (useLong) { - if(al == bl) { + if (al == bl) { throw new ArithmeticException("overflow: gcd(" + p + ", " + q + ") is 2^31"); } long blbu = bl; @@ -225,8 +221,7 @@ private static int gcd(int p, int q) throws ArithmeticException { private static int gcdPositive(int a, int b) { if (a == 0) { return b; - } - else if (b == 0) { + } else if (b == 0) { return a; } @@ -286,7 +281,6 @@ private static long mulAndCheck(long a, long b) throws ArithmeticException { ret = a * b; } else { throw new ArithmeticException(); - } } else { // assert b == 0 @@ -309,5 +303,4 @@ private static long mulAndCheck(long a, long b) throws ArithmeticException { } return ret; } - } diff --git a/src/main/java/htsjdk/variant/utils/GeneralUtils.java b/src/main/java/htsjdk/variant/utils/GeneralUtils.java index 56230ed3ba..5798691955 100644 --- a/src/main/java/htsjdk/variant/utils/GeneralUtils.java +++ b/src/main/java/htsjdk/variant/utils/GeneralUtils.java @@ -1,27 +1,27 @@ /* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. 
-* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ package htsjdk.variant.utils; @@ -46,7 +46,7 @@ public class GeneralUtils { * The smallest log10 value we'll emit from normalizeFromLog10 and other functions * where the real-space value is 0.0. 
*/ - public final static double LOG10_P_OF_ZERO = -1000000.0; + public static final double LOG10_P_OF_ZERO = -1000000.0; /** * Returns a string of the form elt1.toString() [sep elt2.toString() ... sep elt.toString()] for a collection of @@ -65,11 +65,11 @@ public static String join(final String separator, final Collection object final Iterator iter = objects.iterator(); final T first = iter.next(); - if ( ! iter.hasNext() ) // fast path for singleton collections - return first.toString(); + if (!iter.hasNext()) // fast path for singleton collections + return first.toString(); else { // full path for 2+ collection that actually need a join final StringBuilder ret = new StringBuilder(first.toString()); - while(iter.hasNext()) { + while (iter.hasNext()) { ret.append(separator); ret.append(iter.next().toString()); } @@ -124,19 +124,16 @@ public static double[] normalizeFromLog10(double[] array, boolean takeLog10OfOut // default case: go to linear space double[] normalized = new double[array.length]; - for (int i = 0; i < array.length; i++) - normalized[i] = Math.pow(10, array[i] - maxValue); + for (int i = 0; i < array.length; i++) normalized[i] = Math.pow(10, array[i] - maxValue); // normalize double sum = 0.0; - for (int i = 0; i < array.length; i++) - sum += normalized[i]; + for (int i = 0; i < array.length; i++) sum += normalized[i]; for (int i = 0; i < array.length; i++) { double x = normalized[i] / sum; if (takeLog10OfOutput) { x = Math.log10(x); - if ( x < LOG10_P_OF_ZERO || Double.isInfinite(x) ) - x = array[i] - maxValue; + if (x < LOG10_P_OF_ZERO || Double.isInfinite(x)) x = array[i] - maxValue; } normalized[i] = x; @@ -154,13 +151,11 @@ public static int maxElementIndex(final double[] array) { } public static int maxElementIndex(final double[] array, final int endIndex) { - if (array == null || array.length == 0) - throw new IllegalArgumentException("Array cannot be null!"); + if (array == null || array.length == 0) throw new IllegalArgumentException("Array 
cannot be null!"); int maxI = 0; for (int i = 1; i < endIndex; i++) { - if (array[i] > array[maxI]) - maxI = i; + if (array[i] > array[maxI]) maxI = i; } return maxI; @@ -186,20 +181,19 @@ public static List cons(final T elt, final List l) { * @param withReplacement if false, the resulting permutations will only contain unique objects from objects * @return */ - public static List> makePermutations(final List objects, final int n, final boolean withReplacement) { + public static List> makePermutations( + final List objects, final int n, final boolean withReplacement) { final List> combinations = new ArrayList>(); - if ( n <= 0 ) + if (n <= 0) ; - else if ( n == 1 ) { - for ( final T o : objects ) - combinations.add(Collections.singletonList(o)); + else if (n == 1) { + for (final T o : objects) combinations.add(Collections.singletonList(o)); } else { final List> sub = makePermutations(objects, n - 1, withReplacement); - for ( List subI : sub ) { - for ( final T a : objects ) { - if ( withReplacement || ! 
subI.contains(a) ) - combinations.add(cons(a, subI)); + for (List subI : sub) { + for (final T a : objects) { + if (withReplacement || !subI.contains(a)) combinations.add(cons(a, subI)); } } } @@ -236,11 +230,9 @@ public static byte compareDoubles(double a, double b, double epsilon) { return 1; } - static public final List reverse(final List l) { + public static final List reverse(final List l) { final List newL = new ArrayList(l); Collections.reverse(newL); return newL; } } - - diff --git a/src/main/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractor.java b/src/main/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractor.java index 305575a70f..224da2bc30 100644 --- a/src/main/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractor.java +++ b/src/main/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractor.java @@ -32,7 +32,6 @@ import htsjdk.samtools.util.*; import htsjdk.tribble.util.ParsingUtils; import htsjdk.variant.vcf.VCFFileReader; - import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -53,9 +52,11 @@ enum TYPE { @Override SAMSequenceDictionary extractDictionary(final Path reference) { - final SAMSequenceDictionary dict = ReferenceSequenceFileFactory.getReferenceSequenceFile(reference).getSequenceDictionary(); + final SAMSequenceDictionary dict = ReferenceSequenceFileFactory.getReferenceSequenceFile(reference) + .getSequenceDictionary(); if (dict == null) - throw new SAMException("Could not find dictionary next to reference file " + reference.toUri().toString()); + throw new SAMException("Could not find dictionary next to reference file " + + reference.toUri().toString()); return dict; } }, @@ -63,8 +64,8 @@ SAMSequenceDictionary extractDictionary(final Path reference) { @Override SAMSequenceDictionary extractDictionary(final Path dictionary) { - try (BufferedLineReader bufferedLineReader = - new BufferedLineReader(ParsingUtils.openInputStream(dictionary.toUri().toString()))) { + try (BufferedLineReader bufferedLineReader 
= new BufferedLineReader( + ParsingUtils.openInputStream(dictionary.toUri().toString()))) { final SAMTextHeaderCodec codec = new SAMTextHeaderCodec(); final SAMFileHeader header = codec.decode(bufferedLineReader, dictionary.toString()); return header.getSequenceDictionary(); @@ -74,7 +75,7 @@ SAMSequenceDictionary extractDictionary(final Path dictionary) { } }, CRAM(FileExtensions.CRAM) { - + @Override SAMSequenceDictionary extractDictionary(final Path cramPath) { IOUtil.assertFileIsReadable(cramPath); @@ -102,7 +103,7 @@ SAMSequenceDictionary extractDictionary(final Path sam) { @Override SAMSequenceDictionary extractDictionary(final Path vcf) { - try (VCFFileReader vcfPathReader = new VCFFileReader(vcf, false)){ + try (VCFFileReader vcfPathReader = new VCFFileReader(vcf, false)) { return vcfPathReader.getFileHeader().getSequenceDictionary(); } } @@ -129,7 +130,9 @@ SAMSequenceDictionary extractDictionary(final Path intervalList) { * @deprecated in favor of {@link VCFFileReader##extractDictionary(Path) } * */ @Deprecated - SAMSequenceDictionary extractDictionary(final File file) {return extractDictionary(file.toPath());} + SAMSequenceDictionary extractDictionary(final File file) { + return extractDictionary(file.toPath()); + } abstract SAMSequenceDictionary extractDictionary(final Path file); @@ -149,7 +152,10 @@ static TYPE forFile(final Path dictionaryExtractable) { } } } - throw new SAMException("Cannot figure out type of file " + dictionaryExtractable.toUri().toString() + " from extension. Current implementation understands the following types: " + Arrays.toString(TYPE.values())); + throw new SAMException("Cannot figure out type of file " + + dictionaryExtractable.toUri().toString() + + " from extension. 
Current implementation understands the following types: " + + Arrays.toString(TYPE.values())); } @Override @@ -169,5 +175,4 @@ public static SAMSequenceDictionary extractDictionary(final File file) { public static SAMSequenceDictionary extractDictionary(final Path path) { return TYPE.forFile(path).extractDictionary(path); } - } diff --git a/src/main/java/htsjdk/variant/utils/VCFHeaderReader.java b/src/main/java/htsjdk/variant/utils/VCFHeaderReader.java index 9d4be262d2..fc4f85293e 100644 --- a/src/main/java/htsjdk/variant/utils/VCFHeaderReader.java +++ b/src/main/java/htsjdk/variant/utils/VCFHeaderReader.java @@ -1,6 +1,5 @@ package htsjdk.variant.utils; -import htsjdk.samtools.SamStreams; import htsjdk.samtools.cram.io.InputStreamUtils; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.IOUtil; @@ -12,7 +11,6 @@ import htsjdk.variant.bcf2.BCFVersion; import htsjdk.variant.vcf.VCFCodec; import htsjdk.variant.vcf.VCFHeader; - import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; @@ -24,7 +22,7 @@ */ public final class VCFHeaderReader { - private VCFHeaderReader(){} + private VCFHeaderReader() {} /** * Read a VCF header from a stream that may be a VCF file (possibly gzip or block compressed) or a BCF file. @@ -38,7 +36,8 @@ private VCFHeaderReader(){} */ public static VCFHeader readHeaderFrom(final SeekableStream in) throws IOException { final long initialPos = in.position(); - byte[] magicBytes = InputStreamUtils.readFully(bufferAndDecompressIfNecessary(in), BCFVersion.MAGIC_HEADER_START.length); + byte[] magicBytes = + InputStreamUtils.readFully(bufferAndDecompressIfNecessary(in), BCFVersion.MAGIC_HEADER_START.length); in.seek(initialPos); if (magicBytes[0] == '#') { // VCF return readHeaderFrom(in, new VCFCodec()); @@ -54,7 +53,8 @@ private static InputStream bufferAndDecompressIfNecessary(final InputStream in) return IOUtil.isGZIPInputStream(bis) ? 
new GZIPInputStream(bis) : bis; } - private static VCFHeader readHeaderFrom(final InputStream in, final FeatureCodec featureCodec) throws IOException { + private static VCFHeader readHeaderFrom( + final InputStream in, final FeatureCodec featureCodec) throws IOException { InputStream is = bufferAndDecompressIfNecessary(in); FeatureCodecHeader headerCodec = featureCodec.readHeader(featureCodec.makeSourceFromStream(is)); return (VCFHeader) headerCodec.getHeaderValue(); diff --git a/src/main/java/htsjdk/variant/variantcontext/Allele.java b/src/main/java/htsjdk/variant/variantcontext/Allele.java index 002782570f..de0bcb4afa 100644 --- a/src/main/java/htsjdk/variant/variantcontext/Allele.java +++ b/src/main/java/htsjdk/variant/variantcontext/Allele.java @@ -34,11 +34,11 @@ * Types of alleles: *

    *
    - Ref: a t C g a // C is the reference base
    - : a t G g a // C base is a G in some individuals
    - : a t - g a // C base is deleted w.r.t. the reference
    - : a t CAg a // A base is inserted w.r.t. the reference sequence
    - 
    + * Ref: a t C g a // C is the reference base + * : a t G g a // C base is a G in some individuals + * : a t - g a // C base is deleted w.r.t. the reference + * : a t CAg a // A base is inserted w.r.t. the reference sequence + * *

    In these cases, where are the alleles?

    *