diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml new file mode 100644 index 0000000..cad3607 --- /dev/null +++ b/.github/workflows/publish-pypi.yml @@ -0,0 +1,57 @@ +name: Publish Python Package + +on: + release: + types: [published] + workflow_dispatch: + inputs: + version: + description: 'Version to publish (e.g., 1.0.0)' + required: false + +env: + PYTHON_VERSION: '3.9' + +jobs: + deploy: + name: Build and Publish + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + + - name: Set up Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@v4 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' + cache-dependency-path: 'pyproject.toml' + + - name: Install build dependencies + run: | + python -m pip install --upgrade pip + pip install build twine + + - name: Build package + run: | + python -m build --sdist --wheel --outdir dist/ + + - name: Verify package + run: | + twine check dist/* + + - name: Publish to PyPI + if: github.event_name == 'release' && github.event.action == 'published' + uses: pypa/gh-action-pypi-publish@release/v1 + with: + skip-existing: true + verbose: true + + - name: Show package info + run: | + echo "Package built for version: ${{ github.event.release.tag_name }}" + ls -la dist/ diff --git a/.github/workflows/publish_docker.yaml b/.github/workflows/publish_docker.yaml index 7fed7a0..745f6de 100644 --- a/.github/workflows/publish_docker.yaml +++ b/.github/workflows/publish_docker.yaml @@ -1,64 +1,160 @@ -name: Publish Docker image to ghcr.io +name: Publish Docker Image + on: - push: - tags: - - "*" + push: + branches: [ main, master ] + tags: [ '*', '!*-*' ] # Match v1.2.3 but not v1.2.3-rc1 + pull_request: + branches: [ main, master ] + workflow_dispatch: + inputs: + version: + description: 'Version to build (e.g., v1.2.3)' + required: false + +# Set job-level environment variables +env: + REGISTRY: ghcr.io + IMAGE_NAME: ${{ github.repository }} + DOCKERFILE_PATH: ./Dockerfile + BUILDX_CACHE_DIR: /tmp/.buildx-cache + BUILDX_CACHE_KEY: ${{ github.ref }}-${{ github.sha }} + jobs: - push_to_registries: - name: Build and publish Docker image - runs-on: ubuntu-latest - steps: - - name: Check out the repo - uses: actions/checkout@v3 - - name: Set up QEMU - uses: docker/setup-qemu-action@v2 - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 - - name: Prepare - # In this preparation step, a few configurations are made - # according to tags that will be used to export the image - # for Docker Hub, as well as the name of the image itself - id: prep - run: | - DOCKER_IMAGE=ghcr.io/msk-access/genotype_variants - VERSION=noop - if [ "${{ github.event_name }}" = "schedule" ]; then - VERSION=nightly - elif [[ $GITHUB_REF == refs/tags/* ]]; then - VERSION=${GITHUB_REF#refs/tags/} - elif [[ $GITHUB_REF == refs/heads/* ]]; then - VERSION=$(echo ${GITHUB_REF#refs/heads/} | sed -r 's#/+#-#g') - fi - TAGS="${DOCKER_IMAGE}:${VERSION}" - if [[ $VERSION =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then - MINOR=${VERSION%.*} - MAJOR=${MINOR%.*} - TAGS="$TAGS,${DOCKER_IMAGE}:latest" - elif [[ $VERSION =~ ^v[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then - VERSION=$(echo ${VERSION#v}) - TAGS="${DOCKER_IMAGE}:${VERSION}" - elif [ "${{ github.event_name }}" = "push" ]; then - TAGS="$TAGS,${DOCKER_IMAGE}:sha-${GITHUB_SHA::8}" - fi - echo ::set-output name=version::${VERSION} - echo ::set-output name=tags::${TAGS} - - name: Login to GitHub Container Registry - #if: github.event_name != 'pull_request' - uses: docker/login-action@v2 - with: - registry: ghcr.io - username: ${{ github.repository_owner }} - password: ${{ secrets.RS_PAT }} - - name: Push to GitHub Packages - uses: docker/build-push-action@v3 - with: - context: . - file: ./Dockerfile - push: true - tags: ${{ steps.prep.outputs.tags }} - build-args: | - GENOTYPE_VARIANTS_VERSION=${{ steps.prep.outputs.version }} - labels: | - org.opencontainers.image.title=${{ github.event.repository.name }} - org.opencontainers.image.description=${{ github.event.repository.description }} - org.opencontainers.image.version=${{ steps.prep.outputs.version }} + build-and-push: + name: Build and Push Docker Image + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + security-events: write # For Trivy SARIF upload + + strategy: + fail-fast: false + matrix: + platform: [linux/amd64, linux/arm64] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Needed for version detection + submodules: recursive + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + with: + platforms: arm64,amd64 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + with: + install: true + driver-opts: | + image=moby/buildkit:latest + network=host + buildkitd-config-inline: | + [worker.oci] + max-parallelism = 4 + + - name: Get current date + id: date + run: echo "date=$(date -u +'%Y-%m-%dT%H:%M:%SZ')" >> $GITHUB_OUTPUT + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=schedule,pattern=nightly + type=ref,event=branch + type=ref,event=pr + type=semver,pattern={{version}} + type=semver,pattern={{major}}.{{minor}} + type=semver,pattern={{major}} + type=sha,format=long,prefix=sha- + type=raw,value=latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }} + flavor: | + latest=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }} + labels: | + org.opencontainers.image.title=${{ github.event.repository.name }} + org.opencontainers.image.description=${{ github.event.repository.description }} + org.opencontainers.image.url=${{ github.server_url }}/${{ github.repository }} + org.opencontainers.image.source=${{ github.server_url }}/${{ github.repository }} + org.opencontainers.image.created=${{ steps.date.outputs.date }} + org.opencontainers.image.revision=${{ github.sha }} + org.opencontainers.image.version=${{ github.ref_name }} + + - name: Log in to GitHub Container Registry + if: github.event_name != 'pull_request' + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Docker Buildx cache + uses: actions/cache@v3 + with: + path: ${{ env.BUILDX_CACHE_DIR }} + key: buildx-${{ runner.os }}-${{ matrix.platform }}-${{ env.BUILDX_CACHE_KEY }} + restore-keys: | + buildx-${{ runner.os }}-${{ matrix.platform }}- + buildx-${{ runner.os }}- + + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + context: . + file: ${{ env.DOCKERFILE_PATH }} + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + platforms: ${{ matrix.platform }} + cache-from: type=local,src=${{ env.BUILDX_CACHE_DIR }} + cache-to: type=local,dest=${{ env.BUILDX_CACHE_DIR }}-new,mode=max + build-args: | + BUILDKIT_INLINE_CACHE=1 + GENOTYPE_VARIANTS_VERSION=${{ github.ref_name }} + BUILD_VERSION=${{ github.ref_name }} + BUILD_DATE=${{ steps.date.outputs.date }} + VCS_REF=${{ github.sha }} + provenance: ${{ github.event_name != 'pull_request' }} + sbom: true + secrets: | + GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }} + + - name: Run Trivy vulnerability scanner + if: github.event_name != 'pull_request' + uses: aquasecurity/trivy-action@master + with: + image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@${{ steps.meta.outputs.version }} + format: 'sarif' + output: 'trivy-results.sarif' + severity: 'CRITICAL,HIGH' + ignore-unfixed: true + vuln-type: 'os,library' + exit-code: '1' + timeout: '5m' + + - name: Upload Trivy scan results to GitHub Security tab + if: always() && (github.event_name != 'pull_request') + uses: github/codeql-action/upload-sarif@v2 + with: + sarif_file: 'trivy-results.sarif' + category: 'container-scan' + + - name: Update Buildx cache + if: github.event_name != 'pull_request' + run: | + rm -rf ${{ env.BUILDX_CACHE_DIR }} + mv ${{ env.BUILDX_CACHE_DIR }}-new ${{ env.BUILDX_CACHE_DIR }} + echo "Updated build cache" + + - name: Show image details + if: always() + run: | + echo "Built image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.meta.outputs.version }}" + echo "Tags: ${{ steps.meta.outputs.tags }}" + echo "Labels: ${{ steps.meta.outputs.labels }}" diff --git a/.github/workflows/validate.yaml b/.github/workflows/validate.yaml index 6f24294..79a5dde 100644 --- a/.github/workflows/validate.yaml +++ b/.github/workflows/validate.yaml @@ -1,43 +1,68 @@ -name: validate +name: Validate + on: push: + branches: [ main ] paths-ignore: - - 'docs/**' - - '**.md' - - '**.rst' - tags-ignore: - - v* + - 'docs/**' + - '**.md' + - '**.rst' + - '**.gitignore' + - '.github/**' pull_request: paths-ignore: - - 'docs/**' - - '**.md' - - '**.rst' + - 'docs/**' + - '**.md' + - '**.rst' + - '**.gitignore' + - '.github/**' + +env: + PYTHON_VERSION: '3.9' jobs: - test_nucleo: - runs-on: ${{ matrix.platform }} - if: "!contains(github.event.head_commit.message, 'ci skip')" + test: + name: Test Python 3.9 on Ubuntu + runs-on: ubuntu-latest + if: github.event.head_commit == null || !contains(github.event.head_commit.message, 'ci skip') + strategy: - max-parallel: 4 - matrix: - platform: [ubuntu-latest] - python-version: [3.7, 3.8] + fail-fast: false + steps: - - name: Checkout repo - uses: actions/checkout@v2 + - name: Checkout repository + uses: actions/checkout@v4 with: - ref: ${{ github.head_ref }} + fetch-depth: 0 submodules: recursive - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + + - name: Set up Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@v4 with: - python-version: ${{ matrix.python-version }} - - name: Install Python dependencies + python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' + cache-dependency-path: 'pyproject.toml' + + - name: Set up environment and install dependencies + run: | + # Create virtual environment + python -m venv .venv + + # Activate and set up the environment + source .venv/bin/activate + + # Ensure pip is up to date and install build tools + python -m pip install --upgrade pip setuptools wheel build + + # Install package with dev dependencies from pyproject.toml + pip install -e ".[dev]" + + - name: Build package + run: | + source .venv/bin/activate + python -m build --sdist --wheel --outdir dist/ . + + - name: Run tests run: | - python -m pip install --upgrade pip - pip install tox tox-gh-actions - - name: Test with tox - id: run-tox - run: tox -vv - env: - PLATFORM: ${{ matrix.platform }} + source .venv/bin/activate + python -m pytest tests/ -v diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..9ef5ed7 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,56 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +version: 2 + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally set the version of Python and requirements required to build your docs +python: + version: "3.9" + install: + - method: pip + path: . + extra_requirements: + - docs + +# Optionally build docs in additional formats such as PDF +formats: all + +# Optionally set the version of Sphinx +sphinx: + configuration: docs/conf.py + fail_on_warning: true + +# Optionally enable PDF/Epub builds +build: + os: ubuntu-22.04 + tools: + python: "3.9" + jobs: + post_create_environment: + - pip install -r docs/requirements.txt + +# Optionally include or exclude patterns +# build: +# file: docs/conf.py +# include_paths: +# - docs/ +# - src/ +# exclude_patterns: +# - .venv/ +# - build/ +# - dist/ +# - tests/ + +# Optionally set the default branch +defaults: + version: latest + +# Optionally hide build date +html_theme_options: + display_version: true + style_external_links: true diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index dcac41d..0000000 --- a/.travis.yml +++ /dev/null @@ -1,27 +0,0 @@ -# Config file for automatic testing at travis-ci.org - -language: python -python: - - 3.8 - - 3.7 - -# Command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors -install: pip install -U tox-travis - -# Command to run tests, e.g. python setup.py test -script: tox - -# Assuming you have installed the travis-ci CLI tool, after you -# create the Github repo and add it to Travis, run the -# following command to finish PyPI deployment setup: -# $ travis encrypt --add deploy.password -deploy: - provider: pypi - distributions: sdist bdist_wheel - user: rhshah - password: - secure: PLEASE_REPLACE_ME - on: - tags: true - repo: rhshah/genotype_variants - python: 3.8 diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 22ff26c..e72e60a 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -102,9 +102,8 @@ Before you submit a pull request, check that it meets these guidelines: 2. If the pull request adds functionality, the docs should be updated. Put your new functionality into a function with a docstring, and add the feature to the list in README.rst. -3. The pull request should work for Python 3.5, 3.6, 3.7 and 3.8, and for PyPy. Check - https://travis-ci.org/rhshah/genotype_variants/pull_requests - and make sure that the tests pass for all supported Python versions. +3. The pull request should work for Python 3.9, 3.10, and 3.11. Check + the GitHub Actions workflow and make sure that the tests pass for all supported Python versions. Tips ---- diff --git a/Dockerfile b/Dockerfile index 1fcd7b4..fe61ae8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,53 +1,116 @@ -################## Base Image ########## -ARG PYTHON_VERSION="3.8" -FROM --platform=linux/amd64 python:${PYTHON_VERSION}-slim +# Build stage for GetBaseCountsMultiSample +FROM --platform=linux/amd64 debian:bullseye-slim as builder -################## ARGUMENTS/Environments ########## -ARG BUILD_DATE -ARG BUILD_VERSION -ARG LICENSE="Apache-2.0" -ARG GENOTYPE_VARIANTS_VERSION ARG GBCMS_VERSION="1.2.5" -ARG VCS_REF - -################## METADATA ######################## -LABEL org.opencontainers.image.vendor="MSKCC" -LABEL org.opencontainers.image.authors="Eric Buehler (buehlere@mskcc.org)" +ARG DEBIAN_FRONTEND=noninteractive -LABEL org.opencontainers.image.created=${BUILD_DATE} \ - org.opencontainers.image.version=${BUILD_VERSION} \ - org.opencontainers.image.licenses=${LICENSE} \ - org.opencontainers.image.version.pvs=${GENOTYPE_VARIANTS_VERSION} \ - org.opencontainers.image.vcs-url="https://github.com/msk-access/genotype_variants.git" \ - org.opencontainers.image.vcs-ref=${VCS_REF} +# Install build dependencies with versions +RUN --mount=type=cache,target=/var/cache/apt \ + --mount=type=cache,target=/var/lib/apt \ + apt-get update && apt-get install --no-install-recommends -y \ + build-essential=12.9 \ + ca-certificates=20210119 \ + cmake=3.18.4-2+deb11u1 \ + curl=7.74.0-1.3+deb11u10 \ + g++=4:10.2.1-1 \ + gcc=4:10.2.1-1 \ + libjsoncpp-dev=1.9.4-4 \ + make=4.3-4.1 \ + unzip=6.0-26+deb11u1 \ + zlib1g-dev=1:1.2.11.dfsg-2+deb11u2 \ + && rm -rf /var/lib/apt/lists/* -LABEL org.opencontainers.image.description="This container uses python3.8 as the base image to build \ - genotype_variants ${GENOTYPE_VARIANTS_VERSION}" +# Build GetBaseCountsMultiSample with caching +RUN --mount=type=cache,target=/root/.cache \ + cd /opt && \ + curl -fsSL -o v${GBCMS_VERSION}.tar.gz \ + "https://github.com/msk-access/GetBaseCountsMultiSample/archive/refs/tags/v${GBCMS_VERSION}.tar.gz" && \ + tar xzf v${GBCMS_VERSION}.tar.gz && \ + cd /opt/GetBaseCountsMultiSample-${GBCMS_VERSION}/bamtools-master && \ + mkdir -p build && \ + cd build/ && \ + cmake -DCMAKE_CXX_FLAGS=-std=c++03 -DCMAKE_BUILD_TYPE=Release .. && \ + make -j$(nproc) && \ + make install && \ + cp ../lib/libbamtools.so.2.3.0 /usr/lib/ && \ + cd /opt/GetBaseCountsMultiSample-${GBCMS_VERSION} && \ + make -j$(nproc) && \ + cp GetBaseCountsMultiSample /usr/local/bin/ -ADD . /opt/genotype_variants -################## INSTALL ########################## +# Final stage +FROM --platform=linux/amd64 python:3.9-slim -# get build tools and install genotype variants +# Set environment variables +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PYTHONPATH=/app \ + PATH="/app/.local/bin:$PATH" \ + DEBIAN_FRONTEND=noninteractive \ + GIT_PYTHON_REFRESH=quiet -RUN apt-get update && apt-get install --no-install-recommends -y build-essential ca-certificates openssl gcc g++ make zlib1g-dev cmake libjsoncpp-dev curl unzip \ - && apt-get clean \ +# Install runtime dependencies with versions +RUN --mount=type=cache,target=/var/cache/apt \ + --mount=type=cache,target=/var/lib/apt \ + apt-get update && apt-get install --no-install-recommends -y \ + libgomp1=10.2.1-6 \ + libjsoncpp24=1.9.4-4 \ + zlib1g=1:1.2.11.dfsg-2+deb11u2 \ && rm -rf /var/lib/apt/lists/* -RUN cd /opt/ && \ - curl -L -O "https://github.com/msk-access/GetBaseCountsMultiSample/archive/refs/tags/v${GBCMS_VERSION}.tar.gz" && \ - tar xzvf v${GBCMS_VERSION}.tar.gz && \ - cd /opt/GetBaseCountsMultiSample-${GBCMS_VERSION}/bamtools-master && \ - rm -r build/ && \ - mkdir build && \ - cd build/ && \ - cmake -DCMAKE_CXX_FLAGS=-std=c++03 .. && \ - make && \ - make install && \ - cp ../lib/libbamtools.so.2.3.0 /usr/lib/ && \ - cd /opt/GetBaseCountsMultiSample-${GBCMS_VERSION} && \ - make && \ - cp GetBaseCountsMultiSample /usr/local/bin/ +# Copy GetBaseCountsMultiSample and its dependencies from builder +COPY --from=builder /usr/local/bin/GetBaseCountsMultiSample /usr/local/bin/ +COPY --from=builder /usr/lib/libbamtools.so.2.3.0 /usr/lib/ +RUN ldconfig + +# Set working directory +WORKDIR /app + +# Create non-root user early for better layer caching +RUN groupadd -r appuser && \ + useradd -r -g appuser appuser && \ + mkdir -p /app && \ + chown -R appuser:appuser /app + +# Copy only necessary files for installation +COPY --chown=appuser:appuser pyproject.toml setup.py README.rst ./ +COPY --chown=appuser:appuser genotype_variants/ ./genotype_variants/ + +# Install Python dependencies and package with uv for faster builds +USER appuser +ENV PATH="/home/appuser/.local/bin:${PATH}" + +# Install uv and build the package +RUN --mount=type=cache,target=/home/appuser/.cache/pip \ + python -m pip install --user --no-warn-script-location uv && \ + uv pip install --no-cache-dir -e .[dev] + +# Set default command and entrypoint +ENTRYPOINT ["python", "-m", "genotype_variants"] +CMD ["--help"] + +# Metadata +ARG BUILD_DATE +ARG BUILD_VERSION +ARG GENOTYPE_VARIANTS_VERSION +ARG VCS_REF + +LABEL org.opencontainers.image.vendor="MSKCC" \ + org.opencontainers.image.authors="Ronak Shah " \ + org.opencontainers.image.created=${BUILD_DATE} \ + org.opencontainers.image.version=${BUILD_VERSION} \ + org.opencontainers.image.licenses="Apache-2.0" \ + org.opencontainers.image.version.pvs=${GENOTYPE_VARIANTS_VERSION} \ + org.opencontainers.image.vcs-url="https://github.com/msk-access/genotype_variants" \ + org.opencontainers.image.vcs-ref=${VCS_REF} \ + org.opencontainers.image.documentation="https://github.com/msk-access/genotype_variants#readme" \ + org.opencontainers.image.source="https://github.com/msk-access/genotype_variants" \ + org.opencontainers.image.title="Genotype Variants" \ + org.opencontainers.image.description="A tool for genotyping SNV, INDEL, and SV variants in genomic data" -RUN cd /opt/genotype_variants && \ - pip install -r requirements_dev.txt && \ - python setup.py install +# Health check with more comprehensive testing +HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ + CMD python -c "import sys, pkg_resources; \ + pkg_resources.get_distribution('genotype_variants'); \ + import genotype_variants; \ + print(f'Version: {genotype_variants.__version__}'); \ + sys.exit(0)" || exit 1 diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 965b2dd..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,11 +0,0 @@ -include AUTHORS.rst -include CONTRIBUTING.rst -include HISTORY.rst -include LICENSE -include README.rst - -recursive-include tests * -recursive-exclude * __pycache__ -recursive-exclude * *.py[co] - -recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif diff --git a/Makefile b/Makefile deleted file mode 100644 index f7ebd3f..0000000 --- a/Makefile +++ /dev/null @@ -1,85 +0,0 @@ -.PHONY: clean clean-test clean-pyc clean-build docs help -.DEFAULT_GOAL := help - -define BROWSER_PYSCRIPT -import os, webbrowser, sys - -from urllib.request import pathname2url - -webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) -endef -export BROWSER_PYSCRIPT - -define PRINT_HELP_PYSCRIPT -import re, sys - -for line in sys.stdin: - match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) - if match: - target, help = match.groups() - print("%-20s %s" % (target, help)) -endef -export PRINT_HELP_PYSCRIPT - -BROWSER := python -c "$$BROWSER_PYSCRIPT" - -help: - @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) - -clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts - -clean-build: ## remove build artifacts - rm -fr build/ - rm -fr dist/ - rm -fr .eggs/ - find . -name '*.egg-info' -exec rm -fr {} + - find . -name '*.egg' -exec rm -f {} + - -clean-pyc: ## remove Python file artifacts - find . -name '*.pyc' -exec rm -f {} + - find . -name '*.pyo' -exec rm -f {} + - find . -name '*~' -exec rm -f {} + - find . -name '__pycache__' -exec rm -fr {} + - -clean-test: ## remove test and coverage artifacts - rm -fr .tox/ - rm -f .coverage - rm -fr htmlcov/ - rm -fr .pytest_cache - -lint: ## check style with flake8 - flake8 genotype_variants tests - -test: ## run tests quickly with the default Python - python setup.py test - -test-all: ## run tests on every Python version with tox - tox - -coverage: ## check code coverage quickly with the default Python - coverage run --source genotype_variants setup.py test - coverage report -m - coverage html - $(BROWSER) htmlcov/index.html - -docs: ## generate Sphinx HTML documentation, including API docs - rm -f docs/genotype_variants.rst - rm -f docs/modules.rst - sphinx-apidoc -o docs/ genotype_variants - $(MAKE) -C docs clean - $(MAKE) -C docs html - $(BROWSER) docs/_build/html/index.html - -servedocs: docs ## compile the docs watching for changes - watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D . - -release: dist ## package and upload a release - twine upload dist/* - -dist: clean ## builds source and wheel package - python setup.py sdist - python setup.py bdist_wheel - ls -l dist - -install: clean ## install the package to the active Python's site-packages - python setup.py install diff --git a/genotype_variants/__init__.py b/genotype_variants/__init__.py index c6881cf..e4f22ce 100644 --- a/genotype_variants/__init__.py +++ b/genotype_variants/__init__.py @@ -1,5 +1,5 @@ """Top-level package for genotype_variants.""" __author__ = """Ronak Shah""" -__email__ = 'rons.shah@gmail.com' -__version__ = '0.3.8' +__email__ = "rons.shah@gmail.com" +__version__ = "0.3.10" diff --git a/genotype_variants/cli.py b/genotype_variants/cli.py index de8adab..c1508ab 100644 --- a/genotype_variants/cli.py +++ b/genotype_variants/cli.py @@ -3,6 +3,7 @@ import sys import pathlib import logging + try: import click except ImportError as e: @@ -42,7 +43,8 @@ __date__ = "2020-01-29" __updated__ = "2022-04-28" -plugin_folder = os.path.join(os.path.dirname(__file__), 'commands') +plugin_folder = os.path.join(os.path.dirname(__file__), "commands") + class MyCLI(click.MultiCommand): @@ -50,7 +52,7 @@ def list_commands(self, ctx): """Dynamically get the list of commands.""" rv = [] for filename in os.listdir(plugin_folder): - if filename.endswith('.py') and not filename.startswith('__init__'): + if filename.endswith(".py") and not filename.startswith("__init__"): rv.append(filename[:-3]) rv.sort() return rv @@ -58,15 +60,17 @@ def list_commands(self, ctx): def get_command(self, ctx, name): """Dynamically get the command.""" ns = {} - fn = os.path.join(plugin_folder, name + '.py') + fn = os.path.join(plugin_folder, name + ".py") with open(fn) as f: - code = compile(f.read(), fn, 'exec') + code = compile(f.read(), fn, "exec") eval(code, ns, ns) - return ns['cli'] + return ns["cli"] @click.command(cls=MyCLI) -@click.version_option(None, "-v", "--version", message="%(version)s", prog_name="genotype_variants") +@click.version_option( + None, "-v", "--version", message="%(version)s", prog_name="genotype_variants" +) def main(args=None): """Console script for genotype_variants.""" pass diff --git a/genotype_variants/commands/__init__.py b/genotype_variants/commands/__init__.py index f6c3ef8..6ac5b8d 100644 --- a/genotype_variants/commands/__init__.py +++ b/genotype_variants/commands/__init__.py @@ -2,4 +2,4 @@ __author__ = """Ronak Shah""" __email__ = 'rons.shah@gmail.com' -__version__ = '0.3.8' +__version__ = '0.3.10' diff --git a/genotype_variants/commands/small_variants.py b/genotype_variants/commands/small_variants.py index 065ae5a..f7bf1ae 100644 --- a/genotype_variants/commands/small_variants.py +++ b/genotype_variants/commands/small_variants.py @@ -112,7 +112,7 @@ def cli(): "-fd", "--filter-duplicate", required=False, - default=0, + default=1, type=click.INT, help="Filter duplicate parameter for GetBaseCountMultiSample", ) @@ -160,7 +160,7 @@ def generate( fragment_count, mapping_quality, threads, - sample_id + sample_id, ): """Command that helps to generate genotyped MAF, the output file will be labelled with @@ -199,12 +199,12 @@ def generate( if not (patient_id or sample_id): logger.error( "genotype_variants:small_variants:generate:: either Patient ID or Sample ID must be provided", - ) + ) exit(1) if patient_id: - logger.info("small_variants: Patient ID: %s", patient_id) + logger.info("small_variants: Patient ID: %s", patient_id) if sample_id: - logger.info("small_variants: Sample ID: %s", sample_id) + logger.info("small_variants: Sample ID: %s", sample_id) if standard_bam: logger.info("small_variants: Standard BAM: %s", standard_bam) if duplex_bam: @@ -244,7 +244,7 @@ def generate( fragment_count, mapping_quality, threads, - sample_id + sample_id, ) p1 = run_cmd(cmd) logger.info( @@ -266,7 +266,7 @@ def generate( fragment_count, mapping_quality, threads, - sample_id + sample_id, ) p2 = run_cmd(cmd) logger.info( @@ -288,7 +288,7 @@ def generate( fragment_count, mapping_quality, threads, - sample_id + sample_id, ) p3 = run_cmd(cmd) logger.info( @@ -307,55 +307,77 @@ def generate( logger.info("--------------------------------------------------") return (std_output_maf, simplex_output_maf, duplex_output_maf) + @click_log.simple_verbosity_option(logger) def generate_gbcms_cmd( - input_maf, - btype, - reference_fasta, - gbcms_path, - patient_id, - bam, - filter_duplicate, - fragment_count, - mapping_quality, - threads, - sample_id -): + input_maf: str, + btype: str, + reference_fasta: str, + gbcms_path: str, + patient_id: str, + bam: str, + filter_duplicate: bool, + fragment_count: int, + mapping_quality: int, + threads: int, + sample_id: str = None, +) -> tuple[str, pathlib.Path]: + """Generate command for GetBaseCountMultiSample. - """This will help generate command for GetBaseCountMultiSample""" - - # if no sample_id is provided, it is inferred from the patient_id - if not sample_id: - logger.warning("genotype_variants:small_variants:generate_gbcms: No Sample ID found: Inferring Sample ID from Patient ID for for Geontyping.") - sample_id = patient_id - logger.info("genotype_variants:small_variants:generate_gbcms: Sample ID found. Genotyping using Sample ID.") - outfile = sample_id + "-" + btype + "_genotyped.maf" - output_maf = pathlib.Path.cwd().joinpath(outfile) - cmd = ( - str(gbcms_path) - + " --bam " - + sample_id - + ":" - + str(bam) - + " --filter_duplicate " - + str(filter_duplicate) - + " --fragment_count " - + str(fragment_count) - + " --maf " - + str(input_maf) - + " --maq " - + str(mapping_quality) - + " --omaf" - + " --output " - + str(output_maf) - + " --fasta " - + str(reference_fasta) - + " --thread " - + str(threads) - + " --generic_counting" - ) + Args: + input_maf: Path to input MAF file + btype: Type of barcode, either 'STANDARD' or other + reference_fasta: Path to reference FASTA file + gbcms_path: Path to GetBaseCountMultiSample executable + patient_id: Patient ID + bam: Path to BAM file + filter_duplicate: Whether to filter duplicates + fragment_count: Fragment count threshold + mapping_quality: Minimum mapping quality + threads: Number of threads to use + sample_id: Sample ID (defaults to patient_id if not provided) - return (cmd, output_maf) + Returns: + tuple: (command_string, output_maf_path) + """ + if not all([input_maf, btype, reference_fasta, gbcms_path, patient_id, bam]): + raise ValueError("Missing required arguments") + + # Use provided sample_id or fall back to patient_id + sample_id = sample_id or patient_id + if sample_id == patient_id: + logger.warning( + "genotype_variants:small_variants:generate_gbcms: " + "No Sample ID provided, using Patient ID: %s", + patient_id, + ) + + # Prepare output filename + output_maf = pathlib.Path.cwd() / f"{sample_id}-{btype}_genotyped.maf" + + # Build command components + cmd_parts = [ + str(gbcms_path), + f"--bam {sample_id}:{bam}", + f"--filter_duplicate {int(filter_duplicate)}", + f"--fragment_count {fragment_count}", + f"--maf {input_maf}", + f"--maq {mapping_quality}", + "--omaf", + f"--output {output_maf}", + f"--fasta {reference_fasta}", + f"--thread {threads}", + ] + + # Add generic_counting flag for non-STANDARD btype + if btype != "STANDARD": + cmd_parts.append("--generic_counting") + + # Join all command parts with spaces + cmd = " ".join(cmd_parts) + + logger.debug("Generated GBCMS command: %s", cmd) + return cmd, output_maf # Merge @@ -397,7 +419,13 @@ def generate_gbcms_cmd( ) @click_log.simple_verbosity_option(logger) def merge( - patient_id, input_maf, input_standard_maf, input_duplex_maf, input_simplex_maf, sample_id, tumor_name_override + patient_id, + input_maf, + input_standard_maf, + input_duplex_maf, + input_simplex_maf, + sample_id, + tumor_name_override, ): """ Given original input MAF used as an input for GBCMS along with @@ -486,18 +514,18 @@ def merge( if not (patient_id or sample_id): logger.error( "genotype_variants:small_variants:generate:: either Patient ID or Sample ID must be provided", - ) + ) exit(1) if patient_id: - bam_id = patient_id + bam_id = patient_id if sample_id: - bam_id = sample_id + bam_id = sample_id logger.info("small_variants: ID: %s", bam_id) outfile = bam_id if d_maf is not None and s_maf is not None: ds_maf = cdsd(s_maf, d_maf) if tumor_name_override: - ds_maf['Tumor_Sample_Barcode'] = bam_id + ds_maf["Tumor_Sample_Barcode"] = bam_id file_name = pathlib.Path.cwd().joinpath( outfile + "-SIMPLEX-DUPLEX" + "_genotyped.maf" ) @@ -509,7 +537,7 @@ def merge( if o_maf is not None and i_maf is not None and ds_maf is not None: df_o_s_ds = camd(o_maf, i_maf, ds_maf) if tumor_name_override: - df_o_s_ds['Tumor_Sample_Barcode'] = bam_id + df_o_s_ds["Tumor_Sample_Barcode"] = bam_id file_name = pathlib.Path.cwd().joinpath( outfile + "-ORG-STD-SIMPLEX-DUPLEX" + "_genotyped.maf" ) @@ -517,15 +545,13 @@ def merge( elif o_maf is not None and i_maf is not None: df_o_s = camd(o_maf, i_maf, None) if tumor_name_override: - df_o_s['Tumor_Sample_Barcode'] = bam_id - file_name = pathlib.Path.cwd().joinpath( - outfile + "-ORG-STD" + "_genotyped.maf" - ) + df_o_s["Tumor_Sample_Barcode"] = bam_id + file_name = pathlib.Path.cwd().joinpath(outfile + "-ORG-STD" + "_genotyped.maf") write_csv(file_name, df_o_s) elif o_maf is not None and ds_maf is not None: df_o_ds = camd(o_maf, None, ds_maf) if tumor_name_override: - df_o_ds['Tumor_Sample_Barcode'] = bam_id + df_o_ds["Tumor_Sample_Barcode"] = bam_id file_name = pathlib.Path.cwd().joinpath( outfile + "-ORG-SIMPLEX-DUPLEX" + "_genotyped.maf" ) @@ -533,7 +559,7 @@ def merge( elif i_maf is not None and ds_maf is not None: df_s_ds = camd(None, i_maf, ds_maf) if tumor_name_override: - df_s_ds['Tumor_Sample_Barcode'] = bam_id + df_s_ds["Tumor_Sample_Barcode"] = bam_id file_name = pathlib.Path.cwd().joinpath( outfile + "-STD-SIMPLEX-DUPLEX" + "_genotyped.maf" ) @@ -551,7 +577,7 @@ def merge( outfile + "-SIMPLEX-DUPLEX" + "_genotyped.maf" ) if tumor_name_override: - ds_maf['Tumor_Sample_Barcode'] = bam_id + ds_maf["Tumor_Sample_Barcode"] = bam_id write_csv(file_name, ds_maf) t1_stop = time.perf_counter() t2_stop = time.process_time() @@ -564,55 +590,56 @@ def merge( def create_empty_maf_if_missing(filename): header = [ - 'Hugo_Symbol', - 'Entrez_Gene_Id', - 'Center', - 'NCBI_Build', - 'Chromosome', - 'Start_Position', - 'End_Position', - 'Strand', - 'Variant_Classification', - 'Variant_Type', - 'Reference_Allele', - 'Tumor_Seq_Allele1', - 'Tumor_Seq_Allele2', - 'dbSNP_RS', - 'dbSNP_Val_Status', - 'Tumor_Sample_Barcode', - 'Matched_Norm_Sample_Barcode', - 'Match_Norm_Seq_Allele1', - 'Match_Norm_Seq_Allele2', - 'Tumor_Validation_Allele1', - 'Tumor_Validation_Allele2', - 'Match_Norm_Validation_Allele1', - 'Match_Norm_Validation_Allele2', - 'Verification_Status', - 'Validation_Status', - 'Mutation_Status', - 'Sequencing_Phase', - 'Sequence_Source', - 'Validation_Method', - 'Score', - 'BAM_File', - 'Sequencer', - 't_ref_count', - 't_alt_count', - 'n_ref_count', - 'n_alt_count', - 'Caller', - 't_total_count', - 't_variant_frequency', - 't_total_count_forward', - 't_ref_count_forward', - 't_alt_count_forward', - 't_total_count_fragment', - 't_ref_count_fragment', - 't_alt_count_fragment'] + "Hugo_Symbol", + "Entrez_Gene_Id", + "Center", + "NCBI_Build", + "Chromosome", + "Start_Position", + "End_Position", + "Strand", + "Variant_Classification", + "Variant_Type", + "Reference_Allele", + "Tumor_Seq_Allele1", + "Tumor_Seq_Allele2", + "dbSNP_RS", + "dbSNP_Val_Status", + "Tumor_Sample_Barcode", + "Matched_Norm_Sample_Barcode", + "Match_Norm_Seq_Allele1", + "Match_Norm_Seq_Allele2", + "Tumor_Validation_Allele1", + "Tumor_Validation_Allele2", + "Match_Norm_Validation_Allele1", + "Match_Norm_Validation_Allele2", + "Verification_Status", + "Validation_Status", + "Mutation_Status", + "Sequencing_Phase", + "Sequence_Source", + "Validation_Method", + "Score", + "BAM_File", + "Sequencer", + "t_ref_count", + "t_alt_count", + "n_ref_count", + "n_alt_count", + "Caller", + "t_total_count", + "t_variant_frequency", + "t_total_count_forward", + "t_ref_count_forward", + "t_alt_count_forward", + "t_total_count_fragment", + "t_ref_count_fragment", + "t_alt_count_fragment", + ] if not os.path.exists(filename): empty_df = pd.DataFrame(columns=header) - empty_df.to_csv(filename, index=False, sep='\t') + empty_df.to_csv(filename, index=False, sep="\t") def write_csv(file_name, data_frame): @@ -743,8 +770,7 @@ def all( mapping_quality, threads, sample_id, - tumor_name_override - + tumor_name_override, ): """ Command that helps to generate genotyped MAF and @@ -785,10 +811,16 @@ def all( fragment_count, mapping_quality, threads, - sample_id + sample_id, ) final_file = merge.callback( - patient_id, input_maf, standard_maf, duplex_maf, simplex_maf, sample_id, tumor_name_override + patient_id, + input_maf, + standard_maf, + duplex_maf, + simplex_maf, + sample_id, + tumor_name_override, ) t1_stop = time.perf_counter() diff --git a/genotype_variants/create_all_maf_dataframe.py b/genotype_variants/create_all_maf_dataframe.py index fca5c1e..fe9a859 100644 --- a/genotype_variants/create_all_maf_dataframe.py +++ b/genotype_variants/create_all_maf_dataframe.py @@ -17,6 +17,8 @@ logger.info( "genotype:variants:small_variants::create_all_maf_dataframe:: Generating duplex simplex dataframe" ) + + # Adopted from Maysun script def create_all_maf_dataframe( original_dataframe, standard_dataframe, simplex_duplex_dataframe diff --git a/genotype_variants/create_duplex_simplex_dataframe.py b/genotype_variants/create_duplex_simplex_dataframe.py index 28ddf1a..76f4035 100644 --- a/genotype_variants/create_duplex_simplex_dataframe.py +++ b/genotype_variants/create_duplex_simplex_dataframe.py @@ -18,6 +18,8 @@ logger.info( "genotype:variants:small_variants::create_duplex_simplex_dataframe:: Generating duplex simplex dataframe" ) + + # Adopted from Maysun script def create_duplex_simplex_dataframe(simplex_dataframe, duplex_dataframe): """Code to merge duplex and simplex fragment counts in MAF format""" @@ -238,7 +240,12 @@ def create_duplex_simplex_dataframe(simplex_dataframe, duplex_dataframe): exit(1) ##Add - cols_min = ["t_ref_count_fragment_simplex","t_ref_count_fragment_duplex", "t_alt_count_fragment_simplex", "t_alt_count_fragment_duplex"] + cols_min = [ + "t_ref_count_fragment_simplex", + "t_ref_count_fragment_duplex", + "t_alt_count_fragment_simplex", + "t_alt_count_fragment_duplex", + ] df_ds[cols_min] = df_ds[cols_min].replace(np.nan, 0) if df_ds.shape[0] > 0: try: @@ -336,15 +343,15 @@ def create_duplex_simplex_dataframe(simplex_dataframe, duplex_dataframe): "Successfully merged data frame and the counts for simplex and duplex MAF" ) # fill NA command for columns - cols = [ - "t_total_count_fragment_simplex", - "t_vaf_fragment_simplex", - "t_total_count_fragment_duplex", - "t_vaf_fragment_duplex", - "t_ref_count_fragment_simplex_duplex", - "t_alt_count_fragment_simplex_duplex", - "t_total_count_fragment_simplex_duplex", - "t_vaf_fragment_simplex_duplex", - ] + cols = [ + "t_total_count_fragment_simplex", + "t_vaf_fragment_simplex", + "t_total_count_fragment_duplex", + "t_vaf_fragment_duplex", + "t_ref_count_fragment_simplex_duplex", + "t_alt_count_fragment_simplex_duplex", + "t_total_count_fragment_simplex_duplex", + "t_vaf_fragment_simplex_duplex", + ] df_ds[cols] = df_ds[cols].replace(np.nan, 0) return df_ds diff --git a/genotype_variants/run_cmd.py b/genotype_variants/run_cmd.py index c3b790b..8cb8bd7 100644 --- a/genotype_variants/run_cmd.py +++ b/genotype_variants/run_cmd.py @@ -1,5 +1,6 @@ import logging import subprocess + """ run_cmd ~~~~~~~~~~~~~~~ @@ -13,6 +14,7 @@ # Making logging possible logger = logging.getLogger("genotype_variants") + def run_cmd(cmd): """Code to run shell commands""" logger.debug( @@ -31,7 +33,5 @@ def run_cmd(cmd): if stderr is None: logger.debug("run_cmd: run: Read: %s", stdout.decode("utf-8")) else: - logger.error( - "run_cmd: run: could not run" - ) - return(out) + logger.error("run_cmd: run: could not run") + return out diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..c95a0ec --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,73 @@ +[build-system] +requires = ["setuptools>=68.0.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "genotype-variants" +version = "0.3.10" +description = "A tool for genotyping SNV, INDEL, and SV variants in genomic data." +readme = "README.rst" +authors = [ + {name = "Ronak Shah", email = "rons.shah@gmail.com"}, +] +license = {text = "Apache-2.0"} +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Science/Research", + "Intended Audience :: Healthcare Industry", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "Topic :: Scientific/Engineering :: Medical Science Apps.", +] +requires-python = ">=3.9" +dependencies = [ + "Click>=7.0", + "click_log>=0.3.2", + "pandas>=1.0.0", + "xlrd>=1.2.0", +] + +[project.optional-dependencies] +dev = [ + "black>=23.0.0", + "flake8>=6.0.0", + "isort>=5.0.0", + "mypy>=1.0.0", + "pytest>=7.0.0", + "pytest-cov>=4.0.0", + "tox>=4.0.0", + "sphinx>=7.0.0", + "sphinx-rtd-theme>=1.0.0", +] + +[project.scripts] +genotype_variants = "genotype_variants.cli:main" + +[tool.black] +line-length = 88 +target-version = ["py39"] +include = '\.pyi?$' + +[tool.isort] +profile = "black" +multi_line_output = 3 +include_trailing_comma = true +line_length = 88 + +[tool.mypy] +python_version = "3.9" +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = true +check_untyped_defs = true + +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = "test_*.py" +addopts = "-v --cov=genotype_variants --cov-report=term-missing" + +[metadata] +license_file = "LICENSE" diff --git a/requirements.in b/requirements.in new file mode 100644 index 0000000..243c204 --- /dev/null +++ b/requirements.in @@ -0,0 +1,6 @@ +# Core dependencies +Click==8.1.7 +click_log==0.4.0 +pandas>=1.3.0,<2.0.0 +xlrd>=2.0.0,<3.0.0 +numpy>=1.21.0,<2.0.0 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a6eb409 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,24 @@ +# +# This file is autogenerated by pip-compile with Python 3.9 +# To update, run: +# +# pip-compile --output-file=requirements.txt requirements.in +# +click==8.1.7 + # via -r requirements.in +click-log==0.4.0 + # via -r requirements.in +numpy==1.24.4 + # via + # -r requirements.in + # pandas +pandas==1.5.3 + # via -r requirements.in +python-dateutil==2.9.0.post0 + # via pandas +pytz==2024.1 + # via pandas +six==1.16.0 + # via python-dateutil +xlrd==2.0.1 + # via -r requirements.in diff --git a/requirements_dev.txt b/requirements_dev.txt index 6f351e8..e38f58a 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,14 +1,34 @@ -pip==20.0.2 -bump2version==0.5.11 -wheel==0.34.1 -watchdog==3.0.0 -flake8==3.7.9 -tox==3.14.3 -coverage==5.0.3 -Sphinx==2.3.1 -twine==3.1.1 -Click==7.0 -click-log==0.3.2 -pandas -xlrd==1.2.0 -numpy>=1.20.3 +# Development requirements +-r requirements.txt + +# Build & packaging +build>=0.10.0 +pip-tools>=7.0.0 +wheel>=0.41.0 +setuptools>=68.0.0 + +# Testing +tox>=4.5.0 +pytest>=7.4.0 +pytest-cov>=4.1.0 +coverage[toml]>=7.2.7 + +# Code quality +black>=23.7.0 +flake8>=6.1.0 +isort>=5.12.0 +mypy>=1.5.0 +bandit>=1.7.5 +safety>=2.3.5 + +# Documentation +Sphinx>=7.0.1 +sphinx-rtd-theme>=1.2.2 + +# Publishing +twine>=4.0.2 +bump2version>=1.0.1 + +# Development tools +watchdog>=3.0.0 +ipython>=8.14.0 diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 14faee9..0000000 --- a/setup.cfg +++ /dev/null @@ -1,24 +0,0 @@ -[bumpversion] -current_version = 0.3.8 -commit = True -tag = True - -[bumpversion:file:setup.py] -search = version='{current_version}' -replace = version='{new_version}' - -[bumpversion:file:genotype_variants/__init__.py] -search = __version__ = '{current_version}' -replace = __version__ = '{new_version}' - -[bumpversion:file:genotype_variants/commands/__init__.py] -search = __version__ = '{current_version}' -replace = __version__ = '{new_version}' - -[bdist_wheel] -universal = 1 - -[flake8] -exclude = docs - -[aliases] diff --git a/setup.py b/setup.py deleted file mode 100644 index 42d150b..0000000 --- a/setup.py +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python - -"""The setup script.""" - -from setuptools import setup, find_packages - -with open('README.rst') as readme_file: - readme = readme_file.read() - -with open('HISTORY.rst') as history_file: - history = history_file.read() - -requirements = ['Click>=7.0', 'click_log>=0.3.2', 'pandas>=1.0.0', 'xlrd>=1.2.0'] - -setup_requirements = [ ] - -test_requirements = [ ] - -setup( - author="Ronak Shah", - author_email='rons.shah@gmail.com', - python_requires='>=3.7', - classifiers=[ - 'Development Status :: 2 - Pre-Alpha', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: Apache Software License', - 'Natural Language :: English', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - ], - description="Project to genotype SNV, INDEL and SV.", - entry_points={ - 'console_scripts': [ - 'genotype_variants=genotype_variants.cli:main', - ], - }, - install_requires=requirements, - license="Apache Software License 2.0", - long_description=readme + '\n\n' + history, - include_package_data=True, - keywords='genotype_variants', - name='genotype_variants', - packages=find_packages(include=['genotype_variants', 'genotype_variants.*']), - setup_requires=setup_requirements, - test_suite='tests', - tests_require=test_requirements, - url='https://github.com/msk-access/genotype_variants', - version='0.3.8', - zip_safe=False, -) diff --git a/tests/test_genotype_variants.py b/tests/test_genotype_variants.py index e132bfc..1bb1c99 100644 --- a/tests/test_genotype_variants.py +++ b/tests/test_genotype_variants.py @@ -11,14 +11,29 @@ create_duplex_simplex_dataframe as cdsd, ) + class TestGenotype_variants(unittest.TestCase): """Tests for `genotype_variants` package.""" def setUp(self): """Set up test fixtures, if any.""" - self.mutation_key = ['Chromosome', 'Start_Position', 'End_Position', 'Reference_Allele', 'Tumor_Seq_Allele2'] - self.d_maf = pd.read_csv('tests/test_data/C-100000-L002-d02-DUPLEX_genotyped.maf', sep="\t", header="infer") - self.s_maf = pd.read_csv('tests/test_data/C-100000-L002-d02-SIMPLEX_genotyped.maf', sep="\t", header="infer") + self.mutation_key = [ + "Chromosome", + "Start_Position", + "End_Position", + "Reference_Allele", + "Tumor_Seq_Allele2", + ] + self.d_maf = pd.read_csv( + "tests/test_data/C-100000-L002-d02-DUPLEX_genotyped.maf", + sep="\t", + header="infer", + ) + self.s_maf = pd.read_csv( + "tests/test_data/C-100000-L002-d02-SIMPLEX_genotyped.maf", + sep="\t", + header="infer", + ) self.d_maf = self.d_maf.set_index(self.mutation_key, drop=False) self.s_maf = self.s_maf.set_index(self.mutation_key, drop=False) @@ -45,23 +60,29 @@ def test_merge_simplex_duplex(self): """ df_merge = cdsd(self.s_maf, self.d_maf) df_merge = df_merge.sort_index() - expected = pd.read_csv('tests/test_data/C-100000-L002-d02-SIMPLEX-DUPLEX_genotyped.maf', sep='\t') + expected = pd.read_csv( + "tests/test_data/C-100000-L002-d02-SIMPLEX-DUPLEX_genotyped.maf", sep="\t" + ) expected = expected.set_index(self.mutation_key, drop=False) expected = expected.sort_index() pd.testing.assert_frame_equal(df_merge, expected) # SNP - snp_index = (16, 68842732, 68842732, 'A', 'C') - assert df_merge.loc[snp_index]['t_ref_count_fragment_simplex_duplex'] == 2737 - assert df_merge.loc[snp_index]['t_alt_count_fragment_simplex_duplex'] == 3 - assert df_merge.loc[snp_index]['t_total_count_fragment_simplex_duplex'] == 2740 + snp_index = (16, 68842732, 68842732, "A", "C") + assert df_merge.loc[snp_index]["t_ref_count_fragment_simplex_duplex"] == 2737 + assert df_merge.loc[snp_index]["t_alt_count_fragment_simplex_duplex"] == 3 + assert df_merge.loc[snp_index]["t_total_count_fragment_simplex_duplex"] == 2740 # # INS # insertion_index = (18, 48584855, 48584855, 'A', 'TTT') # assert df_merge.loc[insertion_index]['t_ref_count_fragment'] == 694 # assert df_merge.loc[insertion_index]['t_alt_count_fragment'] == 4 # assert df_merge.loc[insertion_index]['t_total_count_fragment'] == 698 # DEL - deletion_index = (5, 1295253, 1295262, 'GGGTCGGGAC', '-') - assert df_merge.loc[deletion_index]['t_ref_count_fragment_simplex_duplex'] == 537 - assert df_merge.loc[deletion_index]['t_alt_count_fragment_simplex_duplex'] == 0 - assert df_merge.loc[deletion_index]['t_total_count_fragment_simplex_duplex'] == 537 + deletion_index = (5, 1295253, 1295262, "GGGTCGGGAC", "-") + assert ( + df_merge.loc[deletion_index]["t_ref_count_fragment_simplex_duplex"] == 537 + ) + assert df_merge.loc[deletion_index]["t_alt_count_fragment_simplex_duplex"] == 0 + assert ( + df_merge.loc[deletion_index]["t_total_count_fragment_simplex_duplex"] == 537 + ) diff --git a/tox.ini b/tox.ini deleted file mode 100644 index de957be..0000000 --- a/tox.ini +++ /dev/null @@ -1,18 +0,0 @@ -[tox] -envlist = py37, py38, flake8 - -[travis] -python = - 3.8: py38 - 3.7: py37 - -[testenv:flake8] -basepython = python -deps = flake8 -commands = flake8 genotype_variants - -[testenv] -setenv = - PYTHONPATH = {toxinidir} - -commands = python -m unittest discover