diff --git a/.github/actions/python_build/action.yml b/.github/actions/python_build/action.yml index 4875167..002ffb2 100644 --- a/.github/actions/python_build/action.yml +++ b/.github/actions/python_build/action.yml @@ -12,7 +12,7 @@ runs: using: "composite" steps: - name: Configure python interpreter - uses: actions/setup-python@v5 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: cache: 'pip' cache-dependency-path: '.ci-pip-cache-key' diff --git a/.github/actions/scala_build/action.yml b/.github/actions/scala_build/action.yml index 11adbe4..644b10e 100644 --- a/.github/actions/scala_build/action.yml +++ b/.github/actions/scala_build/action.yml @@ -18,7 +18,7 @@ runs: using: "composite" steps: - name: Configure JDK - uses: actions/setup-java@v5 + uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0 with: java-version: '17' distribution: 'zulu' @@ -28,7 +28,7 @@ runs: shell: bash run: echo "MAVEN_OPTS=-Xmx4g -XX:+UseG1GC" >> $GITHUB_ENV - name: Configure python interpreter - uses: actions/setup-python@v5 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: cache: 'pip' # caches dependencies for faster subsequent runs cache-dependency-path: '.ci-pip-cache-key' diff --git a/.github/actions/upload_artifacts/action.yml b/.github/actions/upload_artifacts/action.yml index 788c947..67d7894 100644 --- a/.github/actions/upload_artifacts/action.yml +++ b/.github/actions/upload_artifacts/action.yml @@ -4,12 +4,12 @@ runs: using: "composite" steps: - name: upload build artifacts - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: build-artifacts path: staging/build-artifacts/* - name: upload user artifacts - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: user-artifacts path: staging/user-artifacts/* diff --git a/.github/dependabot.yml 
b/.github/dependabot.yml index 1706818..82a54ae 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -7,8 +7,20 @@ updates: directory: "/" schedule: interval: "weekly" + # Labs lockdown policy: wait 7 days after a release before opening a PR + # (reduces exposure to supply-chain attacks that rely on fast-propagating new versions). + cooldown: + default-days: 7 - package-ecosystem: "pip" directory: "/python/geobrix" schedule: interval: "weekly" + # Labs lockdown policy: wait 7 days after a release before opening a PR + # (reduces exposure to supply-chain attacks that rely on fast-propagating new versions). + cooldown: + default-days: 7 + + # NOTE: No `github-actions` ecosystem entry: the lockdown policy disables + # Dependabot updates for Actions so SHA pins are not silently bumped. + # Action SHAs are refreshed manually via `scripts/security/pin-gh-actions`. diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml index 3b4ab3a..7d96080 100644 --- a/.github/workflows/build_main.yml +++ b/.github/workflows/build_main.yml @@ -15,17 +15,19 @@ on: workflow_dispatch: {} permissions: contents: read - id-token: write jobs: # Regenerate doc-snippet-inventory and push to the PR branch (not master). Only for PRs targeting master. update-doc-inventory: runs-on: larger if: github.event_name == 'pull_request' && github.base_ref == 'master' + # Scoped to protected env so REPO_ACCESS_TOKEN is only available to approved workflow runs. + environment: runtime permissions: + # Needed to push the regenerated inventory back onto the PR head branch. 
contents: write steps: - name: Checkout PR head branch - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} ref: ${{ github.head_ref }} @@ -41,6 +43,8 @@ jobs: git push origin HEAD:${{ github.head_ref }} build: runs-on: larger + # Checkout uses REPO_ACCESS_TOKEN (non-exempt secret), so gate behind the protected env. + environment: runtime env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} strategy: @@ -52,14 +56,14 @@ jobs: spark: [ 4.0.0 ] steps: - name: checkout code - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} - name: Create pip cache key file run: | echo "${{ github.ref }}-${{ matrix.python }}-${{ matrix.numpy }}-${{ matrix.spark }}-${{ matrix.gdal }}" > .ci-pip-cache-key - name: Cache apt packages - uses: actions/cache@v5 + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: .cache/apt-archives key: apt-${{ runner.os }}-${{ hashFiles('.github/actions/scala_build/action.yml', '.github/actions/python_build/action.yml') }} @@ -96,7 +100,7 @@ jobs: fi ls -la coverage-reports/ - name: Upload coverage artifacts - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: coverage-reports path: coverage-reports @@ -113,12 +117,12 @@ jobs: contents: read steps: - name: Download coverage artifacts - uses: actions/download-artifact@v5 + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: name: coverage-reports path: coverage-reports - name: Upload to Codecov - uses: codecov/codecov-action@v5 + uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2 with: token: ${{ secrets.CODECOV_TOKEN }} directory: coverage-reports diff --git a/.github/workflows/build_python.yml 
b/.github/workflows/build_python.yml index 7cd1931..b35b8a3 100644 --- a/.github/workflows/build_python.yml +++ b/.github/workflows/build_python.yml @@ -6,10 +6,11 @@ on: - "python/**" permissions: contents: read - id-token: write jobs: build: runs-on: larger + # Checkout uses REPO_ACCESS_TOKEN (non-exempt secret), so gate behind the protected env. + environment: runtime env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} strategy: @@ -21,14 +22,14 @@ jobs: spark: [ 4.0.0 ] steps: - name: checkout code - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} - name: Create pip cache key file run: | echo "${{ github.ref }}-${{ matrix.python }}-${{ matrix.numpy }}-${{ matrix.spark }}-${{ matrix.gdal }}" > .ci-pip-cache-key - name: Cache apt packages - uses: actions/cache@v4 + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: .cache/apt-archives key: apt-${{ runner.os }}-${{ hashFiles('.github/actions/scala_build/action.yml', '.github/actions/python_build/action.yml') }} diff --git a/.github/workflows/build_scala.yml b/.github/workflows/build_scala.yml index 00bd609..d683b87 100644 --- a/.github/workflows/build_scala.yml +++ b/.github/workflows/build_scala.yml @@ -5,10 +5,11 @@ on: - "scala/**" permissions: contents: read - id-token: write jobs: build: runs-on: larger + # Checkout uses REPO_ACCESS_TOKEN (non-exempt secret), so gate behind the protected env. 
+ environment: runtime env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} strategy: @@ -20,14 +21,14 @@ jobs: spark: [ 4.0.0 ] steps: - name: checkout code - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} - name: Create pip cache key file run: | echo "${{ github.ref }}-${{ matrix.python }}-${{ matrix.numpy }}-${{ matrix.spark }}-${{ matrix.gdal }}" > .ci-pip-cache-key - name: Cache apt packages - uses: actions/cache@v5 + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: .cache/apt-archives key: apt-${{ runner.os }}-${{ hashFiles('.github/actions/scala_build/action.yml', '.github/actions/python_build/action.yml') }} @@ -38,7 +39,7 @@ jobs: - name: upload artifacts uses: ./.github/actions/upload_artifacts - name: Publish test coverage to Codecov - uses: codecov/codecov-action@v5 + uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2 with: token: ${{ secrets.CODECOV_TOKEN }} files: target/scoverage.xml,target/scoverage-report/scoverage.xml diff --git a/.github/workflows/build_scala_by_package.yml b/.github/workflows/build_scala_by_package.yml index 4b25de7..12a6e2a 100644 --- a/.github/workflows/build_scala_by_package.yml +++ b/.github/workflows/build_scala_by_package.yml @@ -9,12 +9,13 @@ on: permissions: contents: read - id-token: write jobs: test-package: name: Test ${{ matrix.package }} runs-on: larger + # Checkout uses REPO_ACCESS_TOKEN (non-exempt secret), so gate behind the protected env. 
+ environment: runtime env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} strategy: @@ -28,7 +29,7 @@ jobs: spark: [4.0.0] steps: - name: Checkout - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} @@ -37,7 +38,7 @@ jobs: echo "${{ github.ref }}-${{ matrix.python }}-${{ matrix.numpy }}-${{ matrix.spark }}-${{ matrix.gdal }}" > .ci-pip-cache-key - name: Cache apt packages - uses: actions/cache@v5 + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: .cache/apt-archives key: apt-${{ runner.os }}-${{ hashFiles('.github/actions/scala_build/action.yml') }} diff --git a/.github/workflows/codecov-scala-parallel.yml b/.github/workflows/codecov-scala-parallel.yml index bcfeb0b..f775900 100644 --- a/.github/workflows/codecov-scala-parallel.yml +++ b/.github/workflows/codecov-scala-parallel.yml @@ -13,12 +13,13 @@ on: permissions: contents: read - id-token: write jobs: coverage-package: name: Coverage ${{ matrix.package }} runs-on: larger + # Checkout uses REPO_ACCESS_TOKEN (non-exempt secret), so gate behind the protected env. 
+ environment: runtime env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} strategy: @@ -32,7 +33,7 @@ jobs: spark: [4.0.0] steps: - name: Checkout - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} @@ -41,7 +42,7 @@ jobs: echo "${{ github.ref }}-${{ matrix.python }}-${{ matrix.numpy }}-${{ matrix.spark }}-${{ matrix.gdal }}" > .ci-pip-cache-key - name: Cache apt packages - uses: actions/cache@v5 + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: .cache/apt-archives key: apt-${{ runner.os }}-${{ hashFiles('.github/actions/scala_build/action.yml') }} @@ -55,7 +56,7 @@ jobs: suite_pattern: "com.databricks.labs.gbx.${{ matrix.package }}.*" - name: Upload scoverage for package - uses: actions/upload-artifact@v5 + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 with: name: scoverage-${{ matrix.package }} path: target/scoverage.xml @@ -66,14 +67,16 @@ jobs: runs-on: larger needs: coverage-package if: always() && needs.coverage-package.result == 'success' + # Checkout uses REPO_ACCESS_TOKEN (non-exempt secret), so gate behind the protected env. 
+ environment: runtime steps: - name: Checkout - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} - name: Download all package coverage artifacts - uses: actions/download-artifact@v5 + uses: actions/download-artifact@70fc10c6e5e1ce46ad2ea6f2b72d43f7d47b13c3 # v8.0.0 with: path: coverage-artifacts pattern: scoverage-* @@ -97,7 +100,7 @@ jobs: fi - name: Upload to Codecov - uses: codecov/codecov-action@v5 + uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2 with: token: ${{ secrets.CODECOV_TOKEN }} files: merged/scoverage.xml diff --git a/.github/workflows/codecov-upload.yml b/.github/workflows/codecov-upload.yml index b8d8180..0946300 100644 --- a/.github/workflows/codecov-upload.yml +++ b/.github/workflows/codecov-upload.yml @@ -16,12 +16,13 @@ on: permissions: contents: read - id-token: write jobs: coverage: name: Build, test with coverage, upload runs-on: larger + # Checkout uses REPO_ACCESS_TOKEN (non-exempt secret), so gate behind the protected env. 
+ environment: runtime env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} strategy: @@ -33,7 +34,7 @@ jobs: spark: [ 4.0.0 ] steps: - name: Checkout code - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} @@ -42,7 +43,7 @@ jobs: echo "${{ github.ref }}-${{ matrix.python }}-${{ matrix.numpy }}-${{ matrix.spark }}-${{ matrix.gdal }}" > .ci-pip-cache-key - name: Cache apt packages - uses: actions/cache@v5 + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: .cache/apt-archives key: apt-${{ runner.os }}-${{ hashFiles('.github/actions/scala_build/action.yml', '.github/actions/python_build/action.yml') }} @@ -60,7 +61,7 @@ jobs: enable_coverage: "true" - name: Upload coverage to Codecov - uses: codecov/codecov-action@v5 + uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2 with: token: ${{ secrets.CODECOV_TOKEN }} files: target/scoverage.xml,target/scoverage-report/scoverage.xml,python/geobrix/coverage.xml diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 8cbe9ac..eb26b16 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -10,6 +10,9 @@ on: - cron: '18 16 * * 0' workflow_dispatch: +permissions: + contents: read + jobs: analyze: name: Analyze @@ -17,6 +20,8 @@ jobs: # For private repos this may require GitHub Advanced Security. Remove this line once enabled. if: false runs-on: larger + # Checkout uses REPO_ACCESS_TOKEN (non-exempt secret), so gate behind the protected env. 
+ environment: runtime permissions: actions: read contents: read @@ -31,14 +36,14 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} # Initializes the CodeQL tools for scanning. # Requires: repo Settings → Code security and analysis → Code scanning (enable). - name: Initialize CodeQL - uses: github/codeql-action/init@v4 + uses: github/codeql-action/init@0d579ffd059c29b07949a3cce3983f0780820c98 # v4.32.6 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. @@ -51,7 +56,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v4 + uses: github/codeql-action/autobuild@0d579ffd059c29b07949a3cce3983f0780820c98 # v4.32.6 # ℹ️ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun @@ -64,4 +69,4 @@ jobs: # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v4 + uses: github/codeql-action/analyze@0d579ffd059c29b07949a3cce3983f0780820c98 # v4.32.6 diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml index d909be1..ac4e437 100644 --- a/.github/workflows/deploy-docs.yml +++ b/.github/workflows/deploy-docs.yml @@ -18,8 +18,6 @@ on: permissions: contents: read - pages: write - id-token: write concurrency: group: "pages" @@ -28,15 +26,17 @@ concurrency: jobs: build: runs-on: larger + # Checkout uses REPO_ACCESS_TOKEN (non-exempt secret), so gate behind the protected env. 
+ environment: runtime steps: # For restricted GITHUB_TOKEN, add secret REPO_ACCESS_TOKEN (PAT with repo scope) if needed. - name: Checkout - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} - name: Setup Node - uses: actions/setup-node@v4 + uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0 with: node-version: "20" cache: "npm" @@ -53,11 +53,15 @@ jobs: DOCS_PUBLIC_PAGES: '1' - name: Upload artifact - uses: actions/upload-pages-artifact@v3 + uses: actions/upload-pages-artifact@7b1f4a764d45c48632c6b24a0339c27f5614fb0b # v4.0.0 with: path: docs/build deploy: + # Scoped perms (per policy least-privilege): only the deploy job needs pages/id-token write. + permissions: + pages: write + id-token: write environment: name: github-pages url: ${{ steps.deploy.outputs.page_url }} @@ -66,4 +70,4 @@ jobs: steps: - name: Deploy to GitHub Pages id: deploy - uses: actions/deploy-pages@v4 + uses: actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4.0.5 diff --git a/.github/workflows/doc-tests.yml b/.github/workflows/doc-tests.yml index bddc043..7b77cf4 100644 --- a/.github/workflows/doc-tests.yml +++ b/.github/workflows/doc-tests.yml @@ -38,17 +38,28 @@ jobs: test-python-docs: name: Python Documentation Tests runs-on: larger - if: false # Disabled until project launch + # SECURITY — READ BEFORE RE-ENABLING THIS JOB + # This job is workflow_run-triggered, checks out + # ref: github.event.workflow_run.head_sha + # and binds environment: runtime (which exposes REPO_ACCESS_TOKEN). + # When the upstream "build main" runs from a fork PR, head_sha is the + # FORK's commit — re-enabling without an origin guard would execute + # attacker-controlled code with REPO_ACCESS_TOKEN in scope. 
+ # Before removing `if: false`, REPLACE it with the origin guard: + # if: github.event.workflow_run.head_repository.full_name == github.repository + if: false # Disabled until project launch — see security banner above before re-enabling. + # Checkout uses REPO_ACCESS_TOKEN (non-exempt secret), so gate behind the protected env. + environment: runtime steps: - name: Checkout code - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} ref: ${{ github.event_name == 'workflow_run' && github.event.workflow_run.head_sha || github.sha }} - name: Set up Python 3.12 - uses: actions/setup-python@v5 + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: python-version: '3.12' cache: 'pip' @@ -69,7 +80,7 @@ jobs: --cov-report=xml - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@v5 + uses: codecov/codecov-action@671740ac38dd9b0130fbe1cec585b89eea48d3de # v5.5.2 if: success() with: file: ./coverage.xml @@ -91,16 +102,27 @@ jobs: test-scala-docs: name: Scala Documentation Tests runs-on: larger - if: false # Disabled until Scala doc tests are created - + # SECURITY — READ BEFORE RE-ENABLING THIS JOB + # This job is workflow_run-triggered and binds environment: runtime + # (which exposes REPO_ACCESS_TOKEN). It does not currently set an + # explicit `ref:` on checkout, but the sibling jobs in this file do — + # so re-enablers commonly copy that pattern. Whether or not you add + # a head_sha ref, you MUST gate this job by origin or a fork PR can + # cause a workflow_run that exposes REPO_ACCESS_TOKEN to fork code. + # Before removing `if: false`, REPLACE it with the origin guard: + # if: github.event.workflow_run.head_repository.full_name == github.repository + if: false # Disabled until Scala doc tests are created — see security banner above before re-enabling. 
+ # Checkout uses REPO_ACCESS_TOKEN (non-exempt secret), so gate behind the protected env. + environment: runtime + steps: - name: Checkout code - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} - name: Set up JDK 11 - uses: actions/setup-java@v5 + uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0 with: java-version: '11' distribution: 'temurin' @@ -113,11 +135,22 @@ jobs: validate-structure: name: Validate Documentation Structure runs-on: larger - if: false # Disabled until project launch + # SECURITY — READ BEFORE RE-ENABLING THIS JOB + # This job is workflow_run-triggered, checks out + # ref: github.event.workflow_run.head_sha + # and binds environment: runtime (which exposes REPO_ACCESS_TOKEN). + # When the upstream "build main" runs from a fork PR, head_sha is the + # FORK's commit — re-enabling without an origin guard would execute + # attacker-controlled code with REPO_ACCESS_TOKEN in scope. + # Before removing `if: false`, REPLACE it with the origin guard: + # if: github.event.workflow_run.head_repository.full_name == github.repository + if: false # Disabled until project launch — see security banner above before re-enabling. + # Checkout uses REPO_ACCESS_TOKEN (non-exempt secret), so gate behind the protected env. 
+ environment: runtime steps: - name: Checkout code - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} ref: ${{ github.event_name == 'workflow_run' && github.event.workflow_run.head_sha || github.sha }} diff --git a/.github/workflows/publish-maven.yml b/.github/workflows/publish-maven.yml index 230d0ef..85b7976 100644 --- a/.github/workflows/publish-maven.yml +++ b/.github/workflows/publish-maven.yml @@ -1,18 +1,32 @@ name: Publish package to GitHub Packages +# +# DISABLED per Databricks Labs Repository Lockdown policy. +# GeoBrix is not publishing packages from GitHub Actions at this time; Maven artifact +# distribution (if/when re-enabled) will be reviewed with Labs first. +# +# To re-enable: remove the `if: false` on the `publish` job below and coordinate with +# databrickslabs maintainers to restore the `release` trigger and required secrets. on: release: types: [created] + +permissions: + contents: read + jobs: publish: + if: false # Disabled per Labs lockdown policy — see banner above. runs-on: larger + # Checkout uses REPO_ACCESS_TOKEN (non-exempt secret), so gate behind the protected env. + environment: runtime permissions: contents: read packages: write steps: - - uses: actions/checkout@v5 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} - - uses: actions/setup-java@v5 + - uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0 with: java-version: '17' distribution: 'zulu' diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d95dc52..d801b75 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,13 +1,25 @@ name: publish python package to pypi +# +# DISABLED per Databricks Labs Repository Lockdown policy. 
+# GeoBrix is not publishing to PyPI from GitHub Actions at this time; PyPI release +# automation (if/when re-enabled) will be reviewed with Labs first. +# +# To re-enable: remove the `if: false` on the `build-n-publish` job below and coordinate +# with databrickslabs maintainers to restore the `release` trigger and required secrets. on: release: types: [published] +permissions: + contents: read + jobs: build-n-publish: + if: false # Disabled per Labs lockdown policy — see banner above. name: Build project and publish to PyPI runs-on: larger - environment: release + # Use the single protected env `runtime` so REPO_ACCESS_TOKEN / OIDC issuance are gated. + environment: runtime permissions: # Used to authenticate to PyPI via OIDC and sign the release's artifacts with sigstore-python. id-token: write @@ -22,14 +34,14 @@ jobs: spark: [ 4.0.0 ] steps: - name: checkout code - uses: actions/checkout@v5 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 with: token: ${{ secrets.REPO_ACCESS_TOKEN || secrets.GITHUB_TOKEN }} - name: Create pip cache key file run: | echo "${{ github.ref }}-${{ matrix.python }}-${{ matrix.numpy }}-${{ matrix.spark }}-${{ matrix.gdal }}" > .ci-pip-cache-key - name: Cache apt packages - uses: actions/cache@v5 + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 with: path: .cache/apt-archives key: apt-${{ runner.os }}-${{ hashFiles('.github/actions/scala_build/action.yml', '.github/actions/python_build/action.yml') }} @@ -40,6 +52,6 @@ jobs: # As of DEC, 2025 v1 is the latest - name: Publish a Python distribution to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 with: packages_dir: python/geobrix/dist/ diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 0000000..72619d3 --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,13 @@ +# CODEOWNERS for databrickslabs/geobrix +# +# Required by the Databricks Labs 
Repository Lockdown policy +# (https://databricks.atlassian.net/wiki/spaces/UN/pages/778928444). +# Combined with branch protection, listed owners must approve every PR +# before it can be merged. +# +# Membership is managed at the org level on the GitHub team page: +# https://github.com/orgs/databrickslabs/teams/geobrix-write +# +# Syntax: https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-security/customizing-your-repository/about-code-owners + +* @databrickslabs/geobrix-write diff --git a/scripts/docker/Dockerfile b/scripts/docker/Dockerfile index 9d0e5f1..7369ef4 100644 --- a/scripts/docker/Dockerfile +++ b/scripts/docker/Dockerfile @@ -2,12 +2,20 @@ ARG BUILDPLATFORM=linux/amd64 ARG GDAL_VERSION=3.11.4 ARG PDAL_VERSION=2.8.2 +# Pinned commit for PDAL ${PDAL_VERSION} (Labs lockdown policy: tags are mutable). +# To bump PDAL_VERSION, resolve the new tag with: +# gh api repos/PDAL/PDAL/git/refs/tags/NEW_PDAL_VERSION --jq .object.sha +# and update PDAL_SHA in lockstep. +ARG PDAL_SHA=736fa0a66af4bed7105dff5fa152edf26bbb8a3a # ========================================== # STAGE 1: OS Base & Java # e.g. `docker build --target base -t geobrix-base .` # ========================================== -FROM --platform=${BUILDPLATFORM} ubuntu:24.04 AS base +# Pinned by digest (Labs lockdown policy). To refresh, re-pull ubuntu:24.04 from +# Docker Hub and update the sha256 below. Tag comment retained for readability. 
+FROM --platform=${BUILDPLATFORM} ubuntu@sha256:c4a8d5503dfb2a3eb8ab5f807da5bc69a85730fb49b5cfca2330194ebcc41c7b AS base +# ubuntu:24.04 RUN apt-get update && apt-get install -y --no-install-recommends \ ca-certificates curl gnupg && mkdir -p /usr/share/keyrings @@ -60,10 +68,17 @@ WORKDIR $ROOTDIR/ FROM system-deps AS hadoop-builder ENV HADOOP_VERSION=3.4.0 ENV HADOOP_HOME=/usr/local/hadoop - -RUN mkdir -p $ROOTDIR/src && wget -qO- https://dlcdn.apache.org/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz | \ - tar -xzC $ROOTDIR/src/ && \ - mv $ROOTDIR/src/hadoop-${HADOOP_VERSION} $HADOOP_HOME && \ +# Pinned SHA-512 of the Apache Hadoop 3.4.0 binary release tarball (Labs lockdown policy). +# Cross-checked against https://downloads.apache.org/hadoop/common/hadoop-3.4.0/hadoop-3.4.0.tar.gz.sha512 +# To bump HADOOP_VERSION, update HADOOP_SHA512 in lockstep. +ENV HADOOP_SHA512=6f653c0109f97430047bd3677c50da7c8a2809d153b231794cf980b3208a6b4beff8ff1a03a01094299d459a3a37a3fe16731629987165d71f328657dbf2f24c + +RUN mkdir -p $ROOTDIR/src \ + && wget -q -O /tmp/hadoop.tar.gz https://dlcdn.apache.org/hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz \ + && echo "${HADOOP_SHA512} /tmp/hadoop.tar.gz" | sha512sum -c - \ + && tar -xzf /tmp/hadoop.tar.gz -C $ROOTDIR/src/ \ + && rm /tmp/hadoop.tar.gz \ + && mv $ROOTDIR/src/hadoop-${HADOOP_VERSION} $HADOOP_HOME && \ ln -s $HADOOP_HOME/bin/hadoop /usr/local/bin/hadoop && \ ln -s $HADOOP_HOME/bin/hdfs /usr/local/bin/hdfs && \ ln -s $HADOOP_HOME/bin/yarn /usr/local/bin/yarn && \ @@ -76,7 +91,15 @@ RUN mkdir -p $ROOTDIR/src && wget -qO- https://dlcdn.apache.org/hadoop/common/ha # ========================================== FROM hadoop-builder AS gdal-builder ARG GDAL_VERSION -RUN wget -qO- https://download.osgeo.org/gdal/${GDAL_VERSION}/gdal-${GDAL_VERSION}.tar.gz | tar -xzC $ROOTDIR/src/ +# Pinned SHA-256 of the GDAL 3.11.4 source tarball (Labs lockdown policy). 
+# Upstream OSGeo only publishes MD5; this SHA-256 was computed locally after +# MD5-verifying the upstream download (md5: 9f4fa4b3be48fb60d5dd76fecb11a5f6). +# To bump GDAL_VERSION, update GDAL_SHA256 in lockstep. +ENV GDAL_SHA256=0fa36ee34d4451db586d2bf78ea0dbfa3b0dfae0516587f8130d21add0ac9dad +RUN wget -q -O /tmp/gdal.tar.gz https://download.osgeo.org/gdal/${GDAL_VERSION}/gdal-${GDAL_VERSION}.tar.gz \ + && echo "${GDAL_SHA256} /tmp/gdal.tar.gz" | sha256sum -c - \ + && tar -xzf /tmp/gdal.tar.gz -C $ROOTDIR/src/ \ + && rm /tmp/gdal.tar.gz RUN mkdir -p src/gdal-${GDAL_VERSION}/build && cd src/gdal-${GDAL_VERSION}/build \ && cmake -G Ninja .. \ @@ -98,8 +121,13 @@ RUN ln -s $ROOTDIR/lib/libgdal.so /usr/lib/libgdal.so && \ # ========================================== FROM gdal-builder AS pdal-builder ARG PDAL_VERSION -RUN git clone --depth 1 -b ${PDAL_VERSION} https://github.com/PDAL/PDAL.git /tmp/pdal \ - && cd /tmp/pdal && mkdir -p build && cd build \ +ARG PDAL_SHA +RUN mkdir -p /tmp/pdal && cd /tmp/pdal \ + && git init -q \ + && git remote add origin https://github.com/PDAL/PDAL.git \ + && git fetch --depth 1 -q origin ${PDAL_SHA} \ + && git checkout -q FETCH_HEAD \ + && mkdir -p build && cd build \ && cmake -G Ninja .. \ -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local \ -DGDAL_CONFIG=/usr/local/bin/gdal-config \ @@ -116,14 +144,17 @@ RUN git clone --depth 1 -b ${PDAL_VERSION} https://github.com/PDAL/PDAL.git /tmp # ========================================== FROM pdal-builder AS final -# --- Maven Installation (With Dynamic Checksum) --- +# --- Maven Installation (Pinned Checksum per Labs lockdown policy) --- +# Pinning the SHA-512 in-Dockerfile prevents a compromised archive.apache.org +# from serving both a bad tarball and a matching bad checksum. To bump +# MAVEN_VERSION, update MAVEN_SHA512 in lockstep. 
ARG MAVEN_VERSION=3.9.9 +ARG MAVEN_SHA512=a555254d6b53d267965a3404ecb14e53c3827c09c3b94b5678835887ab404556bfaf78dcfe03ba76fa2508649dca8531c74bca4d5846513522404d48e8c4ac8b RUN mkdir -p $ROOTDIR/share/maven \ && curl -fsSL -o /tmp/apache-maven.tar.gz https://archive.apache.org/dist/maven/maven-3/${MAVEN_VERSION}/binaries/apache-maven-${MAVEN_VERSION}-bin.tar.gz \ - && curl -fsSL -o /tmp/apache-maven.tar.gz.sha512 https://archive.apache.org/dist/maven/maven-3/${MAVEN_VERSION}/binaries/apache-maven-${MAVEN_VERSION}-bin.tar.gz.sha512 \ - && echo "$(cat /tmp/apache-maven.tar.gz.sha512) /tmp/apache-maven.tar.gz" | sha512sum -c - \ + && echo "${MAVEN_SHA512} /tmp/apache-maven.tar.gz" | sha512sum -c - \ && tar -xzf /tmp/apache-maven.tar.gz -C $ROOTDIR/share/maven --strip-components=1 \ - && ln -s $ROOTDIR/share/maven/bin/mvn $ROOTDIR/bin/mvn && rm /tmp/apache-maven.tar.gz* + && ln -s $ROOTDIR/share/maven/bin/mvn $ROOTDIR/bin/mvn && rm /tmp/apache-maven.tar.gz ENV MAVEN_HOME="$ROOTDIR/share/maven" ENV SPARK_VERSION=4.0.0 diff --git a/scripts/security/README.md b/scripts/security/README.md new file mode 100644 index 0000000..e51695c --- /dev/null +++ b/scripts/security/README.md @@ -0,0 +1,59 @@ +# scripts/security/ + +Tooling that implements the Databricks Labs [Repository Lockdown policy](https://docs.google.com/document/d/1J50oKQxG9WhGXWEl5zlbCq5pf9AGh57yDhZh9nxCQC0/edit) +for GeoBrix: every third-party GitHub Action reference must be pinned to a +full commit SHA taken from a release published **before the +`2026-03-10T00:00:00Z` cutoff**. The tag name is preserved as an inline +comment for human-readable cross-reference; the comment is **not** +authoritative — reviewers must verify the SHA against the referenced +release. + +## Scripts + +| Script | Requires | Purpose | +|---|---|---| +| `list-external-actions` | `yq` (Mike Farah) | Emit the set of external actions referenced by any workflow or composite action under `.github/`, one per line. 
|
+| `resolve-action-ref` | `gh`, `jq` | For each `action[@ref]`, resolve the most recent pre-cutoff release tag to the commit SHA it points at. Marks already-pinned entries with `✓` and drift with `⚠`. |
+| `pin-gh-actions` | `git` | Consume `resolve-action-ref` output, rewrite every `uses:` line under `.github/` to the new SHA form (skipping actions owned by `databricks` or `databrickslabs`), and stage the result with `git add`. Prints the staged diff for review — **does not commit**. |
+
+## Typical flow
+
+```sh
+cd "$(git rev-parse --show-toplevel)"
+
+# 1. Preview what would change
+./scripts/security/list-external-actions \
+  | xargs ./scripts/security/resolve-action-ref
+
+# 2. Apply (stages under .github/)
+./scripts/security/list-external-actions \
+  | xargs ./scripts/security/resolve-action-ref \
+  | ./scripts/security/pin-gh-actions
+
+# 3. Review, then commit
+git diff --cached -- .github
+git commit -m "Re-pin GitHub Actions to commits from releases prior to 2026-03-10"
+```
+
+## Notes
+
+- **Actions owned by `databricks` or `databrickslabs` are skipped.** They are
+  considered first-party by the policy and do not require pinning; they
+  remain on tag references.
+- **Mono-repo tag prefixes.** `resolve-action-ref` handles actions under a
+  mono-repo path (e.g. `databrickslabs/sandbox/acceptance` → tags like
+  `acceptance/v0.4.4`). Review the `⚠` output before applying — the policy
+  document flags mono-repo tag resolution as a known glitch.
+- **`pin-gh-actions` does not switch branches.** Unlike the reference
+  implementation at `databrickslabs/blueprint`, this script assumes the
+  caller has already checked out the target branch.
+- **The tag comment is informational only.** A reviewer verifying this PR must
+  re-run `resolve-action-ref` (or an equivalent `gh api` lookup) to
+  confirm every SHA corresponds to the claimed tag.
+
+## Refresh cadence
+
+The cutoff date is a constant inside `resolve-action-ref` and
+`pin-gh-actions`.
It will only change when the policy is updated by the
+Databricks Labs team, at which point both scripts should be updated in
+lockstep.
diff --git a/scripts/security/list-external-actions b/scripts/security/list-external-actions
new file mode 100755
index 0000000..c44bb4e
--- /dev/null
+++ b/scripts/security/list-external-actions
@@ -0,0 +1,13 @@
+#!/bin/sh
+# List external GitHub Actions referenced in workflow files.
+# Output: one line per unique action, formatted as "action[@ref]".
+#
+# Requires: yq (https://github.com/mikefarah/yq/)
+
+set -eu
+
+root="$(git rev-parse --show-toplevel)"
+
+find "$root/.github" \( -name '*.yml' -o -name '*.yaml' \) -print0 |
+  xargs -0 yq -N '.. | .uses? | select(. and . != "./*")' |
+  sort -u
diff --git a/scripts/security/pin-gh-actions b/scripts/security/pin-gh-actions
new file mode 100755
index 0000000..7d72f20
--- /dev/null
+++ b/scripts/security/pin-gh-actions
@@ -0,0 +1,98 @@
+#!/bin/sh
+#
+# Apply GitHub Actions pinning from resolve-action-ref output.
+#
+# Usage: ./list-external-actions | xargs ./resolve-action-ref | ./pin-gh-actions
+#
+# Reads resolve-action-ref output on stdin:
+#   ✓ lines are already pinned — skipped.
+#   ⚠ lines need updating — applied to workflow files.
+#
+# Staging behaviour: writes changes into the working tree and stages them
+# with git add .github/. The caller is responsible for reviewing and
+# committing. (We intentionally do not create or switch branches here —
+# GeoBrix manages branches manually, unlike the reference implementation.)
+#
+# Requires: git
+
+set -eu
+
+cutoff="2026-03-10T00:00:00Z"
+root="$(git rev-parse --show-toplevel)"
+
+ensure_no_staged_changes() {
+  # There must be no staged changes under .github — we stage our own, and
+  # the user needs to be able to verify them cleanly with git diff --cached.
+  if ! git diff --cached --quiet -- "$root/.github"
+  then
+    printf 'error: staged changes already present under .github; commit or unstage them first\n' 1>&2
+    exit 1
+  fi
+}
+
+requires_pinning() {
+  # Actions owned by Databricks do not need pinning.
+  owner="${1%%/*}"
+  case "${owner}" in
+    databricks|databrickslabs) return 1 ;;
+    *) return 0 ;;
+  esac
+}
+
+apply_pin() {
+  # Rewrite every "uses: <old_ref>" line under .github to
+  # "uses: <new_ref> # <tag>".
+  #   $1  reference currently used, e.g. actions/checkout@v4
+  #   $2  pinned replacement,       e.g. actions/checkout@abc123
+  #   $3  release tag for the SHA,  e.g. v4.2.0
+  old_ref="$1"
+  new_ref="$2"
+  tag="$3"
+
+  find "${root}/.github" \( -name '*.yml' -o -name '*.yaml' \) |
+  while IFS= read -r file
+  do
+    # index() compares literally, so the "." in a ref such as "@v4.2" is not
+    # a wildcard. Only whitespace or a comment may follow the reference:
+    # that keeps "@v4" from matching inside "@v4.1.0", and a stale tag
+    # comment from an earlier pin is replaced instead of being kept after
+    # the new one.
+    awk -v old="uses: ${old_ref}" -v new="uses: ${new_ref} # ${tag}" '
+      {
+        at = index($0, old)
+        if (at) {
+          rest = substr($0, at + length(old))
+          if (rest ~ /^[ \t]*(#.*)?$/) {
+            $0 = substr($0, 1, at - 1) new
+          }
+        }
+        print
+      }' "${file}" > "${file}.$$"
+    mv "${file}.$$" "${file}"
+  done
+}
+
+stage_and_show() {
+  git add "$root/.github"
+
+  printf '\n'
+  git diff --cached -- "$root/.github"
+
+  printf '\nTo commit:\n'
+  printf ' git commit -m "Pin GitHub Actions to the commit associated with the last known release prior to %s"\n' "${cutoff}"
+}
+
+# --- Main ---
+
+ensure_no_staged_changes
+
+awk 'NF == 5 { sub(/:$/, "", $2); print $2, $3, $5 }' |
+while read -r old_ref new_ref tag
+do
+  if requires_pinning "${old_ref}"
+  then
+    apply_pin "${old_ref}" "${new_ref}" "${tag}"
+  fi
+done
+
+stage_and_show
diff --git a/scripts/security/resolve-action-ref b/scripts/security/resolve-action-ref
new file mode 100755
index 0000000..8200bea
--- /dev/null
+++ b/scripts/security/resolve-action-ref
@@ -0,0 +1,167 @@
+#!/bin/sh
+#
+# For each action@ref argument, find the latest release published before
+# a cutoff date and resolve its tag to a commit SHA.
+#
+# Usage: resolve-action-ref [action[@ref] ...]
+# Output: ✓/⚠ action[@ref]: action@sha # tag
+#
+# Requires: gh, jq
+
+set -eu
+
+cutoff="2026-03-10T00:00:00Z"
+
+error() {
+  printf "error: %s\n" "$*" 1>&2
+}
+
+resolve_tag_sha() {
+  repo="$1"
+  tag="$2"
+
+  ref=$(gh api "repos/$repo/git/ref/tags/$tag" --jq '.object.type + " " + .object.sha')
+  obj_type="${ref%% *}"
+  sha="${ref#* }"
+
+  # Dereference annotated tags to the underlying commit.
+  if [ "$obj_type" = "tag" ]
+  then
+    sha=$(gh api "repos/$repo/git/tags/$sha" --jq '.object.sha')
+  fi
+
+  printf '%s\n' "$sha"
+}
+
+release_tags_before_cutoff() {
+  repo="$1"
+  cutoff="$2"
+
+  gh api "repos/$repo/releases" --paginate \
+    --jq '.[] | select(.draft == false and .prerelease == false) | {published_at, tag_name}' |
+    jq --raw-output --arg cutoff "${cutoff}" 'select(.published_at < $cutoff) | .tag_name'
+
+  # Output is a sequence of lines containing tags that match releases prior to the cutoff.
+}
+
+highest_tag() {
+  # Pipe in a sequence of lines containing version identifiers, only the highest will be returned as output.
+  sort --version-sort --reverse | head -1
+}
+
+# Fallback for repos that publish tags but no GitHub releases.
+# Uses GraphQL to fetch all tags with their commit dates in bulk,
+# filters to version-like names (v* or prefix/v*), prunes those
+# at or after the cutoff, and returns tag names for version-sorting.
+version_tags_before_cutoff() {
+  repo="$1"
+  cutoff="$2"
+  owner="${repo%%/*}"
+  name="${repo#*/}"
+
+  # All tags, and the date of the associated commit.
+  gh api graphql --paginate -f query='
+    query($owner: String!, $name: String!, $endCursor: String) {
+      repository(owner: $owner, name: $name) {
+        refs(refPrefix: "refs/tags/", first: 100, after: $endCursor,
+          orderBy: {field: TAG_COMMIT_DATE, direction: DESC}) {
+          pageInfo { hasNextPage endCursor }
+          nodes {
+            name
+            target {
+              ... on Commit { committedDate }
+              ... on Tag { target { ... on Commit { committedDate } } }
+            }
+          }
+        }
+      }
+    }' -f owner="${owner}" -f name="${name}" \
+    --jq '.data.repository.refs.nodes[] | {name, date: (.target.committedDate // .target.target.committedDate)}' |
+    jq --raw-output --arg cutoff "${cutoff}" '
+      select(.name | test("^(v|.*/v)[0-9]")) |
+      select(.date < $cutoff) |
+      .name'
+}
+
+resolve_action() {
+  arg="$1"
+
+  # Expected formats:
+  #   owner/repo@ref          e.g. actions/checkout@v4
+  #   owner/repo/path...@ref  e.g. databrickslabs/sandbox/acceptance@acceptance/v0.4.4
+  #   owner/repo              e.g. actions/checkout (ref not required)
+  #   owner/repo/path...      e.g. databrickslabs/sandbox/acceptance
+  case "$arg" in
+    */*@*)
+      ref="${arg##*@}"    # After the trailing @
+      action="${arg%@*}"  # Before the trailing @
+      ;;
+    */*)
+      ref=""
+      action="$arg"
+      ;;
+    *)
+      error "unsupported action format: $arg"
+      return 1
+  esac
+
+  owner="${action%%/*}"                 # Before the first /
+  repo_and_path="${action#*/}"          # After the first /
+  repo="${owner}/${repo_and_path%%/*}"  # owner/repo (strip any sub-path)
+
+  # Detect if it's a mono-repo: convention is that a subpath is used as a tag prefix.
+  # For example: owner/repo/path -> @path/vX.Y
+  case "$repo_and_path" in
+    */*) path_in_repo="${repo_and_path#*/}" ;;
+    *) path_in_repo="" ;;
+  esac
+
+  # Find the latest release published before the cutoff. Try the tag prefix
+  # filter first (mono-repos such as databrickslabs/sandbox tag releases as
+  # <path>/vX.Y); if nothing matches, fall back to unfiltered tags so repos
+  # with one shared version (e.g. github/codeql-action) still resolve.
+  releases="$(release_tags_before_cutoff "$repo" "$cutoff")"
+  tag="$(printf '%s\n' "$releases" |
+    awk -F/ -vtag_prefix="${path_in_repo}" '!tag_prefix || tag_prefix==$1' |
+    highest_tag)"
+  if [ -z "$tag" ] && [ -n "$path_in_repo" ]
+  then
+    tag="$(printf '%s\n' "$releases" | highest_tag)"
+  fi
+
+  # Fall back to the tags API for repos that use tags but have no GitHub releases.
+  if [ -z "$tag" ]
+  then
+    vtags="$(version_tags_before_cutoff "$repo" "$cutoff")"
+    tag="$(printf '%s\n' "$vtags" |
+      awk -F/ -vtag_prefix="${path_in_repo}" '!tag_prefix || tag_prefix==$1' |
+      highest_tag)"
+    if [ -z "$tag" ] && [ -n "$path_in_repo" ]
+    then
+      tag="$(printf '%s\n' "$vtags" | highest_tag)"
+    fi
+  fi
+
+  [ -n "$tag" ] || { error "$arg: no release found before $cutoff"; return 1; }
+
+  # errexit is ignored while this function runs under "||" in the main loop,
+  # so validate the lookup result here instead of printing an empty SHA.
+  sha=$(resolve_tag_sha "$repo" "$tag")
+  case "$sha" in
+    ""|*[!0-9a-f]*) error "$arg: cannot resolve tag $tag to a commit SHA"; return 1 ;;
+  esac
+
+  if [ "$ref" = "$sha" ]
+  then
+    printf "✓ %s\n" "$arg"
+  else
+    printf "⚠ %s: %s@%s # %s\n" "$arg" "$action" "$sha" "$tag"
+  fi
+}
+
+status=0
+for arg in "$@"
+do
+  resolve_action "$arg" || status=1
+done
+exit "$status"
diff --git a/scripts/util/install_hadoop.sh b/scripts/util/install_hadoop.sh
old mode 100644
new mode 100755
index 43a32e2..8270796
--- a/scripts/util/install_hadoop.sh
+++ b/scripts/util/install_hadoop.sh
@@ -1,4 +1,16 @@
-wget https://downloads.apache.org/hadoop/common/hadoop-3.4.0/hadoop-3.4.0.tar.gz
-tar -xzf hadoop-3.4.0.tar.gz
-mv hadoop-3.4.0 /usr/local/hadoop
-cp /usr/local/hadoop/lib/native/*.so /usr/lib/
\ No newline at end of file
+#!/usr/bin/env bash
+# Standalone Hadoop installer for hosts that mirror the Dockerfile setup.
+# Not referenced by the build; kept as a manual helper. Checksum-pinned per
+# Labs lockdown policy — update HADOOP_VERSION and HADOOP_SHA512 in lockstep.
+set -euo pipefail
+
+HADOOP_VERSION="${HADOOP_VERSION:-3.4.0}"
+HADOOP_SHA512="${HADOOP_SHA512:-6f653c0109f97430047bd3677c50da7c8a2809d153b231794cf980b3208a6b4beff8ff1a03a01094299d459a3a37a3fe16731629987165d71f328657dbf2f24c}"
+tarball="hadoop-${HADOOP_VERSION}.tar.gz"
+# -O overwrites a leftover tarball; plain wget would save "<name>.1" and the stale file would be verified.
+wget -q -O "${tarball}" "https://downloads.apache.org/hadoop/common/hadoop-${HADOOP_VERSION}/${tarball}"
+echo "${HADOOP_SHA512}  ${tarball}" | sha512sum -c -
+tar -xzf "${tarball}"
+mv "hadoop-${HADOOP_VERSION}" /usr/local/hadoop
+cp /usr/local/hadoop/lib/native/*.so /usr/lib/
+rm "${tarball}"