# Warm Maven Dependency Cache #48
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: Warm Maven Dependency Cache

# Pre-downloads every Maven dependency through JFrog Artifactory and stores
# the result in the GitHub Actions cache. Forked PRs cannot authenticate to
# JFrog, so they restore this cache and build without credentials.
#
# Triggers:
#   - push to main that touches any pom.xml (refreshes the cache after
#     dependency updates)
#   - daily schedule (keeps the cache from hitting the 7-day eviction)
#   - manual dispatch (optionally with a PR number, to warm the cache from
#     that PR's pom.xml files)
on:
  push:
    branches:
      - main
    paths:
      - '**/pom.xml'
  schedule:
    # Daily at 06:00 UTC.
    - cron: '0 6 * * *'
  workflow_dispatch:
    inputs:
      pr_number:
        # Folded scalar: renders as the same single-line description.
        description: >-
          PR number to warm cache for (reads pom.xml from the PR branch).
          Leave empty to warm from main.
        required: false
        type: string
permissions:
  # Allows the job to request a GitHub OIDC ID token; the workflow exchanges
  # it for a JFrog access token.
  id-token: write
  # Read-only access to repository contents for checkout.
  contents: read
  # Needed to read PR metadata for fork checkout.
  pull-requests: read
jobs:
  # Resolves the project's Maven dependencies through the JFrog mirror and
  # saves ~/.m2/repository under a fresh, timestamped cache key.
  warm-cache:
    # Run on both Linux and Windows. GitHub Actions cache is OS-scoped —
    # a cache saved on Linux cannot be restored on Windows and vice versa.
    strategy:
      fail-fast: false
      matrix:
        github-runner:
          - linux-ubuntu-latest
          - windows-server-latest
    runs-on:
      group: databricks-protected-runner-group
      labels: ${{ matrix.github-runner }}
    steps:
      - name: Set up JDK
        uses: actions/setup-java@c1e323688fd81a25caa38c78aa6df2d33d3e20d9  # v4
        with:
          # Quoted so the version is always treated as a string.
          java-version: '21'
          # NOTE(review): setup-java documents 'temurin' as the successor of
          # 'adopt'; kept as-is here — confirm against the PR CI workflows.
          distribution: 'adopt'

      - name: Enable long paths (Windows)
        if: runner.os == 'Windows'
        run: git config --system core.longpaths true

      # If PR number provided, checkout only pom.xml files from the fork
      # (security: no source code).
      # NOTE(review): a pom.xml can itself declare plugins that execute during
      # the Maven build, and later steps run Maven with JFrog credentials in
      # scope. Only dispatch this for PRs whose pom.xml changes were reviewed.
      - name: Checkout PR pom.xml files (sparse)
        if: inputs.pr_number != ''
        shell: bash
        # Context values are passed through the environment instead of being
        # interpolated into the script text, so they cannot inject shell code.
        env:
          GH_TOKEN: ${{ github.token }}
          PR_NUMBER: ${{ inputs.pr_number }}
          REPO: ${{ github.repository }}
        run: |
          set -euo pipefail

          # The dispatch input is free-form text; accept digits only.
          if ! [[ "$PR_NUMBER" =~ ^[0-9]+$ ]]; then
            echo "FAIL: pr_number must be a positive integer, got '${PR_NUMBER}'"
            exit 1
          fi

          # Fetch PR metadata (--fail turns HTTP errors into a non-zero exit).
          PR_DATA=$(curl -sLS --fail \
            -H "Accept: application/vnd.github+json" \
            -H "Authorization: Bearer ${GH_TOKEN}" \
            "https://api.github.com/repos/${REPO}/pulls/${PR_NUMBER}")

          # '// empty' maps a missing/null field to an empty string rather
          # than the literal text "null".
          FORK_REPO=$(jq -r '.head.repo.full_name // empty' <<< "$PR_DATA")
          FORK_REF=$(jq -r '.head.ref // empty' <<< "$PR_DATA")
          if [ -z "$FORK_REPO" ] || [ -z "$FORK_REF" ]; then
            echo "FAIL: Could not resolve head repository/branch for PR #${PR_NUMBER}"
            exit 1
          fi
          echo "Warming cache for PR #${PR_NUMBER} from ${FORK_REPO}@${FORK_REF}"

          # Sparse checkout: only pom.xml files (no source code from fork)
          git init .
          git remote add fork "https://github.com/${FORK_REPO}.git"
          git config core.sparseCheckout true
          echo "**/pom.xml" > .git/info/sparse-checkout
          echo "pom.xml" >> .git/info/sparse-checkout
          git fetch --depth=1 fork "${FORK_REF}"
          git checkout FETCH_HEAD

      - name: Checkout main branch
        if: inputs.pr_number == ''
        uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5  # v4

      - name: Get JFrog OIDC token
        shell: bash
        run: |
          set -euo pipefail

          # Get GitHub OIDC ID token
          ID_TOKEN=$(curl -sLS \
            -H "User-Agent: actions/oidc-client" \
            -H "Authorization: Bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \
            "${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=jfrog-github" | jq -r '.value // empty')
          if [ -z "$ID_TOKEN" ]; then
            echo "FAIL: Could not obtain GitHub OIDC ID token"
            exit 1
          fi
          echo "::add-mask::${ID_TOKEN}"

          # Build the token-exchange request with jq so the token is always
          # correctly JSON-escaped.
          TOKEN_REQUEST=$(jq -n --arg subject_token "$ID_TOKEN" '{
            grant_type: "urn:ietf:params:oauth:grant-type:token-exchange",
            subject_token_type: "urn:ietf:params:oauth:token-type:id_token",
            subject_token: $subject_token,
            provider_name: "github-actions"
          }')

          # Exchange for JFrog access token
          ACCESS_TOKEN=$(curl -sLS -XPOST -H "Content-Type: application/json" \
            "https://databricks.jfrog.io/access/api/v1/oidc/token" \
            -d "$TOKEN_REQUEST" | jq -r '.access_token // empty')
          if [ -z "$ACCESS_TOKEN" ]; then
            echo "FAIL: Could not extract JFrog access token"
            exit 1
          fi
          echo "::add-mask::${ACCESS_TOKEN}"

          # Expose the token to the following steps of this job.
          echo "JFROG_ACCESS_TOKEN=${ACCESS_TOKEN}" >> "$GITHUB_ENV"
          echo "JFrog OIDC token obtained successfully"

      - name: Configure Maven with JFrog credentials
        shell: bash
        run: |
          set -euo pipefail
          mkdir -p ~/.m2
          # Unquoted heredoc delimiter on purpose: the shell expands
          # ${JFROG_ACCESS_TOKEN} while writing the file.
          cat > ~/.m2/settings.xml << EOF
          <settings>
            <mirrors>
              <mirror>
                <id>jfrog-central</id>
                <mirrorOf>*</mirrorOf>
                <url>https://databricks.jfrog.io/artifactory/db-maven/</url>
              </mirror>
            </mirrors>
            <servers>
              <server>
                <id>jfrog-central</id>
                <username>gha-service-account</username>
                <password>${JFROG_ACCESS_TOKEN}</password>
              </server>
            </servers>
          </settings>
          EOF

      - name: Resolve all dependencies via JFrog
        shell: bash
        run: |
          set -euo pipefail

          # Run the EXACT same Maven commands as the PR CI workflows.
          # This is the only reliable way to ensure every plugin, provider,
          # and transitive dependency is resolved and cached. Each command
          # mirrors a real CI step from prCheck.yml, prIntegrationTests.yml,
          # or coverageReport.yml.

          # Best-effort steps must not fail the job (the goal is downloading
          # dependencies, not passing tests), but a failure is surfaced as a
          # warning annotation instead of being silently discarded.
          warn_step() {
            echo "::warning::Cache warm step '$1' exited non-zero; continuing (best effort)"
          }

          echo "=== 1/8: spotless:check (formatting-check job) ==="
          mvn -B --errors spotless:check || warn_step "1/8 spotless:check"

          # The install step is the only one allowed to fail the job.
          echo "=== 2/8: install all modules (packaging-tests job) ==="
          mvn -B -pl jdbc-core,assembly-uber,assembly-thin clean install -DskipTests -Dmaven.javadoc.skip=true -Dmaven.source.skip=true -Ddependency-check.skip=true

          echo "=== 3/8: Arrow Patch Tests (unit-tests job, JDK 17+) ==="
          mvn -B -Pjdk21-NioNotOpen -pl jdbc-core test -Dgroups='Jvm17PlusAndArrowToNioReflectionDisabled' -Ddependency-check.skip=true || warn_step "3/8 Arrow Patch Tests"

          echo "=== 4/8: Arrow Allocator Tests (unit-tests job, JDK 17+) ==="
          mvn -B -Pjdk21-NioNotOpen -pl jdbc-core test -Dgroups='Jvm17PlusAndArrowToNioReflectionDisabled' -Dtest="ArrowBufferAllocatorNettyManagerTest,ArrowBufferAllocatorUnsafeManagerTest,ArrowBufferAllocatorUnknownManagerTest" -DforkCount=1 -DreuseForks=false -Ddependency-check.skip=true || warn_step "4/8 Arrow Allocator Tests"

          echo "=== 5/8: Arrow Memory Tests (unit-tests job) ==="
          mvn -B -Plow-memory -pl jdbc-core test -Dtest='DatabricksArrowPatchMemoryUsageTest' -Ddependency-check.skip=true || warn_step "5/8 Arrow Memory Tests"

          echo "=== 6/8: Unit Tests with jacoco (unit-tests job) ==="
          mvn -B -pl jdbc-core clean test -Dtest="DatabricksParameterMetaDataTest#testInitialization" -Dgroups='!Jvm17PlusAndArrowToNioReflectionDisabled' jacoco:report -Ddependency-check.skip=true || warn_step "6/8 Unit Tests with jacoco"

          echo "=== 7/8: Integration test compile (prIntegrationTests job) ==="
          mvn -B -pl jdbc-core compile test-compile -Ddependency-check.skip=true || warn_step "7/8 Integration test compile"

          echo "=== 8/8: Resolve all declared plugins ==="
          mvn -B -pl jdbc-core dependency:resolve-plugins -Ddependency-check.skip=true || warn_step "8/8 Resolve all declared plugins"

          echo "Dependency resolution complete"

      - name: Normalize _remote.repositories before saving cache
        shell: bash
        run: |
          set -euo pipefail

          # Replace 'jfrog-central' with 'central' in _remote.repositories files.
          # These files track which repo ID each artifact was downloaded from. The
          # cache warmer downloads from 'jfrog-central' (the JFrog mirror), but
          # Maven's offline mode expects artifacts to be associated with 'central'
          # (the default Maven Central repo ID). Without this, offline mode refuses
          # cached artifacts with "has not been downloaded from it before".
          REPO_DIR=~/.m2/repository
          if [ ! -d "$REPO_DIR" ]; then
            echo "::warning::${REPO_DIR} does not exist; nothing to normalize"
            exit 0
          fi

          # tr strips the padding some wc implementations add around the count.
          COUNT=$(find "$REPO_DIR" -name '_remote.repositories' -type f -print | wc -l | tr -d '[:space:]')
          # '{} +' batches many files per sed invocation instead of one each.
          find "$REPO_DIR" -name '_remote.repositories' -type f -exec sed -i 's/jfrog-central/central/g' {} +
          echo "Normalized ${COUNT} _remote.repositories markers (jfrog-central -> central)"

      - name: Generate cache key with timestamp
        id: cache-key
        shell: bash
        env:
          # Evaluated by the runner and handed to the script as data.
          POM_HASH: ${{ hashFiles('**/pom.xml') }}
        run: |
          set -euo pipefail

          # Include timestamp so each warmer run creates a new cache entry
          # (GitHub Actions caches are immutable — can't overwrite existing keys).
          # The restore step uses prefix 'maven-deps-' to match the latest entry.
          # Old entries auto-expire after 7 days of no access.
          TIMESTAMP=$(date -u +%Y%m%d%H%M%S)
          echo "key=maven-deps-${TIMESTAMP}-${POM_HASH:-}" >> "$GITHUB_OUTPUT"

      - name: Save Maven dependency cache
        uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830  # v4
        with:
          path: ~/.m2/repository
          key: ${{ steps.cache-key.outputs.key }}