Skip to content

Warm Maven Dependency Cache #14

Warm Maven Dependency Cache

Warm Maven Dependency Cache #14

name: Warm Maven Dependency Cache
# This workflow pre-downloads all Maven dependencies via JFrog Artifactory
# and saves them to the GitHub Actions cache. Forked PRs (which cannot
# authenticate to JFrog) restore this cache to build without credentials.
#
# Triggers:
# - push to main when pom.xml changes (keeps cache fresh after dep updates)
# - daily schedule (prevents 7-day cache eviction)
# - manual dispatch (with optional PR number to warm cache for a fork's pom.xml)
on:
push:
branches: [main]
paths: ['**/pom.xml']
schedule:
- cron: '0 6 * * *' # Daily at 06:00 UTC
workflow_dispatch:
inputs:
pr_number:
description: 'PR number to warm cache for (reads pom.xml from the PR branch). Leave empty to warm from main.'
required: false
type: string
permissions:
id-token: write
contents: read
pull-requests: read # Needed to read PR metadata for fork checkout
jobs:
warm-cache:
# Run on Linux only. Maven repository contents (JARs/POMs) are platform-independent.
# Windows forked PRs restore this same cache via the restore-keys prefix match.
# Note: Windows runners in databricks-protected-runner-group lack bash, which
# is required for the OIDC token exchange scripts.
runs-on:
group: databricks-protected-runner-group
labels: linux-ubuntu-latest
steps:
- name: Set up JDK
uses: actions/setup-java@c1e323688fd81a25caa38c78aa6df2d33d3e20d9 # v4
with:
java-version: 21
distribution: 'adopt'
# If PR number provided, checkout only pom.xml files from the fork (security: no source code)
- name: Checkout PR pom.xml files (sparse)
if: inputs.pr_number != ''
shell: bash
run: |
set -euo pipefail
# Fetch PR metadata
PR_DATA=$(curl -sLS \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer ${{ github.token }}" \
"https://api.github.com/repos/${{ github.repository }}/pulls/${{ inputs.pr_number }}")
FORK_REPO=$(echo "$PR_DATA" | jq -r '.head.repo.full_name')
FORK_REF=$(echo "$PR_DATA" | jq -r '.head.ref')
echo "Warming cache for PR #${{ inputs.pr_number }} from ${FORK_REPO}@${FORK_REF}"
# Sparse checkout: only pom.xml files (no source code from fork)
git init .
git remote add fork "https://github.com/${FORK_REPO}.git"
git config core.sparseCheckout true
echo "**/pom.xml" > .git/info/sparse-checkout
echo "pom.xml" >> .git/info/sparse-checkout
git fetch --depth=1 fork "${FORK_REF}"
git checkout FETCH_HEAD
- name: Checkout main branch
if: inputs.pr_number == ''
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- name: Get JFrog OIDC token
shell: bash
run: |
set -euo pipefail
# Get GitHub OIDC ID token
ID_TOKEN=$(curl -sLS \
-H "User-Agent: actions/oidc-client" \
-H "Authorization: Bearer $ACTIONS_ID_TOKEN_REQUEST_TOKEN" \
"${ACTIONS_ID_TOKEN_REQUEST_URL}&audience=jfrog-github" | jq .value | tr -d '"')
echo "::add-mask::${ID_TOKEN}"
# Exchange for JFrog access token
ACCESS_TOKEN=$(curl -sLS -XPOST -H "Content-Type: application/json" \
"https://databricks.jfrog.io/access/api/v1/oidc/token" \
-d "{\"grant_type\": \"urn:ietf:params:oauth:grant-type:token-exchange\", \"subject_token_type\":\"urn:ietf:params:oauth:token-type:id_token\", \"subject_token\": \"${ID_TOKEN}\", \"provider_name\": \"github-actions\"}" | jq .access_token | tr -d '"')
echo "::add-mask::${ACCESS_TOKEN}"
if [ -z "$ACCESS_TOKEN" ] || [ "$ACCESS_TOKEN" = "null" ]; then
echo "FAIL: Could not extract JFrog access token"
exit 1
fi
echo "JFROG_ACCESS_TOKEN=${ACCESS_TOKEN}" >> "$GITHUB_ENV"
echo "JFrog OIDC token obtained successfully"
- name: Configure Maven with JFrog credentials
shell: bash
run: |
set -euo pipefail
mkdir -p ~/.m2
cat > ~/.m2/settings.xml << EOF
<settings>
<mirrors>
<mirror>
<id>jfrog-central</id>
<mirrorOf>*</mirrorOf>
<url>https://databricks.jfrog.io/artifactory/db-maven/</url>
</mirror>
</mirrors>
<servers>
<server>
<id>jfrog-central</id>
<username>gha-service-account</username>
<password>${JFROG_ACCESS_TOKEN}</password>
</server>
</servers>
</settings>
EOF
- name: Resolve all dependencies via JFrog
shell: bash
run: |
set -euo pipefail
# Step 1: Install all modules — resolves external dependencies from JFrog and
# installs inter-module SNAPSHOTs (e.g., jdbc-core used by assembly-thin/uber).
echo "=== Step 1: Installing all modules ==="
mvn -B install -DskipTests -Dmaven.javadoc.skip=true -Dmaven.source.skip=true -Ddependency-check.skip=true
# Step 2: Run the exact same commands that PR workflows use.
# This ensures ALL plugins, providers, and metadata are resolved and cached,
# including test-time artifacts (surefire-junit-platform, jacoco agent) and
# build-time plugins (spotless, toolchains, owasp) that mvn install alone
# doesn't trigger.
# Step 2: Run a real unit test to trigger full surefire provider resolution.
# surefire-junit-platform is resolved lazily at test execution time, not at
# plugin initialization. We need at least one test to actually run.
echo "=== Step 2: Running a single unit test to resolve surefire provider ==="
mvn -B -pl jdbc-core test -Dtest="DatabricksParameterMetaDataTest#testInitialization" -Ddependency-check.skip=true || true
echo "=== Step 3: Running spotless check ==="
mvn -B --errors spotless:check || true
# Step 4: Run jacoco with a real test to resolve jacoco agent + report plugins
echo "=== Step 4: Running jacoco coverage ==="
mvn -B -pl jdbc-core test -Dtest="DatabricksParameterMetaDataTest#testInitialization" jacoco:report -Ddependency-check.skip=true || true
echo "=== Step 5: Running integration test compilation ==="
mvn -B -pl jdbc-core compile test-compile -Ddependency-check.skip=true || true
echo "Dependency resolution complete"
- name: Remove _remote.repositories before saving cache
shell: bash
run: |
# Remove _remote.repositories marker files before saving. These track
# which remote repo ID each artifact was downloaded from (jfrog-central).
# Without them, forked PRs can use the cache in offline mode without
# Maven complaining about repo ID mismatches.
COUNT=$(find ~/.m2/repository -name '_remote.repositories' -delete -print | wc -l)
echo "Removed ${COUNT} _remote.repositories markers"
- name: Save Maven dependency cache
uses: actions/cache/save@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
with:
path: ~/.m2/repository
key: maven-deps-${{ hashFiles('**/pom.xml') }}