Skip to content

Commit e7a642f

Browse files
committed
Merge branch 'add-secret-scanning' into 'main'
security: add gitleaks secret scanning to prevent secret leaks Closes #159 See merge request postgres-ai/postgresai!224
2 parents fad8c85 + ab18524 commit e7a642f

File tree

5 files changed

+232
-0
lines changed

5 files changed

+232
-0
lines changed

.githooks/pre-commit

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#!/usr/bin/env bash
2+
# Pre-commit hook: scan staged files for secrets using gitleaks
3+
# Activate with: git config core.hooksPath .githooks
4+
5+
set -euo pipefail
6+
7+
if ! command -v gitleaks &>/dev/null; then
8+
echo "❌ gitleaks not found — install it to enable pre-commit secret scanning:" >&2
9+
echo " https://github.com/gitleaks/gitleaks#installing" >&2
10+
echo " (macOS: brew install gitleaks)" >&2
11+
exit 1
12+
fi
13+
14+
echo "🔍 Running gitleaks secret scan on staged files..."
15+
16+
if ! gitleaks protect --staged \
17+
--config "$(git rev-parse --show-toplevel)/gitleaks.toml" \
18+
--verbose --redact 2>&1; then
19+
echo "" >&2
20+
echo "❌ Secret detected in staged files. Commit blocked." >&2
21+
echo " Review the output above and remove/rotate the secret." >&2
22+
echo " To add a false positive to the allowlist, edit gitleaks.toml." >&2
23+
exit 1
24+
fi
25+
26+
echo "✅ No secrets detected."

.gitlab-ci.yml

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,91 @@ default:
1818
interruptible: true
1919

2020
stages:
21+
- security
2122
- build
2223
- test
2324
- publish
2425
- preview
2526

2627
# Build images from current code for e2e tests
2728
# Images are pushed to GitLab Container Registry and pulled by test jobs
29+
30+
# ─── Secret Scanning ───────────────────────────────────────────────────────────
31+
gitleaks:
32+
stage: security
33+
image:
34+
name: ghcr.io/gitleaks/gitleaks:v8.30.0@sha256:105ac66a57b2bb8afb61a3b8a5dcc4817773d03724a7e8a515214cfe58225556
35+
entrypoint: [""] # Override default entrypoint so GitLab CI can run shell commands
36+
variables:
37+
GIT_DEPTH: "0" # Full clone required — shallow clone silently misses commit history
38+
script:
39+
- |
40+
# Scope scan to new commits only — avoids re-scanning repo history on every run
41+
if [[ "${CI_MERGE_REQUEST_DIFF_BASE_SHA:-}" =~ ^[0-9a-f]{40}$ ]]; then
42+
# MR pipeline: scan only commits added in this MR
43+
gitleaks detect --source . --config gitleaks.toml --verbose --redact \
44+
--report-path gitleaks-report.json --report-format json \
45+
--log-opts="${CI_MERGE_REQUEST_DIFF_BASE_SHA}..${CI_COMMIT_SHA}"
46+
elif [ -n "${CI_COMMIT_BEFORE_SHA:-}" ] && \
47+
[ "${CI_COMMIT_BEFORE_SHA}" != "0000000000000000000000000000000000000000" ]; then
48+
# Push to existing branch: scan only new commits
49+
gitleaks detect --source . --config gitleaks.toml --verbose --redact \
50+
--report-path gitleaks-report.json --report-format json \
51+
--log-opts="${CI_COMMIT_BEFORE_SHA}..${CI_COMMIT_SHA}"
52+
else
53+
# New branch or force push: scan commits unique to this branch only
54+
MERGE_BASE=$(git merge-base "origin/${CI_DEFAULT_BRANCH:-main}" "$CI_COMMIT_SHA" 2>/dev/null || echo "")
55+
if [ -n "$MERGE_BASE" ] && [ "$MERGE_BASE" != "$CI_COMMIT_SHA" ]; then
56+
LOG_OPTS="${MERGE_BASE}..${CI_COMMIT_SHA}"
57+
else
58+
# Fallback: scan only HEAD commit (safe — bounded to a single commit)
59+
LOG_OPTS="${CI_COMMIT_SHA}^!"
60+
fi
61+
gitleaks detect --source . --config gitleaks.toml --verbose --redact \
62+
--report-path gitleaks-report.json --report-format json \
63+
--log-opts="$LOG_OPTS"
64+
fi
65+
artifacts:
66+
paths:
67+
- gitleaks-report.json
68+
when: on_failure
69+
expire_in: 7 days
70+
rules:
71+
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
72+
- if: $CI_PIPELINE_SOURCE == "push"
73+
allow_failure: false
74+
# ───────────────────────────────────────────────────────────────────────────────
75+
76+
# ─── Gitleaks Rule + Allowlist Tests ──────────────────────────────────────────
77+
gitleaks-rule-test:
78+
stage: security
79+
image:
80+
name: ghcr.io/gitleaks/gitleaks:v8.30.0@sha256:105ac66a57b2bb8afb61a3b8a5dcc4817773d03724a7e8a515214cfe58225556
81+
entrypoint: [""]
82+
before_script:
83+
- apk add --no-cache python3
84+
script:
85+
- |
86+
echo "=== Test 1: Detection rules must fire on synthetic fixtures ==="
87+
python3 -c "import re; src=open('gitleaks.toml').read(); src=re.sub(r'paths = \\[.*?\\n\\]','paths = []',src,flags=re.DOTALL); open('/tmp/gitleaks-rules-only.toml','w').write(src)"
88+
FIXTURE_EXIT=0
89+
gitleaks detect --source tests/fixtures/gitleaks/ --config /tmp/gitleaks-rules-only.toml \
90+
--no-git --verbose --redact 2>&1 || FIXTURE_EXIT=$?
91+
if [ "$FIXTURE_EXIT" -eq 0 ]; then
92+
echo "FAIL: Rules did not fire on fixture file"
93+
exit 1
94+
fi
95+
echo "PASS: Rules fired on fixtures (exit=$FIXTURE_EXIT)"
96+
echo ""
97+
echo "=== Test 2: Allowlist must suppress fixtures in full-repo scan ==="
98+
gitleaks detect --source . --config gitleaks.toml --no-git --redact
99+
echo "PASS: Full repo scan clean (allowlist working)"
100+
rules:
101+
- if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
102+
- if: '$CI_PIPELINE_SOURCE == "push"'
103+
allow_failure: false
104+
# ───────────────────────────────────────────────────────────────────────────────
105+
28106
build:test:images:
29107
stage: build
30108
image: docker:27.3

gitleaks.toml

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
title = "postgres-ai gitleaks config"
2+
3+
[extend]
4+
# extend the default ruleset
5+
useDefault = true
6+
7+
# GitLab token types — https://docs.gitlab.com/security/tokens/
8+
[[rules]]
9+
id = "gitlab-pat"
10+
description = "GitLab Personal Access Token"
11+
regex = '''\bglpat-[A-Za-z0-9_-]{20,}\b'''
12+
tags = ["gitlab", "token"]
13+
14+
[[rules]]
15+
id = "gitlab-runner-token"
16+
description = "GitLab Runner Authentication Token"
17+
regex = '''\bglrt-[A-Za-z0-9_-]{20,}\b'''
18+
tags = ["gitlab", "token"]
19+
20+
[[rules]]
21+
id = "gitlab-deploy-token"
22+
description = "GitLab Deploy Token"
23+
regex = '''\bgldt-[A-Za-z0-9_-]{20,}\b'''
24+
tags = ["gitlab", "token"]
25+
26+
[[rules]]
27+
id = "gitlab-trigger-token"
28+
description = "GitLab Pipeline Trigger Token"
29+
regex = '''\bglptt-[A-Za-z0-9_-]{20,}\b'''
30+
tags = ["gitlab", "token"]
31+
32+
[[rules]]
33+
id = "gitlab-oauth-app-secret"
34+
description = "GitLab OAuth Application Secret"
35+
regex = '''\bgloas-[A-Za-z0-9_-]{20,}\b'''
36+
tags = ["gitlab", "token"]
37+
38+
[[rules]]
39+
id = "gitlab-scim-token"
40+
description = "GitLab SCIM Token"
41+
regex = '''\bglsoat-[A-Za-z0-9_-]{20,}\b'''
42+
tags = ["gitlab", "token"]
43+
44+
[[rules]]
45+
id = "anthropic-api-key"
46+
description = "Anthropic API Key"
47+
regex = '''\bsk-ant-api03-[A-Za-z0-9_-]{20,}\b'''
48+
tags = ["anthropic", "ai"]
49+
50+
[[rules]]
51+
id = "anthropic-oauth-token"
52+
description = "Anthropic OAuth Token"
53+
regex = '''\bsk-ant-oat[0-9]{2}-[A-Za-z0-9_-]{20,}\b'''
54+
tags = ["anthropic", "ai"]
55+
56+
[[rules]]
57+
id = "hetzner-api-token"
58+
description = "Hetzner Cloud API Token (variable assignment)"
59+
regex = '''(?i)\b(hetzner_token|hetzner_api_token|hetzner_api_key|hcloud_token|hcloud_api_key|hz_token)\b\s*[:=]\s*["']?[A-Za-z0-9_-]{60,}["']?'''
60+
tags = ["hetzner", "cloud"]
61+
62+
# Global allowlist — gitleaks uses [[allowlists]] (array of tables)
63+
[[allowlists]]
64+
description = "False positives: test fixtures, example data, CI collections, rotated secrets"
65+
66+
regexes = [
67+
'''example\.com''',
68+
'''dummy[-_]?token''',
69+
'''fake[-_]?key''',
70+
'''test[-_]?secret''',
71+
]
72+
73+
paths = [
74+
# Unit test fixtures with dummy credentials
75+
'''cli/test/''',
76+
'''^spec/''',
77+
'''__tests__/''',
78+
# Postman collections (Stripe pk_test_ keys, recorded calls)
79+
'''\.ci/.*\.postman_collection\.json''',
80+
# Sqitch DB migration verification scripts (test JWTs, example secrets)
81+
'''db/verify/''',
82+
# Terraform test files (test passwords/credentials in .tftest.hcl)
83+
'''terraform/.*tests/''',
84+
'''terraform/.*\.tftest\.hcl$''',
85+
# GCP/AWS user_data scripts (cloud-init bootstrap, example credentials)
86+
'''terraform/.*/user_data\.sh$''',
87+
# Gitleaks test suites (intentionally contain synthetic secrets for rule testing)
88+
'''tests/gitleaks/fixtures/''',
89+
# Our own fixture directory
90+
'''tests/fixtures/gitleaks/''',
91+
]
92+
93+
# Commit SHA allowlist: known rotated/revoked secrets pending history cleanup
94+
# glpat-tars3 token committed by bench-bot on 2026-02-25 — token already revoked (infra#20)
95+
commits = ["5fbe547dd4b4efb052d8642d0613ac3e32f1fb52"]

tests/fixtures/gitleaks/README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Gitleaks Test Fixtures
2+
3+
Synthetic secrets used to verify that gitleaks rules fire correctly.
4+
These are intentionally fake — not real credentials.
5+
6+
This directory is allowlisted in `gitleaks.toml` so the CI scan ignores it.
7+
8+
## Verify the rules work
9+
10+
```bash
11+
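# Exits 1 (secrets detected) only when run against a copy of the config with the allowlist `paths` emptied, as the gitleaks-rule-test CI job does — with gitleaks.toml as-is this directory is allowlisted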
12+
gitleaks detect --no-git --source tests/fixtures/gitleaks/ --config gitleaks.toml --verbose
13+
14+
# Should exit 0 (allowlisted path)
15+
gitleaks detect --source . --config gitleaks.toml --verbose
16+
```
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# SYNTHETIC TEST FIXTURES — NOT REAL SECRETS
2+
# Used to verify gitleaks detection rules.
3+
# This directory is allowlisted in gitleaks.toml for normal scans.
4+
5+
GITLAB_PAT=glpat-xxxxxxxxxxxxxxxxxxxxxxxxxxxx
6+
GITLAB_RUNNER=glrt-xxxxxxxxxxxxxxxxxxxxxxxxxxxx
7+
GITLAB_DEPLOY=gldt-xxxxxxxxxxxxxxxxxxxxxxxxxxxx
8+
GITLAB_TRIGGER=glptt-xxxxxxxxxxxxxxxxxxxxxxxxxxxx
9+
GITLAB_OAUTH=gloas-xxxxxxxxxxxxxxxxxxxxxxxxxxxx
10+
ANTHROPIC_API_KEY=sk-ant-api03-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
11+
ANTHROPIC_OAUTH=sk-ant-oat01-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
12+
13+
# GitLab SCIM token (synthetic — for rule testing only)
14+
GITLAB_SCIM=glsoat-xxxxxxxxxxxxxxxxxxxxxxxx
15+
16+
# Hetzner API token (synthetic — for rule testing only)
17+
hetzner_token=AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAaaaa

0 commit comments

Comments
 (0)