Skip to content

Commit 9b4f18a

Browse files
authored
Merge branch 'main' into feat/server-settings-config
2 parents d2f38dc + c841444 commit 9b4f18a

23 files changed

Lines changed: 2243 additions & 0 deletions

package.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,14 @@
7171
"test-coverage-ci": "cross-env NODE_ENV=test vitest --run --dir ./test --coverage.enabled=true --coverage.reporter=lcovonly --coverage.reporter=text",
7272
"test:integration": "cross-env NODE_ENV=test vitest --run --config vitest.config.integration.ts",
7373
"test:watch": "cross-env NODE_ENV=test vitest --dir ./test --watch",
74+
"test:migrate": "cross-env NODE_ENV=test vitest --run --dir ./scripts/migrate/test",
7475
"prepare": "node ./scripts/prepare.js",
7576
"lint": "eslint",
7677
"lint:fix": "eslint --fix",
78+
"migrate:urls": "node scripts/migrate/migrate-urls.js",
79+
"migrate:users": "node scripts/migrate/migrate-users.js",
80+
"backup:urls": "node scripts/migrate/backup-urls.js",
81+
"backup:users": "node scripts/migrate/backup-users.js",
7782
"format": "prettier --write \"**/*.{js,jsx,ts,tsx,json,md,yml,yaml,css,scss}\" --ignore-path .gitignore --config ./.prettierrc",
7883
"format:check": "prettier --check \"**/*.{js,jsx,ts,tsx,json,md,yml,yaml,css,scss}\" --ignore-path .gitignore --config ./.prettierrc",
7984
"gen-schema-doc": "node ./scripts/doc-schema.js",
Lines changed: 233 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,233 @@
1+
# Git-Proxy v1.19.2 → v2.0.0 migration (MongoDB or file DB)
2+
3+
Operator prep for upgrade, aligned with [finos/git-proxy#1535](https://github.com/finos/git-proxy/issues/1535#issuecomment-4478956510) (these scripts do **not** replace your own DB backup/snapshot).
4+
**Behavior:** dry-run by default for both phases; normalization is idempotent; email apply skips unchanged rows and checks uniqueness before writes; backups are explicit helper scripts plus your own infra.
5+
6+
| Phase | Scripts | Goal |
7+
| ----- | ------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
8+
| **1** | `migrate-urls.js`, `backup-urls.js` | **Repo URL normalization** — append `.git` to `repos.url` where missing (idempotent) |
9+
| **2** | `migrate-users.js`, `backup-users.js` | **Email audit** (blocking issues) + optional **CSV apply**; **ACL audit** — list `canPush` / `canAuthorise` entries that do not resolve to any `User.username` (no silent rewrite) |
10+
11+
Configuration: `scripts/migrate/lib/config.js` and `scripts/migrate/lib/datastore.js`. Report file names and contents: [Report artifacts](#report-artifacts) below.
12+
13+
## npm scripts
14+
15+
```bash
16+
npm run backup:urls
17+
npm run migrate:urls # repo URL normalization — dry-run
18+
npm run migrate:urls -- --apply # apply normalization
19+
20+
npm run backup:users
21+
npm run migrate:users # email + ACL audit (dry-run)
22+
npm run migrate:users -- --apply --csv ./map.csv
23+
```
24+
25+
Equivalent: `node scripts/migrate/<script>.js` from the repository root (same env vars).
26+
27+
Optional local env file (example): `scripts/migrate/envs/local.env`
28+
29+
```bash
30+
set -a && source scripts/migrate/envs/local.env && set +a
31+
npm run migrate:users
32+
```
33+
34+
---
35+
36+
## Database backend (`--dbType`)
37+
38+
All scripts connect through `scripts/migrate/lib/datastore.js`.
39+
40+
| Backend | `--dbType` | When used |
41+
| ----------- | ---------- | ------------------------------------------------- |
42+
| MongoDB | `mongo` | **Default** if `--dbType` and `DB_TYPE` are unset |
43+
| File (neDB) | `fs` | Explicit: `--dbType fs` or `DB_TYPE=fs` |
44+
45+
Priority: CLI `--dbType` → env `DB_TYPE` → default `mongo` (no auto-detection).
46+
47+
Collections / files: `users`, `repos`.
48+
49+
---
50+
51+
## Environment variables
52+
53+
| Variable | Required | Default | Purpose |
54+
| --------------- | -------- | -------------------------------- | -------------------------------------------------- |
55+
| `MONGO_URI` | no | `mongodb://localhost:27017` | MongoDB connection string (`dbType=mongo`) |
56+
| `DB_NAME` | no | `git-proxy` | MongoDB database name |
57+
| `DB_TYPE` | no | `mongo` | Backend: `mongo` or `fs` |
58+
| `USERS_DB_PATH` | no | `./.data/db/users.db` | neDB users file (`dbType=fs`), relative to **cwd** |
59+
| `REPOS_DB_PATH` | no | `./.data/db/repos.db` | neDB repos file (`dbType=fs`), relative to **cwd** |
60+
| `REPORTS_DIR` | no | `reports/<YYYY-MM-DD>-migration` | Directory where report files are written |
61+
62+
CLI overrides (same priority as above for connection): `--mongoUri`, `--dbName`, `--usersDbPath`, `--reposDbPath`.
63+
64+
When `REPORTS_DIR` is set, it is used **as the full output directory** (the dated `reports/<date>-migration` subpath is **not** appended). When unset, reports go under `reports/<today>-migration/` relative to the process working directory.
65+
66+
```bash
67+
# MongoDB (default)
68+
export MONGO_URI="mongodb://host:27017"
69+
export DB_NAME="git-proxy"
70+
export REPORTS_DIR="/var/git-proxy/migration-run-1" # optional
71+
72+
# File DB
73+
export DB_TYPE=fs
74+
export USERS_DB_PATH="./.data/db/users.db"
75+
export REPOS_DB_PATH="./.data/db/repos.db"
76+
```
77+
78+
---
79+
80+
## Report artifacts
81+
82+
Reports are **action-oriented**: YAML/CSV list repos or users that need migration, manual URL fixes, email fixes, or ACL fixes. Repos/users already OK appear only as **counts** in YAML (for example `reposAlreadyFixed`), not as full row lists. This is not a full-database export (use `mongodump` or `backup-users` for users).
83+
84+
### Always written (when the script reaches report generation)
85+
86+
| File | Written by |
87+
| ------------------------- | ---------------------------------------------- |
88+
| `report-{timestamp}.yaml` | `migrate-urls`, `backup-urls`, `migrate-users` |
89+
90+
### Conditional CSV / JSON (created only if the relevant list is non-empty)
91+
92+
| File | Written by | Condition |
93+
| ------------------------------- | ----------------------------- | ---------------------------------- |
94+
| `report-{timestamp}.csv` | `migrate-urls`, `backup-urls` | `changes.length > 0` (URL pending) |
95+
| `url-issues-{timestamp}.csv` | `migrate-urls`, `backup-urls` | `issues.length > 0` (manual URL) |
96+
| `users-audit-{timestamp}.csv` | `migrate-users` | blocking email `users.issues` |
97+
| `acl-orphans-{timestamp}.csv` | `migrate-users` | `acl.orphans.length > 0` |
98+
| `email-changes-{timestamp}.csv` | `migrate-users` (--apply) | `apply.changes.length > 0` |
99+
| `backup-urls-{timestamp}.json` | `backup-urls` only | see backup-urls below |
100+
101+
### Backup-only extras
102+
103+
| File | Written by | Contents |
104+
| ------------------------------- | -------------- | -------------------------------------------------------- |
105+
| `backup-users-{timestamp}.json` | `backup-users` | **All** users (password field excluded) |
106+
| `users-email-{timestamp}.csv` | `backup-users` | **All** users as `username,email` template for CSV apply |
107+
108+
`backup-urls` does **not** dump every repo: the JSON array contains only documents that appear in `changes` (missing `.git`) or `issues` (blank / unsupported URL), each with `backupReason` metadata. If nothing needs migration and there are no URL issues, **no** `backup-urls-*.json` is created and the script exits 0.
109+
110+
CSV validation errors from `--apply --csv` are recorded in YAML (`report.csv.errors`); there is no separate `csv-errors-*.csv`.
111+
112+
### YAML contents (summary)
113+
114+
**Phase 1** (`migrate-urls`, `backup-urls`): `totalRepos`, `reposNeedingUpdate`, `reposAlreadyFixed`, `changes[]` (repos to append `.git`), `issues[]` (manual fix), `issueCount`. After `--apply`: may include `reposUpdated`, `errors`, and `changes[].status` (`updated` / `error` / …). `backup-urls` sets `mode: backup-only`.
115+
116+
**Phase 2** (`migrate-users`): nested structure:
117+
118+
- `mode`: `dry-run` or `apply`
119+
- `users`: audit (`totalUsers`, `counts`, `issues`, `duplicateGroups`, `blockingIssueCount`, …)
120+
- `acl`: `orphanCount`, `orphans[]` (entries in `repos.users.canPush` / `canAuthorise` with no matching `users.username`)
121+
- `apply`: present on `--apply` (`ok`, `reason`, `changes`, `conflicts`, …)
122+
- `csv`: present on `--apply` (`path`, `rowCount`, `errors`)
123+
124+
### Exit codes
125+
126+
| Script | Exit 0 when | Exit 1 when |
127+
| --------------- | ------------------------------------------------------------------------------------------------ | ------------------------------ |
128+
| `migrate-urls` | no apply errors and no URL issues | URL issues and/or apply errors |
129+
| `migrate-users` | no blocking email issues, no ACL orphans, apply OK, no post-apply email conflicts, no CSV errors | any of the above fail |
130+
| `backup-urls` | always (including “nothing to backup”) | fatal error only |
131+
| `backup-users` | success | fatal error only |
132+
133+
After a successful URL apply, a follow-up dry-run should show `reposNeedingUpdate: 0`, but exit code is still **1** if URL **issues** remain (blank or non-http(s) URLs).
134+
135+
### Read-only issue reports (manual fix in the database)
136+
137+
Some CSV files are **audit output only** — no migration script reads them back as input:
138+
139+
| File | Meaning |
140+
| ------------------- | ---------------------------------------------------------------------------------------------------------------- |
141+
| `url-issues-*.csv` | Repos whose `url` is blank or not `http`/`https` — fix in the DB, then re-run `migrate:urls` |
142+
| `acl-orphans-*.csv` | Repo ACL entries whose username does not match any `users.username` — fix in the DB, then re-run `migrate:users` |
143+
144+
Only `migrate-users --apply --csv` consumes a CSV (`username,email` for `users.email`). `email-changes-*.csv` is an apply **log**, not a re-import format.
145+
146+
---
147+
148+
## Phase 1 — Repo URL normalization (append `.git` where missing)
149+
150+
**Goal:** every `repos.url` that v2 will match must include the `.git` suffix where it is missing.
151+
152+
**Why:** v2 resolves repos by **exact** `url` via `getRepoByUrl`; v1 often relied on `name`. Incoming git HTTP traffic is normalized to a URL that includes `.git` (see `parseAction`), while legacy `repos` rows may have been stored without it. Those rows no longer match, so processors such as `checkRepoInAuthorisedList` treat the repo as unauthorized. (The admin UI already requires `.git` when creating new repositories.)
153+
154+
| | v1.19.2 | v2.0.0 |
155+
| ------------ | ------------ | ------------------------------------------ |
156+
| Lookup | `name` | `url` (exact `$eq`) |
157+
| `.git` in DB | not required | required for parity with incoming requests |
158+
159+
**Scripts:** `migrate-urls.js`, `backup-urls.js`; helpers under `lib/` (`analyze-urls.js`, `reporting.js`, `common.js`, `config.js`).
160+
161+
```bash
162+
npm run migrate:urls
163+
npm run backup:urls
164+
npm run migrate:urls -- --apply
165+
```
166+
167+
Notes: trailing `/` is normalized (`.../repo/` → `.../repo.git`). Blank/non-http(s) URLs are reported as issues and require **manual** correction in the database; `url-issues-*.csv` is a checklist only (see [Read-only issue reports](#read-only-issue-reports-manual-fix-in-the-database)).
168+
169+
Phase 1 report files: see [Report artifacts](#report-artifacts) (`report-*.yaml`, `report-*.csv`, `url-issues-*.csv`, `backup-urls-*.json`).
170+
171+
---
172+
173+
## Phase 2 — User emails & ACL audit
174+
175+
**Goal:** unblock v2 pushes: valid **unique** `users.email` (audit + CSV apply fallback); surface **ACL orphan** entries that must be corrected **manually** in the database (scripts never rewrite repo ACL).
176+
177+
**migrate-urls vs migrate-users**
178+
179+
| | `migrate-urls.js` | `migrate-users.js` |
180+
| ----------- | ---------------------- | ------------------------------- |
181+
| Apply flags | `--apply` | `--apply` **and** `--csv` |
182+
| Writes | `repos.url` only | `users.email` from CSV only |
183+
| Always | normalization analysis | email audit + ACL orphan report |
184+
185+
`backup-users.js` is separate (not invoked by `migrate-users`) and writes a **full** users JSON snapshot plus `users-email-*.csv` for all users (see [Report artifacts](#report-artifacts)).
186+
187+
### Recommended order (emails → ACL → verify)
188+
189+
1. **Emails** — run `npm run migrate:users` (dry-run). Resolve every blocking row in `users-audit-*.csv` / YAML `users.issues` (missing/invalid/duplicate email). Where CSV apply is appropriate, run `npm run migrate:users -- --apply --csv ./mappings.csv` and confirm `apply.ok` in the report.
190+
2. **ACL orphans** — while `acl.orphanCount` (console: `ACL orphans`) is greater than zero, fix each orphan listed in `acl-orphans-*.csv` or YAML `acl.orphans` in the database. Migration tools **do not** update `repos.users.canPush` or `repos.users.canAuthorise`.
191+
3. **Verify** — run `npm run migrate:users` (dry-run) again after each batch of fixes. Phase 2 is complete only when `blockingIssueCount` is **0**, `orphanCount` is **0**, and the process exits **0** (see [Exit codes](#exit-codes)).
192+
193+
```bash
194+
npm run migrate:users
195+
npm run backup:users
196+
npm run migrate:users -- --apply --csv ./mappings.csv
197+
# … manual ACL fixes in the database …
198+
npm run migrate:users # repeat until ACL orphans: 0 and exit 0
199+
```
200+
201+
### ACL orphans (manual fix required)
202+
203+
An **orphan** is a username string stored under a repo’s `users.canPush` or `users.canAuthorise` that does not match any document in `users` (match is trimmed, case-insensitive on `username`). These stale or mistyped ACL entries keep `migrate-users` in a failing state until they are removed or aligned with a real user record.
204+
205+
`acl-orphans-{timestamp}.csv` columns: `RepoID`, `RepoName`, `RepoURL`, `Field` (`canPush` / `canAuthorise`), `OrphanUsername`, `NormalizedOrphan`, `Index`. Like `url-issues-*.csv`, this file is **read-only** — it cannot be “loaded back” or applied by any script; use it as a work list, fix data in your environment, then re-run the dry-run (see [Read-only issue reports](#read-only-issue-reports-manual-fix-in-the-database)).
206+
207+
For **apply** (`migrate-users --apply --csv ...`): CSV header must be `username,email` (`lib/csv.js`). The command exits `1` on blocking email issues, ACL orphans, CSV/apply failures, or duplicate-email simulation (see [Exit codes](#exit-codes)).
208+
209+
CSV input: UTF‑8, one row per line, only those two columns; parser is minimal (quoted commas OK, **`""`** escapes inside fields not supported). Prefer export without BOM.
210+
211+
Phase 2 report files (when applicable): `users-audit-*.csv`, `acl-orphans-*.csv`, `email-changes-*.csv`, plus `report-*.yaml` (full nested report).
212+
213+
---
214+
215+
## Pre-upgrade checklist
216+
217+
```bash
218+
export MONGO_URI="mongodb://host:27017"
219+
export DB_NAME="git-proxy"
220+
221+
# Phase 1 — repo URL normalization
222+
npm run migrate:urls
223+
npm run backup:urls
224+
npm run migrate:urls -- --apply
225+
npm run migrate:urls # expect reposNeedingUpdate: 0; exit 1 if URL issues remain
226+
227+
# Phase 2 — email + ACL (timing vs app upgrade — your runbook)
228+
npm run migrate:users
229+
npm run backup:users
230+
npm run migrate:users -- --apply --csv ./mappings.csv
231+
# fix ACL orphans manually (acl-orphans-*.csv is not re-importable)
232+
npm run migrate:users # repeat until orphanCount: 0 and exit 0
233+
```

scripts/migrate/backup-urls.js

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
#!/usr/bin/env node
2+
3+
/**
4+
* Copyright 2026 GitProxy Contributors
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
/**
20+
* Backup: Create a backup of repos missing the .git suffix, plus repos with URL issues, before migration
21+
*
22+
* BACKUP ONLY
23+
*
24+
* Usage:
25+
* npm run backup:urls
26+
* # optional: --dbType mongo|fs (default mongo)
27+
*/
28+
29+
const config = require('./lib/config');
30+
const { createDatastoreFromArgv } = require('./lib/datastore');
31+
const { analyzeReposWithDatastore } = require('./lib/analyze-urls');
32+
const { generateReports } = require('./lib/reporting');
33+
const { createBackup } = require('./lib/common');
34+
35+
config.ensureReportsDir();
36+
37+
/**
 * Entry point for the backup-only phase of the repo URL migration.
 *
 * Analyzes repos through the datastore built from the CLI arguments, collects
 * the repo documents referenced by `report.changes` (missing `.git`) and
 * `report.issues` (URL issues), hands them to `createBackup`, and then calls
 * `generateReports` with `report.mode` set to `'backup-only'`.
 *
 * When nothing needs migration and there are no URL issues, no backup is
 * created and no report is generated.
 *
 * Exit code: 0 on success (including "nothing to back up"), 1 on a fatal
 * error. The exit code is recorded and `process.exit` is only called after
 * the `finally` block, so the datastore is always closed first; calling
 * `process.exit` inside `try`/`catch` would terminate the process before
 * `finally` could run.
 *
 * @returns {Promise<void>} Does not resolve in practice: the process exits
 *   once cleanup has completed.
 */
async function main() {
  const argv = process.argv.slice(2);
  let ds;
  let exitCode = 0;

  try {
    ds = await createDatastoreFromArgv(argv);
    const { allRepos, report } = await analyzeReposWithDatastore(ds);
    // Guard both lists the same way so a report without them cannot crash the loops.
    const changes = Array.isArray(report.changes) ? report.changes : [];
    const issues = Array.isArray(report.issues) ? report.issues : [];

    console.log('\n=== BACKUP PHASE ===');

    if (report.reposNeedingUpdate === 0 && issues.length === 0) {
      console.log('No repos need migration - backup not necessary');
    } else {
      // Index repos by stringified _id so report entries can be resolved by repoId.
      const repoById = new Map(
        allRepos.map((r) => [r._id?.toString?.() ?? String(r._id ?? ''), r]),
      );
      const backupData = [];

      for (const change of changes) {
        const repo = repoById.get(change.repoId);
        if (!repo) continue;
        backupData.push({
          ...repo,
          backupReason: 'missing-dot-git',
          normalizedUrl: change.oldUrl,
          newUrl: change.newUrl,
        });
      }

      for (const issue of issues) {
        const repo = repoById.get(issue.repoId);
        if (!repo) continue;
        backupData.push({
          ...repo,
          backupReason: 'url-issue',
          rawUrl: issue.rawUrl,
          normalizedUrl: issue.normalizedUrl,
          issueReason: issue.reason,
          issueScheme: issue.scheme,
        });
      }

      const backupPath = createBackup(config.reportsDir, 'backup-urls', backupData);
      console.log(`SUCCESS Backup created: ${backupPath}`);
      console.log(
        `   (${report.reposNeedingUpdate} repos missing .git, ${issues.length} URL issues)`,
      );
      console.log('\nBackup completed. Ready to apply migration:');
      console.log('   node scripts/migrate/migrate-urls.js --apply');

      const timestamp = Date.now();
      report.mode = 'backup-only';
      generateReports(config.reportsDir, report, timestamp);
    }
  } catch (error) {
    // Tolerate non-Error throwables so the fatal-error path itself cannot throw.
    console.error('FATAL ERROR:', error instanceof Error ? error.message : error);
    exitCode = 1;
  } finally {
    if (ds) {
      // Best-effort close: a failure to close must not mask the real outcome.
      await ds.close().catch(() => {});
    }
  }

  // Exit explicitly only after cleanup, in case the datastore leaves handles open.
  process.exit(exitCode);
}
100+
101+
main();

0 commit comments

Comments
 (0)