Skip to content

Commit e046237

Browse files
Merge pull request #40 from TemoaProject/feat/temoa_hash_in_manifest
2 parents 9d04230 + 906d3bb commit e046237

6 files changed

Lines changed: 160 additions & 12 deletions

File tree

docs/source/usage.md

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ Prepares a dataset for release by uploading it to the staging area and updating
3333
uv run datamanager prepare <dataset-name.sqlite> <path/to/local/file.sqlite>
3434
```
3535

36+
When preparing a dataset, you will be prompted for an optional **Temoa Repository Hash** (a git commit hash). It records which version of the Temoa repository this database is intended to work with. You can:
37+
38+
- Enter a valid git commit hash: 4–40 hexadecimal characters (e.g., `abc1234` or `a1b2c3d4e5f6...`)
39+
- Press Enter to skip (optional field)
40+
3641
After running `prepare`, follow the on-screen instructions:
3742

3843
1. `git add manifest.json`
@@ -44,12 +49,20 @@ After running `prepare`, follow the on-screen instructions:
4449

4550
### `list-datasets`
4651

47-
Lists all datasets currently tracked in `manifest.json`.
52+
Lists all datasets currently tracked in `manifest.json`, including the latest version, update time, SHA256 hash, and Temoa repository hash (if available).
4853

4954
```bash
5055
uv run datamanager list-datasets
5156
```
5257

58+
The output includes:
59+
60+
- **Dataset Name**: The logical name of the dataset
61+
- **Latest Version**: The most recent version tag
62+
- **Last Updated**: When the latest version was created (relative time and absolute timestamp)
63+
- **SHA256**: First 12 characters of the file hash
64+
- **Temoa Hash**: First 12 characters of the Temoa repository commit hash, or the full hash if it is 12 characters or shorter ("N/A" if not specified)
65+
5366
![list_datasets](../../assets/list_datasets.png)
5467

5568
### `pull`

docs/source/workflow.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ Use the `datamanager` tool to stage your changes. The `prepare` command handles
2121
uv run datamanager prepare energy-data.sqlite ./local-files/new-energy.sqlite
2222
```
2323

24-
The tool will guide you through the process. For other maintenance tasks like `rollback` or `delete`, use the corresponding command.
24+
The tool will guide you through the process, including an optional prompt for the **Temoa Repository Hash** (a git commit hash) that records which version of the Temoa repository this database is intended to work with. For other maintenance tasks like `rollback` or `delete`, use the corresponding command.
2525

2626
## Step 3: Commit and Push
2727

manifest.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
"r2_object_key": "test_database/v4-6d60f0035a80de92c3f3df433212699e0584a09a7d4943693ae0889d98640641.sqlite",
1111
"diffFromPrevious": "diffs/test_database.sqlite/diff-v3-to-v4.diff",
1212
"commit": "5803a97",
13+
"temoaRepoHash": null,
1314
"description": "testing the autogenerating data docs script"
1415
},
1516
{
@@ -19,6 +20,7 @@
1920
"r2_object_key": "test_database/v3-6c37e0744a6f49f8b3e5b24b74080c2ae845b925633ccefa81193201639bee12.sqlite",
2021
"diffFromPrevious": "diffs/test_database.sqlite/diff-v2-to-v3.diff",
2122
"commit": "ecc49b5",
23+
"temoaRepoHash": null,
2224
"description": "testing sql diffing with summary"
2325
},
2426
{
@@ -28,6 +30,7 @@
2830
"r2_object_key": "test_database/v2-e287b00772296e3ae8d65699570662ff316d8dae50deef4041fde65ca73202a5.sqlite",
2931
"diffFromPrevious": "diffs/test_database.sqlite/diff-v1-to-v2.diff",
3032
"commit": "a621125",
33+
"temoaRepoHash": null,
3134
"description": "updating test_database to get multiple versions"
3235
}
3336
]

src/datamanager/__main__.py

Lines changed: 91 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import subprocess
33
from datetime import datetime, timezone
44
import tempfile
5+
import re
56
from dateutil.parser import isoparse
67
from pathlib import Path
78

@@ -41,6 +42,22 @@ def _rel(iso: str) -> str:
4142
return f"{hours} h ago"
4243

4344

45+
def _validate_temoa_hash(temoa_hash: str) -> bool:
46+
"""
47+
Validates that a temoa repo hash looks like a valid git commit hash.
48+
accepts 4-40 hexadecimal characters (case-insensitive).
49+
"""
50+
if not temoa_hash or not temoa_hash.strip():
51+
return True # Empty is allowed (optional field)
52+
53+
temoa_hash = temoa_hash.strip()
54+
# Git commit hashes are hexadecimal and can be 4-40 characters
55+
if re.match(r"^[a-fA-F0-9]{4,40}$", temoa_hash):
56+
return True
57+
58+
return False
59+
60+
4461
# Initialize Typer app and Rich console
4562
app = typer.Typer(
4663
name="datamanager",
@@ -97,15 +114,26 @@ def verify(ctx: typer.Context) -> None:
97114
def list_datasets(ctx: typer.Context) -> None:
98115
"""Lists all datasets tracked in the manifest."""
99116
data = manifest.read_manifest()
100-
table = Table("Dataset Name", "Latest Version", "Last Updated", "SHA256")
117+
table = Table(
118+
"Dataset Name", "Latest Version", "Last Updated", "SHA256", "Temoa Hash"
119+
)
101120
for item in data:
102121
latest = item["history"][0]
122+
temoa_hash_display = "N/A"
123+
if latest.get("temoaRepoHash"):
124+
temoa_hash_display = (
125+
f"{latest['temoaRepoHash'][:12]}..."
126+
if len(str(latest["temoaRepoHash"])) > 12
127+
else str(latest["temoaRepoHash"])
128+
)
129+
103130
table.add_row(
104131
item["fileName"],
105132
latest["version"],
106133
# latest["timestamp"],
107134
f"{_rel(latest['timestamp'])} ({latest['timestamp']})",
108135
f"{latest['sha256'][:12]}...",
136+
temoa_hash_display,
109137
)
110138
console.print(table)
111139

@@ -128,8 +156,12 @@ def _run_pull_logic(name: str, version: str, output: Optional[Path]) -> None:
128156
else:
129157
final_path = output
130158

159+
temoa_hash_info = ""
160+
if version_entry.get("temoaRepoHash"):
161+
temoa_hash_info = f", temoa: {version_entry['temoaRepoHash']}"
162+
131163
console.print(
132-
f"Pulling version [magenta]{version_entry['version']}[/] (commit: {version_entry['commit']}) to [cyan]{final_path}[/]"
164+
f"Pulling version [magenta]{version_entry['version']}[/] (commit: {version_entry['commit']}{temoa_hash_info}) to [cyan]{final_path}[/]"
133165
)
134166

135167
success = core.pull_and_verify(
@@ -190,10 +222,12 @@ def _pull_interactive(ctx: typer.Context) -> None:
190222
console.print(f"[red]Error: No version history found for {selected_name}.[/]")
191223
return
192224

193-
version_choices = [
194-
f"{entry['version']} (commit: {entry['commit']}, {_rel(entry['timestamp'])})"
195-
for entry in dataset["history"]
196-
]
225+
# Build one menu label per history entry. A missing OR null temoaRepoHash is
# shown as "N/A": dict.get's default only covers a missing key, and the
# manifest stores an explicit null when no hash was given.
version_choices = [
    f"{entry['version']} (commit: {entry['commit']}, "
    f"{_rel(entry['timestamp'])}, temoa: {entry.get('temoaRepoHash') or 'N/A'})"
    for entry in dataset["history"]
]
197231
selected_version_str = questionary.select(
198232
"Which version would you like to pull?", choices=version_choices
199233
).ask()
@@ -231,6 +265,48 @@ def _run_prepare_logic(ctx: typer.Context, name: str, file: Path) -> None:
231265
dataset = manifest.get_dataset(name)
232266
client = core.get_r2_client() # Moved up to be available for diffing
233267

268+
# Prompt for temoa repo hash (optional)
269+
temoa_hash = None
270+
if not ctx.obj.get("no_prompt"):
271+
console.print("\n[bold]Temoa Repository Hash[/]")
272+
console.print(
273+
"This helps track which version of the temoa repository this database works against."
274+
)
275+
276+
while True:
277+
temoa_hash_input = questionary.text(
278+
"Enter the temoa repository commit hash (optional, press Enter to skip):",
279+
default="",
280+
).ask()
281+
282+
if not temoa_hash_input or not temoa_hash_input.strip():
283+
console.print("Skipping temoa repo hash (optional field).")
284+
break
285+
286+
temoa_hash_candidate = temoa_hash_input.strip()
287+
if _validate_temoa_hash(temoa_hash_candidate):
288+
temoa_hash = temoa_hash_candidate
289+
console.print(f"Using temoa repo hash: [green]{temoa_hash}[/]")
290+
break
291+
else:
292+
console.print(
293+
f"[bold red]Invalid format:[/] '{temoa_hash_candidate}' doesn't look like a valid git commit hash."
294+
)
295+
console.print(
296+
"Git commit hashes should contain only hexadecimal characters (0-9, a-f, A-F) and be 4-40 characters long."
297+
)
298+
retry = questionary.confirm(
299+
"Would you like to try again?", default=True
300+
).ask()
301+
if not retry:
302+
console.print("Skipping temoa repo hash.")
303+
break
304+
else:
305+
# In non-interactive mode, temoa hash is not provided
306+
console.print(
307+
"Running in non-interactive mode - temoa repo hash not specified."
308+
)
309+
234310
# Check for changes BEFORE doing any uploads.
235311
if dataset:
236312
latest_version = dataset["history"][0]
@@ -287,6 +363,7 @@ def _run_prepare_logic(ctx: typer.Context, name: str, file: Path) -> None:
287363
if diff_git_path
288364
else None, # Add path to entry
289365
"commit": "pending-merge",
366+
"temoaRepoHash": temoa_hash,
290367
"description": "pending-merge",
291368
}
292369
manifest.add_history_entry(name, new_entry)
@@ -307,6 +384,7 @@ def _run_prepare_logic(ctx: typer.Context, name: str, file: Path) -> None:
307384
"staging_key": staging_key,
308385
"diffFromPrevious": None, # Explicitly None for new datasets
309386
"commit": "pending-merge",
387+
"temoaRepoHash": temoa_hash,
310388
"description": "pending-merge",
311389
}
312390
],
@@ -419,6 +497,7 @@ def _run_rollback_logic(ctx: typer.Context, name: str, to_version: str) -> None:
419497
"r2_object_key": target_entry["r2_object_key"],
420498
"diffFromPrevious": None,
421499
"commit": "pending-merge",
500+
"temoaRepoHash": target_entry.get("temoaRepoHash"),
422501
"description": f"Rollback to version {target_entry['version']}",
423502
}
424503

@@ -481,10 +560,12 @@ def _rollback_interactive(ctx: typer.Context) -> None:
481560
return
482561

483562
# Exclude the latest version from the choices, as you can't roll back to it.
484-
version_choices = [
485-
f"{entry['version']} (commit: {entry['commit']}, {_rel(entry['timestamp'])})"
486-
for entry in dataset["history"][1:] # Start from the second entry
487-
]
563+
# Build one menu label per restorable entry, starting from the second entry.
# A missing OR null temoaRepoHash is shown as "N/A": dict.get's default only
# covers a missing key, and the manifest stores an explicit null when no hash
# was given.
version_choices = [
    f"{entry['version']} (commit: {entry['commit']}, "
    f"{_rel(entry['timestamp'])}, temoa: {entry.get('temoaRepoHash') or 'N/A'})"
    for entry in dataset["history"][1:]
]
488569
selected_version_str = questionary.select(
489570
"Which version do you want to restore?", choices=version_choices
490571
).ask()

tests/test_main.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,12 @@ def test_prepare_for_create_success(test_repo: Path, mocker: MockerFixture) -> N
2121
mocker.patch("datamanager.core.get_r2_client")
2222
mock_upload = mocker.patch("datamanager.core.upload_to_staging")
2323

24+
# Mock the temoa hash prompt to return empty (skip)
25+
mocker.patch(
26+
"questionary.text",
27+
return_value=mocker.Mock(ask=mocker.Mock(return_value="")),
28+
)
29+
2430
result = runner.invoke(app, ["prepare", "new-dataset.sqlite", str(new_file)])
2531

2632
assert result.exit_code == 0, result.stdout
@@ -32,6 +38,7 @@ def test_prepare_for_create_success(test_repo: Path, mocker: MockerFixture) -> N
3238
assert dataset is not None
3339
assert dataset["history"][0]["diffFromPrevious"] is None
3440
assert dataset["history"][0]["description"] == "pending-merge"
41+
assert dataset["history"][0]["temoaRepoHash"] is None
3542

3643

3744
def test_prepare_for_update_with_small_diff(
@@ -44,6 +51,12 @@ def test_prepare_for_update_with_small_diff(
4451
mocker.patch("datamanager.core.upload_to_staging")
4552
mocker.patch("datamanager.core.download_from_r2")
4653

54+
# Mock the temoa hash prompt to return empty (skip)
55+
mocker.patch(
56+
"questionary.text",
57+
return_value=mocker.Mock(ask=mocker.Mock(return_value="")),
58+
)
59+
4760
# Prepare a fake summary and full diff
4861
fake_summary = "# summary: 1 add, 1 del\n"
4962
fake_full = "--- a\n+++ b\n-foo\n+bar\n"
@@ -80,6 +93,13 @@ def test_prepare_for_update_with_large_diff(
8093
mock_r2_client.head_object.return_value = {"ContentLength": 1024}
8194
mocker.patch("datamanager.core.upload_to_staging")
8295
mocker.patch("datamanager.core.download_from_r2")
96+
97+
# Mock the temoa hash prompt to return empty (skip)
98+
mocker.patch(
99+
"questionary.text",
100+
return_value=mocker.Mock(ask=mocker.Mock(return_value="")),
101+
)
102+
83103
# Make the full diff larger than the default limit, but still provide a summary
84104
large_full = "line\n" * (settings.max_diff_lines + 1)
85105
small_summary = "# summary: huge diff, see details in PR\n"
@@ -115,6 +135,12 @@ def test_prepare_no_changes(test_repo: Path, mocker: MockerFixture) -> None:
115135
os.chdir(test_repo)
116136
mock_upload = mocker.patch("datamanager.core.upload_to_staging")
117137

138+
# Mock the temoa hash prompt to return empty (skip)
139+
mocker.patch(
140+
"questionary.text",
141+
return_value=mocker.Mock(ask=mocker.Mock(return_value="")),
142+
)
143+
118144
result = runner.invoke(app, ["prepare", "core-dataset.sqlite", "new_data.sqlite"])
119145

120146
assert result.exit_code == 0, result.stdout

tests/test_manifest.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import pytest
66

77
from datamanager import manifest
8+
from datamanager.__main__ import _validate_temoa_hash
89

910

1011
def test_read_manifest(test_repo: Path) -> None:
@@ -48,3 +49,27 @@ def test_update_latest_history_entry(test_repo: Path) -> None:
4849
assert data[0]["history"][0]["version"] == "v2"
4950
assert data[0]["history"][0]["commit"] == "abcdef"
5051
assert data[0]["latestVersion"] == "v2"
52+
53+
54+
def test_validate_temoa_hash() -> None:
    """Check that the validator accepts empty input or 4-40 hex characters only."""
    # Valid short hashes (case-insensitive), including the 4-character minimum
    assert _validate_temoa_hash("abcd")
    assert _validate_temoa_hash("abc123")
    assert _validate_temoa_hash("ABCDEF")

    # Valid long hashes, including the 40-character maximum
    assert _validate_temoa_hash("a" * 40)
    assert _validate_temoa_hash("1234567890abcdef" * 2)

    # Surrounding whitespace is stripped before validation
    assert _validate_temoa_hash("  abc123  ")

    # Empty or whitespace-only input is allowed (optional field)
    assert _validate_temoa_hash("")
    assert _validate_temoa_hash("   ")

    # Invalid characters
    assert not _validate_temoa_hash("gggggg")  # 'g' is not hex
    assert not _validate_temoa_hash("abc123g")  # contains 'g'
    assert not _validate_temoa_hash("abc12345-")  # contains dash

    # Too short
    assert not _validate_temoa_hash("abc")  # Less than 4 chars

    # Too long
    assert not _validate_temoa_hash("a" * 41)  # More than 40 chars

0 commit comments

Comments
 (0)