From 3b6ce15903297ea9b5d562079b72e372d9ea13c4 Mon Sep 17 00:00:00 2001 From: Renovate Bot Date: Fri, 24 Apr 2026 18:35:00 +0000 Subject: [PATCH 01/27] Update docker/login-action action to v4 --- .github/workflows/build-and-push.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-and-push.yml b/.github/workflows/build-and-push.yml index 4e7843dd..4c31e8c9 100644 --- a/.github/workflows/build-and-push.yml +++ b/.github/workflows/build-and-push.yml @@ -32,13 +32,13 @@ jobs: uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3 - name: Login to DockerHub - uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3 + uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4 with: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - name: Login to GitHub Container Registry - uses: docker/login-action@c94ce9fb468520275223c153574b00df6fe4bcc9 # v3 + uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4 with: registry: ghcr.io username: ${{ github.repository_owner }} From 7124e0c643475cd243901d05e9201ca975833e7a Mon Sep 17 00:00:00 2001 From: Renovate Bot Date: Fri, 24 Apr 2026 18:35:08 +0000 Subject: [PATCH 02/27] Update docker/setup-buildx-action action to v4 --- .github/workflows/build-and-push.yml | 2 +- .github/workflows/full-test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-and-push.yml b/.github/workflows/build-and-push.yml index 4c31e8c9..9074db3a 100644 --- a/.github/workflows/build-and-push.yml +++ b/.github/workflows/build-and-push.yml @@ -29,7 +29,7 @@ jobs: persist-credentials: false - name: Set up Docker Buildx - uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3 + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4 - name: Login to DockerHub uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # 
v4 diff --git a/.github/workflows/full-test.yml b/.github/workflows/full-test.yml index f183efda..2b72424b 100644 --- a/.github/workflows/full-test.yml +++ b/.github/workflows/full-test.yml @@ -24,7 +24,7 @@ jobs: persist-credentials: false - name: Set up Docker Buildx - uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3 + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4 - name: Read Photon version from .last_release id: photon_version From cf98c44b8da4215fbe3a02b7ebb7cd025acaf525 Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Tue, 28 Apr 2026 00:49:57 +0200 Subject: [PATCH 03/27] feat: multi-stage build without uv runtime dependency --- Dockerfile | 54 +++++++++++++++++++++++++++++------------- src/process_manager.py | 4 ++-- 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/Dockerfile b/Dockerfile index c15b088e..1c1578b1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,20 +1,40 @@ -FROM eclipse-temurin:21.0.9_10-jre-noble +FROM ubuntu:noble AS builder + +ARG DEBIAN_FRONTEND=noninteractive + +RUN apt-get update \ + && apt-get -y install --no-install-recommends \ + python3.12 \ + && rm -rf /var/lib/apt/lists/* -# install astral uv COPY --from=ghcr.io/astral-sh/uv:0.10 /uv /usr/local/bin/ +WORKDIR /build + +COPY pyproject.toml uv.lock ./ + +ENV UV_PYTHON=/usr/bin/python3.12 \ + UV_PYTHON_PREFERENCE=only-system \ + UV_LINK_MODE=copy \ + UV_PROJECT_ENVIRONMENT=/photon/.venv + +RUN uv sync --locked --no-dev --no-install-project + + +FROM eclipse-temurin:21.0.9_10-jre-noble + ARG DEBIAN_FRONTEND=noninteractive ARG PHOTON_VERSION ARG PUID=9011 ARG PGID=9011 RUN apt-get update \ - && apt-get -y install --no-install-recommends \ - lbzip2 \ - gosu \ - python3.12 \ - curl \ - && rm -rf /var/lib/apt/lists/* + && apt-get -y install --no-install-recommends \ + lbzip2 \ + gosu \ + python3.12 \ + curl \ + && rm -rf /var/lib/apt/lists/* RUN groupadd -g ${PGID} -o photon && \ useradd -l -u ${PUID} -g photon 
-o -s /bin/false -m -d /photon photon @@ -27,24 +47,24 @@ ADD https://github.com/komoot/photon/releases/download/${PHOTON_VERSION}/photon- COPY src/ ./src/ COPY entrypoint.sh . -COPY pyproject.toml . -COPY uv.lock . -RUN gosu photon uv sync --locked +COPY --from=builder /photon/.venv /photon/.venv +ENV PATH="/photon/.venv/bin:${PATH}" \ + VIRTUAL_ENV=/photon/.venv RUN chmod 644 /photon/photon.jar && \ chown -R photon:photon /photon LABEL org.opencontainers.image.title="photon-docker" \ - org.opencontainers.image.description="Unofficial docker image for the Photon Geocoder" \ - org.opencontainers.image.url="https://github.com/rtuszik/photon-docker" \ - org.opencontainers.image.source="https://github.com/rtuszik/photon-docker" \ - org.opencontainers.image.documentation="https://github.com/rtuszik/photon-docker#readme" + org.opencontainers.image.description="Unofficial docker image for the Photon Geocoder" \ + org.opencontainers.image.url="https://github.com/rtuszik/photon-docker" \ + org.opencontainers.image.source="https://github.com/rtuszik/photon-docker" \ + org.opencontainers.image.documentation="https://github.com/rtuszik/photon-docker#readme" EXPOSE 2322 HEALTHCHECK --interval=30s --timeout=10s --start-period=240s --retries=3 \ - CMD curl -f http://localhost:2322/status || exit 1 + CMD curl -f http://localhost:2322/status || exit 1 ENTRYPOINT ["/bin/sh", "entrypoint.sh"] -CMD ["uv", "run", "-m", "src.process_manager"] +CMD ["/photon/.venv/bin/python", "-m", "src.process_manager"] diff --git a/src/process_manager.py b/src/process_manager.py index 392beb06..f9204182 100644 --- a/src/process_manager.py +++ b/src/process_manager.py @@ -79,7 +79,7 @@ def handle_shutdown(self, signum, _frame): def run_initial_setup(self): logger.info("Running initial setup...") - result = subprocess.run(["uv", "run", "--no-sync", "-m", "src.entrypoint", "setup"], check=False, cwd="/photon") # noqa S603 + result = subprocess.run([sys.executable, "-m", "src.entrypoint", "setup"], 
check=False, cwd="/photon") # noqa S603 if result.returncode != 0: logger.error("Setup failed!") @@ -218,7 +218,7 @@ def run_update(self): if config.UPDATE_STRATEGY == "SEQUENTIAL": self.stop_photon() - result = subprocess.run(["uv", "run", "--no-sync", "-m", "src.updater"], check=False, cwd="/photon") # noqa S603 + result = subprocess.run([sys.executable, "-m", "src.updater"], check=False, cwd="/photon") # noqa S603 if result.returncode == 0: logger.info("Update process completed, verifying Photon health...") From 1e6825f10eaee795620812e8005244dc7a497d9d Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Sun, 10 May 2026 13:17:07 +0200 Subject: [PATCH 04/27] increase test coverage --- pyproject.toml | 3 + tests/test_check_remote.py | 209 ++++++++++++++ tests/test_downloader.py | 493 ++++++++++++++++++++++++++++++++++ tests/test_entrypoint.py | 208 ++++++++++++++ tests/test_filesystem.py | 299 +++++++++++++++++++++ tests/test_process_manager.py | 441 ++++++++++++++++++++++++++++++ tests/test_updater.py | 57 ++++ tests/utils/test_logger.py | 71 +++++ tests/utils/test_notify.py | 65 +++++ 9 files changed, 1846 insertions(+) create mode 100644 tests/test_check_remote.py create mode 100644 tests/test_downloader.py create mode 100644 tests/test_entrypoint.py create mode 100644 tests/test_filesystem.py create mode 100644 tests/test_process_manager.py create mode 100644 tests/test_updater.py create mode 100644 tests/utils/test_logger.py create mode 100644 tests/utils/test_notify.py diff --git a/pyproject.toml b/pyproject.toml index 82759743..3b39cfb0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,9 @@ dev = [ requires = ["uv_build>=0.11.7,<0.12.0"] build-backend = "uv_build" +[tool.coverage.run] +omit = ["tests/*"] + [tool.ruff] indent-width = 4 line-length = 120 diff --git a/tests/test_check_remote.py b/tests/test_check_remote.py new file mode 100644 index 00000000..9c71d00b --- /dev/null +++ b/tests/test_check_remote.py @@ -0,0 +1,209 @@ +import datetime 
+import os +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +from requests.exceptions import RequestException + +from src import check_remote +from src.utils import config + + +@pytest.fixture +def fake_dirs(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + data_dir = tmp_path / "data" + photon_data_dir = data_dir / "photon_data" + os_node_dir = photon_data_dir / "node_1" + data_dir.mkdir() + photon_data_dir.mkdir() + monkeypatch.setattr(config, "DATA_DIR", str(data_dir)) + monkeypatch.setattr(config, "PHOTON_DATA_DIR", str(photon_data_dir)) + monkeypatch.setattr(config, "OS_NODE_DIR", str(os_node_dir)) + return data_dir + + +def _mock_response(status_code=200, headers=None): + resp = MagicMock() + resp.status_code = status_code + resp.headers = headers or {} + resp.raise_for_status = MagicMock() + return resp + + +def test_get_remote_file_size_uses_content_length(): + resp = _mock_response(headers={"content-length": "12345"}) + with patch("src.check_remote.requests.head", return_value=resp): + assert check_remote.get_remote_file_size("https://example.com/x") == 12345 + + +def test_get_remote_file_size_falls_back_to_range_request(): + head_resp = _mock_response(headers={}) + range_resp = _mock_response(status_code=206, headers={"content-range": "bytes 0-0/9876"}) + with ( + patch("src.check_remote.requests.head", return_value=head_resp), + patch("src.check_remote.requests.get", return_value=range_resp), + ): + assert check_remote.get_remote_file_size("https://example.com/x") == 9876 + + +def test_get_remote_file_size_raises_when_no_size_returned(): + head_resp = _mock_response(headers={}) + range_resp = _mock_response(status_code=200, headers={}) + with ( + patch("src.check_remote.requests.head", return_value=head_resp), + patch("src.check_remote.requests.get", return_value=range_resp), + pytest.raises(check_remote.RemoteFileSizeError, match="did not return file size"), + ): + 
check_remote.get_remote_file_size("https://example.com/x") + + +def test_get_remote_file_size_wraps_request_errors(): + with ( + patch("src.check_remote.requests.head", side_effect=RequestException("boom")), + pytest.raises(check_remote.RemoteFileSizeError, match="Could not determine remote file size"), + ): + check_remote.get_remote_file_size("https://example.com/x") + + +def test_get_remote_file_size_ignores_non_digit_range_total(): + head_resp = _mock_response(headers={}) + range_resp = _mock_response(status_code=206, headers={"content-range": "bytes 0-0/*"}) + with ( + patch("src.check_remote.requests.head", return_value=head_resp), + patch("src.check_remote.requests.get", return_value=range_resp), + pytest.raises(check_remote.RemoteFileSizeError), + ): + check_remote.get_remote_file_size("https://example.com/x") + + +def test_get_remote_time_returns_parsed_datetime(): + resp = _mock_response(headers={"last-modified": "Wed, 21 Oct 2026 07:28:00 GMT"}) + with patch("src.check_remote.requests.head", return_value=resp): + result = check_remote.get_remote_time("https://example.com") + assert result is not None + assert result.year == 2026 + assert result.month == 10 + assert result.day == 21 + + +def test_get_remote_time_returns_none_when_header_missing(): + resp = _mock_response(headers={}) + with patch("src.check_remote.requests.head", return_value=resp): + assert check_remote.get_remote_time("https://example.com") is None + + +def test_get_remote_time_returns_none_on_request_error(): + with patch("src.check_remote.requests.head", side_effect=RequestException("nope")): + assert check_remote.get_remote_time("https://example.com") is None + + +def test_get_local_time_returns_marker_mtime_when_present(fake_dirs: Path): + marker = fake_dirs / ".photon-index-updated" + marker.write_text("") + os.utime(marker, (1_000_000, 1_000_000)) + assert check_remote.get_local_time(str(fake_dirs / "missing")) == 1_000_000 + + +def 
test_get_local_time_returns_path_mtime_when_no_marker(fake_dirs: Path): + target = fake_dirs / "node_1" + target.mkdir() + os.utime(target, (2_000_000, 2_000_000)) + assert check_remote.get_local_time(str(target)) == 2_000_000 + + +def test_get_local_time_returns_zero_when_path_missing(fake_dirs: Path): + assert check_remote.get_local_time(str(fake_dirs / "missing")) == 0.0 + + +def test_compare_mtime_returns_false_when_remote_time_unknown(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "REGION", None) + monkeypatch.setattr(config, "BASE_URL", "https://example.com") + with patch("src.check_remote.get_remote_time", return_value=None): + assert check_remote.compare_mtime() is False + + +def test_compare_mtime_returns_false_on_invalid_region(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "REGION", "atlantis") + assert check_remote.compare_mtime() is False + + +def test_compare_mtime_with_marker_compares_directly(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "REGION", None) + monkeypatch.setattr(config, "BASE_URL", "https://example.com") + + marker = fake_dirs / ".photon-index-updated" + marker.write_text("") + local_ts = datetime.datetime(2026, 1, 1, tzinfo=datetime.UTC).timestamp() + os.utime(marker, (local_ts, local_ts)) + + remote_dt = datetime.datetime(2026, 1, 2, tzinfo=datetime.UTC) + with patch("src.check_remote.get_remote_time", return_value=remote_dt): + assert check_remote.compare_mtime() is True + + +def test_compare_mtime_with_marker_returns_false_when_remote_older(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "REGION", None) + monkeypatch.setattr(config, "BASE_URL", "https://example.com") + + marker = fake_dirs / ".photon-index-updated" + marker.write_text("") + local_ts = datetime.datetime(2026, 1, 10, tzinfo=datetime.UTC).timestamp() + os.utime(marker, (local_ts, local_ts)) + + remote_dt = datetime.datetime(2026, 1, 1, 
tzinfo=datetime.UTC) + with patch("src.check_remote.get_remote_time", return_value=remote_dt): + assert check_remote.compare_mtime() is False + + +def test_compare_mtime_directory_uses_grace_period(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "REGION", None) + monkeypatch.setattr(config, "BASE_URL", "https://example.com") + + node_dir = Path(config.OS_NODE_DIR) + node_dir.mkdir() + local_ts = datetime.datetime(2026, 1, 1, tzinfo=datetime.UTC).timestamp() + os.utime(node_dir, (local_ts, local_ts)) + + remote_within_grace = datetime.datetime(2026, 1, 5, tzinfo=datetime.UTC) + with patch("src.check_remote.get_remote_time", return_value=remote_within_grace): + assert check_remote.compare_mtime() is False + + remote_past_grace = datetime.datetime(2026, 1, 20, tzinfo=datetime.UTC) + with patch("src.check_remote.get_remote_time", return_value=remote_past_grace): + assert check_remote.compare_mtime() is True + + +def test_check_index_age_returns_true_when_min_date_unset(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "MIN_INDEX_DATE", None) + assert check_remote.check_index_age() is True + + +def test_check_index_age_warns_and_returns_true_on_bad_format(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "MIN_INDEX_DATE", "2026-01-01") + assert check_remote.check_index_age() is True + + +def test_check_index_age_returns_true_when_no_local_index(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "MIN_INDEX_DATE", "01.01.26") + assert check_remote.check_index_age() is True + + +def test_check_index_age_returns_true_when_update_required_due_to_old_index( + fake_dirs: Path, monkeypatch: pytest.MonkeyPatch +): + monkeypatch.setattr(config, "MIN_INDEX_DATE", "01.06.26") + node_dir = Path(config.OS_NODE_DIR) + node_dir.mkdir() + local_ts = datetime.datetime(2026, 1, 1, tzinfo=datetime.UTC).timestamp() + os.utime(node_dir, (local_ts, local_ts)) + assert check_remote.check_index_age() is 
True + + +def test_check_index_age_returns_false_when_local_meets_min(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "MIN_INDEX_DATE", "01.01.26") + node_dir = Path(config.OS_NODE_DIR) + node_dir.mkdir() + local_ts = datetime.datetime(2026, 6, 1, tzinfo=datetime.UTC).timestamp() + os.utime(node_dir, (local_ts, local_ts)) + assert check_remote.check_index_age() is False diff --git a/tests/test_downloader.py b/tests/test_downloader.py new file mode 100644 index 00000000..c55f8e8f --- /dev/null +++ b/tests/test_downloader.py @@ -0,0 +1,493 @@ +import json +import os +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +from requests.exceptions import RequestException + +from src import downloader +from src.utils import config + + +@pytest.fixture +def fake_dirs(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + data_dir = tmp_path / "data" + photon_data_dir = data_dir / "photon_data" + temp_dir = data_dir / "temp" + os_node_dir = photon_data_dir / "node_1" + data_dir.mkdir() + + monkeypatch.setattr(config, "DATA_DIR", str(data_dir)) + monkeypatch.setattr(config, "PHOTON_DATA_DIR", str(photon_data_dir)) + monkeypatch.setattr(config, "TEMP_DIR", str(temp_dir)) + monkeypatch.setattr(config, "OS_NODE_DIR", str(os_node_dir)) + return data_dir + + +def _mock_response(status_code=200, headers=None, chunks=None): + resp = MagicMock() + resp.status_code = status_code + resp.headers = headers or {} + resp.raise_for_status = MagicMock() + resp.iter_content = MagicMock(return_value=iter(chunks or [])) + resp.__enter__ = MagicMock(return_value=resp) + resp.__exit__ = MagicMock(return_value=False) + return resp + + +def test_get_available_space_returns_bytes(tmp_path: Path): + space = downloader.get_available_space(str(tmp_path)) + assert space > 0 + + +def test_get_available_space_returns_zero_on_invalid_path(): + assert downloader.get_available_space("/this/does/not/exist") == 0 + + +def 
test_check_disk_space_requirements_parallel_passes(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(downloader, "get_available_space", lambda _: 100 * 1024**3) + assert downloader.check_disk_space_requirements(10 * 1024**3, is_parallel=True) is True + + +def test_check_disk_space_requirements_parallel_fails_on_temp(monkeypatch: pytest.MonkeyPatch): + sizes = iter([1, 100 * 1024**3]) + monkeypatch.setattr(downloader, "get_available_space", lambda _: next(sizes)) + assert downloader.check_disk_space_requirements(10 * 1024**3, is_parallel=True) is False + + +def test_check_disk_space_requirements_parallel_fails_on_data(monkeypatch: pytest.MonkeyPatch): + sizes = iter([100 * 1024**3, 1]) + monkeypatch.setattr(downloader, "get_available_space", lambda _: next(sizes)) + assert downloader.check_disk_space_requirements(10 * 1024**3, is_parallel=True) is False + + +def test_check_disk_space_requirements_sequential_passes(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(downloader, "get_available_space", lambda _: 100 * 1024**3) + assert downloader.check_disk_space_requirements(10 * 1024**3, is_parallel=False) is True + + +def test_check_disk_space_requirements_sequential_fails(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(downloader, "get_available_space", lambda _: 1) + assert downloader.check_disk_space_requirements(10 * 1024**3, is_parallel=False) is False + + +def test_get_download_state_file_appends_suffix(): + assert downloader.get_download_state_file("/x/y/file.bin") == "/x/y/file.bin.download_state" + + +def test_save_and_load_download_state_roundtrip(tmp_path: Path): + dest = tmp_path / "file.bin" + dest.write_bytes(b"hello") + downloader.save_download_state(str(dest), "https://example.com/x", 5, 100) + state = downloader.load_download_state(str(dest)) + assert state["url"] == "https://example.com/x" + assert state["downloaded_bytes"] == 5 + assert state["total_size"] == 100 + assert state["file_size"] == 5 + + +def 
test_load_download_state_returns_empty_when_no_state_file(tmp_path: Path): + assert downloader.load_download_state(str(tmp_path / "nope")) == {} + + +def test_load_download_state_resyncs_with_actual_file_size(tmp_path: Path): + dest = tmp_path / "file.bin" + dest.write_bytes(b"x" * 200) + state_file = Path(downloader.get_download_state_file(str(dest))) + state_file.write_text(json.dumps({"url": "u", "downloaded_bytes": 50, "total_size": 1000, "file_size": 50})) + + state = downloader.load_download_state(str(dest)) + assert state["downloaded_bytes"] == 200 + assert state["file_size"] == 200 + + +def test_load_download_state_drops_state_when_file_smaller(tmp_path: Path): + dest = tmp_path / "file.bin" + dest.write_bytes(b"x" * 10) + state_file = Path(downloader.get_download_state_file(str(dest))) + state_file.write_text(json.dumps({"url": "u", "downloaded_bytes": 50, "total_size": 1000, "file_size": 50})) + + assert downloader.load_download_state(str(dest)) == {} + assert not state_file.exists() + + +def test_load_download_state_handles_corrupted_state(tmp_path: Path): + dest = tmp_path / "file.bin" + dest.write_bytes(b"data") + state_file = Path(downloader.get_download_state_file(str(dest))) + state_file.write_text("{not json") + + assert downloader.load_download_state(str(dest)) == {} + assert not state_file.exists() + + +def test_cleanup_download_state_removes_file(tmp_path: Path): + dest = tmp_path / "file.bin" + state_file = Path(downloader.get_download_state_file(str(dest))) + state_file.write_text("{}") + downloader.cleanup_download_state(str(dest)) + assert not state_file.exists() + + +def test_cleanup_download_state_no_op_when_missing(tmp_path: Path): + downloader.cleanup_download_state(str(tmp_path / "missing")) + + +def test_cleanup_download_state_swallows_remove_errors(tmp_path: Path): + dest = tmp_path / "file.bin" + state_file = Path(downloader.get_download_state_file(str(dest))) + state_file.write_text("{}") + with patch("src.downloader.os.remove", 
side_effect=OSError("locked")): + downloader.cleanup_download_state(str(dest)) + + +def test_supports_range_requests_true(): + resp = _mock_response(headers={"accept-ranges": "bytes"}) + with patch("src.downloader.requests.head", return_value=resp): + assert downloader.supports_range_requests("https://example.com/x") is True + + +def test_supports_range_requests_false_when_header_missing(): + resp = _mock_response(headers={}) + with patch("src.downloader.requests.head", return_value=resp): + assert downloader.supports_range_requests("https://example.com/x") is False + + +def test_supports_range_requests_false_on_error(): + with patch("src.downloader.requests.head", side_effect=RequestException("nope")): + assert downloader.supports_range_requests("https://example.com/x") is False + + +def test_get_download_url_uses_file_url_when_set(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "FILE_URL", "https://override.example/file.tar.bz2") + assert downloader.get_download_url() == "https://override.example/file.tar.bz2" + + +def test_get_download_url_constructs_from_region_and_base(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "FILE_URL", None) + monkeypatch.setattr(config, "BASE_URL", "https://example.com/public") + monkeypatch.setattr(config, "REGION", "europe") + monkeypatch.setattr(config, "INDEX_DB_VERSION", "1.0") + monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") + url = downloader.get_download_url() + assert url == "https://example.com/public/europe/photon-db-europe-1.0-latest.tar.bz2" + + +def test_prepare_download_no_state(tmp_path: Path): + dest = tmp_path / "file.bin" + pos, mode = downloader._prepare_download("https://example.com/x", str(dest)) + assert pos == 0 + assert mode == "wb" + + +def test_prepare_download_resumes_when_state_matches(tmp_path: Path): + dest = tmp_path / "file.bin" + dest.write_bytes(b"x" * 50) + downloader.save_download_state(str(dest), "https://example.com/x", 50, 100) + + pos, mode = 
downloader._prepare_download("https://example.com/x", str(dest)) + assert pos == 50 + assert mode == "ab" + + +def test_prepare_download_starts_fresh_when_url_changed(tmp_path: Path): + dest = tmp_path / "file.bin" + dest.write_bytes(b"x" * 50) + downloader.save_download_state(str(dest), "https://old.example.com/x", 50, 100) + + pos, mode = downloader._prepare_download("https://new.example.com/x", str(dest)) + assert pos == 0 + assert mode == "wb" + + +def test_get_download_headers_returns_range_when_resuming(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(downloader, "supports_range_requests", lambda _: True) + assert downloader._get_download_headers(123, "https://example.com/x") == {"Range": "bytes=123-"} + + +def test_get_download_headers_empty_when_no_resume(): + assert downloader._get_download_headers(0, "https://example.com/x") == {} + + +def test_get_download_headers_empty_when_no_range_support(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(downloader, "supports_range_requests", lambda _: False) + assert downloader._get_download_headers(123, "https://example.com/x") == {} + + +def test_calculate_total_size_with_range_response_using_content_range(): + resp = _mock_response(status_code=206, headers={"content-range": "bytes 0-99/12345"}) + assert downloader._calculate_total_size(resp, {"Range": "bytes=0-"}, 0) == 12345 + + +def test_calculate_total_size_with_range_response_no_content_range(): + resp = _mock_response(status_code=206, headers={"content-length": "100"}) + assert downloader._calculate_total_size(resp, {"Range": "bytes=50-"}, 50) == 150 + + +def test_calculate_total_size_without_range_uses_content_length(): + resp = _mock_response(status_code=200, headers={"content-length": "9999"}) + assert downloader._calculate_total_size(resp, {}, 0) == 9999 + + +def test_handle_no_range_support_resets_when_resuming(tmp_path: Path): + dest = tmp_path / "file.bin" + dest.write_bytes(b"x" * 100) + pos, mode = 
downloader._handle_no_range_support(100, str(dest)) + assert pos == 0 + assert mode == "wb" + assert not dest.exists() + + +def test_handle_no_range_support_no_op_when_not_resuming(tmp_path: Path): + dest = tmp_path / "file.bin" + pos, mode = downloader._handle_no_range_support(0, str(dest)) + assert pos == 0 + assert mode is None + + +def test_create_progress_bar_returns_none_when_no_size(tmp_path: Path): + assert downloader._create_progress_bar(0, 0, str(tmp_path / "file.bin")) is None + + +def test_create_progress_bar_returns_tqdm_when_size_known(tmp_path: Path): + bar = downloader._create_progress_bar(1024, 0, str(tmp_path / "file.bin")) + assert bar is not None + bar.close() + + +def test_log_download_metrics_handles_long_download(caplog: pytest.LogCaptureFixture, tmp_path: Path): + import logging as _logging + import time + + caplog.set_level(_logging.INFO, logger="root") + downloader._log_download_metrics(10 * 1024**3, time.time() - (3 * 60 * 60), str(tmp_path / "f")) + msgs = "\n".join(r.message for r in caplog.records) + assert "Download completed" in msgs + assert "h" in msgs + + +def test_log_download_metrics_handles_short_download(caplog: pytest.LogCaptureFixture, tmp_path: Path): + import logging as _logging + import time + + caplog.set_level(_logging.INFO, logger="root") + downloader._log_download_metrics(1024**3, time.time() - 10, str(tmp_path / "f")) + assert any("Download completed" in r.message for r in caplog.records) + + +def test_log_download_metrics_no_size(caplog: pytest.LogCaptureFixture, tmp_path: Path): + import logging as _logging + + caplog.set_level(_logging.INFO, logger="root") + downloader._log_download_metrics(0, 0.0, str(tmp_path / "f")) + assert any("successfully" in r.message for r in caplog.records) + + +def test_download_file_success(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): + dest = tmp_path / "out.bin" + monkeypatch.setattr(config, "DOWNLOAD_MAX_RETRIES", "1") + + resp = _mock_response(status_code=200, 
headers={"content-length": "5"}, chunks=[b"hello"]) + with patch("src.downloader.requests.get", return_value=resp): + assert downloader.download_file("https://example.com/x", str(dest)) is True + + assert dest.read_bytes() == b"hello" + assert not Path(downloader.get_download_state_file(str(dest))).exists() + + +def test_download_file_incomplete_raises_and_returns_false(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): + dest = tmp_path / "out.bin" + monkeypatch.setattr(config, "DOWNLOAD_MAX_RETRIES", "1") + + resp = _mock_response(status_code=200, headers={"content-length": "10"}, chunks=[b"hi"]) + with patch("src.downloader.requests.get", return_value=resp): + assert downloader.download_file("https://example.com/x", str(dest)) is False + + +def test_download_file_retries_on_request_exception(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): + dest = tmp_path / "out.bin" + monkeypatch.setattr(config, "DOWNLOAD_MAX_RETRIES", "3") + monkeypatch.setattr(downloader.time, "sleep", lambda *_: None) + + good_resp = _mock_response(status_code=200, headers={"content-length": "3"}, chunks=[b"abc"]) + + calls = {"n": 0} + + def fake_get(*_args, **_kwargs): + calls["n"] += 1 + if calls["n"] < 3: + raise RequestException("transient") + return good_resp + + with patch("src.downloader.requests.get", side_effect=fake_get): + assert downloader.download_file("https://example.com/x", str(dest)) is True + assert calls["n"] == 3 + + +def test_download_file_returns_false_when_retries_exhausted(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): + dest = tmp_path / "out.bin" + monkeypatch.setattr(config, "DOWNLOAD_MAX_RETRIES", "2") + monkeypatch.setattr(downloader.time, "sleep", lambda *_: None) + with patch("src.downloader.requests.get", side_effect=RequestException("always")): + assert downloader.download_file("https://example.com/x", str(dest)) is False + + +def test_download_file_returns_false_on_unexpected_exception(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): + dest = tmp_path 
/ "out.bin" + monkeypatch.setattr(config, "DOWNLOAD_MAX_RETRIES", "1") + with patch("src.downloader.requests.get", side_effect=RuntimeError("boom")): + assert downloader.download_file("https://example.com/x", str(dest)) is False + + +def test_download_index_returns_path(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") + monkeypatch.setattr(downloader, "get_download_url", lambda: "https://example.com/x") + Path(config.TEMP_DIR).mkdir(parents=True, exist_ok=True) + + def fake_download(_url, output): + Path(output).write_bytes(b"x") + return True + + with patch("src.downloader.download_file", side_effect=fake_download): + out = downloader.download_index() + + assert out == os.path.join(config.TEMP_DIR, "photon-db-latest.tar.bz2") + assert Path(out).exists() + + +def test_download_index_raises_on_failure(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") + monkeypatch.setattr(downloader, "get_download_url", lambda: "https://example.com/x") + Path(config.TEMP_DIR).mkdir(parents=True, exist_ok=True) + with ( + patch("src.downloader.download_file", return_value=False), + pytest.raises(Exception, match="Failed to download index"), + ): + downloader.download_index() + + +def test_download_md5_uses_explicit_url(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "MD5_URL", "https://example.com/custom.md5") + monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") + Path(config.TEMP_DIR).mkdir(parents=True, exist_ok=True) + + captured = {} + + def fake_download(url, output): + captured["url"] = url + captured["output"] = output + Path(output).write_text("md5") + return True + + with patch("src.downloader.download_file", side_effect=fake_download): + out = downloader.download_md5() + + assert captured["url"] == "https://example.com/custom.md5" + assert out.endswith("photon-db-latest.tar.bz2.md5") + + +def 
test_download_md5_constructs_url_when_unset(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "MD5_URL", None) + monkeypatch.setattr(config, "FILE_URL", None) + monkeypatch.setattr(config, "BASE_URL", "https://example.com/public") + monkeypatch.setattr(config, "REGION", None) + monkeypatch.setattr(config, "INDEX_DB_VERSION", "1.0") + monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") + Path(config.TEMP_DIR).mkdir(parents=True, exist_ok=True) + + captured = {} + + def fake_download(url, output): + captured["url"] = url + Path(output).write_text("md5") + return True + + with patch("src.downloader.download_file", side_effect=fake_download): + downloader.download_md5() + + assert captured["url"] == "https://example.com/public/photon-db-planet-1.0-latest.tar.bz2.md5" + + +def test_download_md5_raises_on_failure(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "MD5_URL", "https://example.com/x.md5") + monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") + Path(config.TEMP_DIR).mkdir(parents=True, exist_ok=True) + with ( + patch("src.downloader.download_file", return_value=False), + pytest.raises(Exception, match="Failed to download MD5"), + ): + downloader.download_md5() + + +def _make_orchestrator_patches(monkeypatch: pytest.MonkeyPatch): + fake_index = str(Path(config.TEMP_DIR) / "index.tar.bz2") + fake_md5 = fake_index + ".md5" + monkeypatch.setattr(downloader, "get_download_url", lambda: "https://example.com/x") + monkeypatch.setattr(downloader, "get_remote_file_size", lambda _: 1024) + monkeypatch.setattr(downloader, "check_disk_space_requirements", lambda *_, **__: True) + monkeypatch.setattr(downloader, "download_index", lambda: fake_index) + monkeypatch.setattr(downloader, "download_md5", lambda: fake_md5) + monkeypatch.setattr(downloader, "extract_index", lambda _: None) + monkeypatch.setattr(downloader, "verify_checksum", lambda *_: True) + monkeypatch.setattr(downloader, 
"move_index", lambda: True) + monkeypatch.setattr(downloader, "clear_temp_dir", lambda: None) + + +def test_parallel_update_happy_path(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "SKIP_MD5_CHECK", False) + _make_orchestrator_patches(monkeypatch) + downloader.parallel_update() + assert Path(config.TEMP_DIR).exists() + + +def test_parallel_update_skips_md5_when_configured(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "SKIP_MD5_CHECK", True) + _make_orchestrator_patches(monkeypatch) + + md5_called = {"n": 0} + + def fake_md5(): + md5_called["n"] += 1 + return str(Path(config.TEMP_DIR) / "x.md5") + + monkeypatch.setattr(downloader, "download_md5", fake_md5) + downloader.parallel_update() + assert md5_called["n"] == 0 + + +def test_parallel_update_raises_on_insufficient_space(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + _make_orchestrator_patches(monkeypatch) + monkeypatch.setattr(downloader, "check_disk_space_requirements", lambda *_, **__: False) + with pytest.raises(SystemExit): + downloader.parallel_update() + + +def test_parallel_update_skip_space_check_proceeds_on_size_error(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "SKIP_SPACE_CHECK", True) + monkeypatch.setattr(config, "SKIP_MD5_CHECK", True) + _make_orchestrator_patches(monkeypatch) + + def boom(_url): + raise downloader.RemoteFileSizeError("no size") + + monkeypatch.setattr(downloader, "get_remote_file_size", boom) + downloader.parallel_update() + + +def test_sequential_update_happy_path(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "SKIP_MD5_CHECK", False) + _make_orchestrator_patches(monkeypatch) + downloader.sequential_update() + + +def test_sequential_update_raises_on_size_error_without_skip(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "SKIP_SPACE_CHECK", False) + _make_orchestrator_patches(monkeypatch) + + def 
boom(_url): + raise downloader.RemoteFileSizeError("no size") + + monkeypatch.setattr(downloader, "get_remote_file_size", boom) + with pytest.raises(SystemExit): + downloader.sequential_update() diff --git a/tests/test_entrypoint.py b/tests/test_entrypoint.py new file mode 100644 index 00000000..7ff51d4e --- /dev/null +++ b/tests/test_entrypoint.py @@ -0,0 +1,208 @@ +from pathlib import Path +from unittest.mock import patch + +import pytest + +from src import entrypoint +from src.downloader import InsufficientSpaceError +from src.utils import config + + +@pytest.fixture +def base_config(monkeypatch: pytest.MonkeyPatch, tmp_path: Path): + os_node_dir = tmp_path / "node_1" + monkeypatch.setattr(config, "OS_NODE_DIR", str(os_node_dir)) + monkeypatch.setattr(config, "FORCE_UPDATE", False) + monkeypatch.setattr(config, "INITIAL_DOWNLOAD", True) + monkeypatch.setattr(config, "MIN_INDEX_DATE", None) + monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") + monkeypatch.setattr(config, "FILE_URL", None) + monkeypatch.setattr(config, "MD5_URL", None) + monkeypatch.setattr(config, "APPRISE_URLS", None) + return os_node_dir + + +def _patch_common(): + return (patch("src.entrypoint.send_notification"), patch("src.entrypoint.validate_config")) + + +def test_entrypoint_skips_download_when_index_present(base_config: Path): + base_config.mkdir() + notify, validate = _patch_common() + with ( + notify as n, + validate as v, + patch("src.entrypoint.sequential_update") as seq, + patch("src.entrypoint.parallel_update") as par, + ): + entrypoint.main() + + n.assert_called() + v.assert_called_once() + seq.assert_not_called() + par.assert_not_called() + + +def test_entrypoint_runs_initial_sequential_when_no_index(base_config: Path): + notify, validate = _patch_common() + with notify, validate, patch("src.entrypoint.sequential_update") as seq: + entrypoint.main() + seq.assert_called_once() + + +def test_entrypoint_skips_initial_when_disabled(base_config: Path, monkeypatch: 
pytest.MonkeyPatch): + monkeypatch.setattr(config, "INITIAL_DOWNLOAD", False) + notify, validate = _patch_common() + with notify, validate, patch("src.entrypoint.sequential_update") as seq: + entrypoint.main() + seq.assert_not_called() + + +def test_entrypoint_force_update_uses_parallel_when_set(base_config: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "FORCE_UPDATE", True) + monkeypatch.setattr(config, "UPDATE_STRATEGY", "PARALLEL") + notify, validate = _patch_common() + with ( + notify, + validate, + patch("src.entrypoint.parallel_update") as par, + patch("src.entrypoint.sequential_update") as seq, + ): + entrypoint.main() + par.assert_called_once() + seq.assert_not_called() + + +def test_entrypoint_force_update_uses_sequential_when_not_parallel(base_config: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "FORCE_UPDATE", True) + monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") + notify, validate = _patch_common() + with ( + notify, + validate, + patch("src.entrypoint.sequential_update") as seq, + patch("src.entrypoint.parallel_update") as par, + ): + entrypoint.main() + seq.assert_called_once() + par.assert_not_called() + + +def test_entrypoint_force_update_exits_on_insufficient_space(base_config: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "FORCE_UPDATE", True) + monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") + notify, validate = _patch_common() + with ( + notify, + validate, + patch("src.entrypoint.sequential_update", side_effect=InsufficientSpaceError("no space")), + pytest.raises(SystemExit) as exc, + ): + entrypoint.main() + assert exc.value.code == 75 + + +def test_entrypoint_force_update_propagates_unexpected_error(base_config: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "FORCE_UPDATE", True) + monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") + notify, validate = _patch_common() + with ( + notify, + validate, + 
patch("src.entrypoint.sequential_update", side_effect=RuntimeError("boom")), + pytest.raises(RuntimeError), + ): + entrypoint.main() + + +def test_entrypoint_initial_download_exits_on_insufficient_space(base_config: Path): + notify, validate = _patch_common() + with ( + notify, + validate, + patch("src.entrypoint.sequential_update", side_effect=InsufficientSpaceError("no space")), + pytest.raises(SystemExit) as exc, + ): + entrypoint.main() + assert exc.value.code == 75 + + +def test_entrypoint_validate_config_failure_exits(base_config: Path): + base_config.mkdir() + with ( + patch("src.entrypoint.send_notification"), + patch("src.entrypoint.validate_config", side_effect=ValueError("bad")), + pytest.raises(SystemExit) as exc, + ): + entrypoint.main() + assert exc.value.code == 1 + + +def test_entrypoint_min_date_triggers_update(base_config: Path, monkeypatch: pytest.MonkeyPatch): + base_config.mkdir() + monkeypatch.setattr(config, "MIN_INDEX_DATE", "01.01.26") + notify, validate = _patch_common() + with ( + notify, + validate, + patch("src.entrypoint.check_index_age", return_value=True), + patch("src.entrypoint.sequential_update") as seq, + ): + entrypoint.main() + seq.assert_called_once() + + +def test_entrypoint_min_date_skips_when_index_recent(base_config: Path, monkeypatch: pytest.MonkeyPatch): + base_config.mkdir() + monkeypatch.setattr(config, "MIN_INDEX_DATE", "01.01.26") + notify, validate = _patch_common() + with ( + notify, + validate, + patch("src.entrypoint.check_index_age", return_value=False), + patch("src.entrypoint.sequential_update") as seq, + ): + entrypoint.main() + seq.assert_not_called() + + +def test_entrypoint_min_date_exits_on_insufficient_space(base_config: Path, monkeypatch: pytest.MonkeyPatch): + base_config.mkdir() + monkeypatch.setattr(config, "MIN_INDEX_DATE", "01.01.26") + notify, validate = _patch_common() + with ( + notify, + validate, + patch("src.entrypoint.check_index_age", return_value=True), + 
patch("src.entrypoint.sequential_update", side_effect=InsufficientSpaceError("no")), + pytest.raises(SystemExit) as exc, + ): + entrypoint.main() + assert exc.value.code == 75 + + +def test_entrypoint_min_date_propagates_unexpected_error(base_config: Path, monkeypatch: pytest.MonkeyPatch): + base_config.mkdir() + monkeypatch.setattr(config, "MIN_INDEX_DATE", "01.01.26") + notify, validate = _patch_common() + with ( + notify, + validate, + patch("src.entrypoint.check_index_age", return_value=True), + patch("src.entrypoint.sequential_update", side_effect=RuntimeError("boom")), + pytest.raises(RuntimeError), + ): + entrypoint.main() + + +def test_entrypoint_logs_apprise_redacted_when_set( + base_config: Path, monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +): + import logging as _logging + + base_config.mkdir() + monkeypatch.setattr(config, "APPRISE_URLS", "tgram://abc") + caplog.set_level(_logging.INFO, logger="root") + with patch("src.entrypoint.send_notification"), patch("src.entrypoint.validate_config"): + entrypoint.main() + assert any("APPRISE_URLS: REDACTED" in r.message for r in caplog.records) diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py new file mode 100644 index 00000000..b871ebd7 --- /dev/null +++ b/tests/test_filesystem.py @@ -0,0 +1,299 @@ +import hashlib +import os +import subprocess +from pathlib import Path +from unittest.mock import patch + +import pytest + +from src import filesystem +from src.utils import config + + +@pytest.fixture +def fake_dirs(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + data_dir = tmp_path / "data" + photon_data_dir = data_dir / "photon_data" + temp_dir = data_dir / "temp" + os_node_dir = photon_data_dir / "node_1" + data_dir.mkdir() + + monkeypatch.setattr(config, "DATA_DIR", str(data_dir)) + monkeypatch.setattr(config, "PHOTON_DATA_DIR", str(photon_data_dir)) + monkeypatch.setattr(config, "TEMP_DIR", str(temp_dir)) + monkeypatch.setattr(config, "OS_NODE_DIR", 
str(os_node_dir)) + return data_dir + + +def test_verify_checksum_returns_true_on_match(tmp_path: Path): + index_file = tmp_path / "index.bin" + index_file.write_bytes(b"hello world") + expected = hashlib.md5(b"hello world").hexdigest() # noqa: S324 + md5_file = tmp_path / "index.bin.md5" + md5_file.write_text(f"{expected} index.bin\n") + + assert filesystem.verify_checksum(str(md5_file), str(index_file)) is True + + +def test_verify_checksum_raises_on_mismatch(tmp_path: Path): + index_file = tmp_path / "index.bin" + index_file.write_bytes(b"hello world") + md5_file = tmp_path / "index.bin.md5" + md5_file.write_text("00000000000000000000000000000000 index.bin\n") + + with pytest.raises(Exception, match="Checksum mismatch"): + filesystem.verify_checksum(str(md5_file), str(index_file)) + + +def test_verify_checksum_raises_when_index_missing(tmp_path: Path): + md5_file = tmp_path / "x.md5" + md5_file.write_text("0" * 32) + with pytest.raises(FileNotFoundError): + filesystem.verify_checksum(str(md5_file), str(tmp_path / "missing")) + + +def test_verify_checksum_raises_when_md5_missing(tmp_path: Path): + index_file = tmp_path / "index.bin" + index_file.write_bytes(b"data") + with pytest.raises(FileNotFoundError): + filesystem.verify_checksum(str(tmp_path / "missing.md5"), str(index_file)) + + +def test_verify_checksum_raises_on_empty_md5_file(tmp_path: Path): + index_file = tmp_path / "index.bin" + index_file.write_bytes(b"data") + md5_file = tmp_path / "empty.md5" + md5_file.write_text("") + with pytest.raises((IndexError, ValueError)): + filesystem.verify_checksum(str(md5_file), str(index_file)) + + +def test_clear_temp_dir_removes_existing_temp(fake_dirs: Path): + temp = Path(config.TEMP_DIR) + temp.mkdir() + (temp / "file.txt").write_text("x") + (temp / "sub").mkdir() + (temp / "sub" / "nested").write_text("y") + + filesystem.clear_temp_dir() + + assert not temp.exists() + + +def test_clear_temp_dir_handles_missing_temp_dir(fake_dirs: Path): + assert not 
Path(config.TEMP_DIR).exists() + filesystem.clear_temp_dir() + + +def test_update_timestamp_marker_creates_marker(fake_dirs: Path): + filesystem.update_timestamp_marker() + marker = Path(config.DATA_DIR) / ".photon-index-updated" + assert marker.exists() + + +def test_update_timestamp_marker_swallows_errors(fake_dirs: Path): + with patch("src.filesystem.Path.touch", side_effect=OSError("nope")): + filesystem.update_timestamp_marker() + + +def test_cleanup_staging_and_temp_backup_removes_both(tmp_path: Path): + staging = tmp_path / "staging" + backup = tmp_path / "backup" + staging.mkdir() + backup.mkdir() + (staging / "f").write_text("x") + (backup / "f").write_text("y") + + filesystem.cleanup_staging_and_temp_backup(str(staging), str(backup)) + + assert not staging.exists() + assert not backup.exists() + + +def test_cleanup_staging_and_temp_backup_no_op_when_missing(tmp_path: Path): + filesystem.cleanup_staging_and_temp_backup(str(tmp_path / "a"), str(tmp_path / "b")) + + +def test_cleanup_staging_and_temp_backup_swallows_rmtree_errors(tmp_path: Path): + staging = tmp_path / "staging" + staging.mkdir() + with patch("src.filesystem.shutil.rmtree", side_effect=OSError("locked")): + filesystem.cleanup_staging_and_temp_backup(str(staging), str(tmp_path / "missing")) + + +def test_cleanup_backup_after_verification_removes_backup(tmp_path: Path): + target = tmp_path / "node_1" + backup = Path(str(target) + ".backup") + backup.mkdir() + (backup / "x").write_text("x") + + assert filesystem.cleanup_backup_after_verification(str(target)) is True + assert not backup.exists() + + +def test_cleanup_backup_after_verification_returns_true_when_no_backup(tmp_path: Path): + target = tmp_path / "node_1" + assert filesystem.cleanup_backup_after_verification(str(target)) is True + + +def test_cleanup_backup_after_verification_returns_false_on_failure(tmp_path: Path): + target = tmp_path / "node_1" + backup = Path(str(target) + ".backup") + backup.mkdir() + with 
patch("src.filesystem.shutil.rmtree", side_effect=OSError("locked")): + assert filesystem.cleanup_backup_after_verification(str(target)) is False + + +def test_move_index_atomic_swaps_into_target(tmp_path: Path): + source = tmp_path / "source" + source.mkdir() + (source / "data.txt").write_text("new") + + target = tmp_path / "target" + + assert filesystem.move_index_atomic(str(source), str(target)) is True + assert (target / "data.txt").read_text() == "new" + assert not source.exists() + assert not (tmp_path / "target.staging").exists() + + +def test_move_index_atomic_replaces_existing_target(tmp_path: Path): + target = tmp_path / "target" + target.mkdir() + (target / "old.txt").write_text("old") + + source = tmp_path / "source" + source.mkdir() + (source / "new.txt").write_text("new") + + assert filesystem.move_index_atomic(str(source), str(target)) is True + assert (target / "new.txt").read_text() == "new" + assert not (target / "old.txt").exists() + backup = Path(str(target) + ".backup") + assert backup.exists() + assert (backup / "old.txt").read_text() == "old" + + +def test_move_index_atomic_cleans_existing_staging_dir(tmp_path: Path): + source = tmp_path / "source" + source.mkdir() + (source / "x.txt").write_text("x") + target = tmp_path / "target" + leftover_staging = Path(str(target) + ".staging") + leftover_staging.mkdir() + (leftover_staging / "stale.txt").write_text("stale") + + assert filesystem.move_index_atomic(str(source), str(target)) is True + assert (target / "x.txt").read_text() == "x" + assert not leftover_staging.exists() + + +def test_move_index_atomic_rolls_back_on_failure(tmp_path: Path): + source = tmp_path / "source" + source.mkdir() + (source / "new.txt").write_text("new") + target = tmp_path / "target" + target.mkdir() + (target / "old.txt").write_text("old") + + real_rename = os.rename + call_count = {"n": 0} + + def fake_rename(src, dst): + call_count["n"] += 1 + if call_count["n"] == 2: + raise OSError("rename boom") + 
real_rename(src, dst) + + with patch("src.filesystem.os.rename", side_effect=fake_rename), pytest.raises(OSError, match="rename boom"): + filesystem.move_index_atomic(str(source), str(target)) + + assert (target / "old.txt").read_text() == "old" + assert not Path(str(target) + ".backup").exists() + + +def test_rollback_atomic_move_keeps_new_index_when_succeeded(tmp_path: Path): + target = tmp_path / "target" + target.mkdir() + (target / "fresh.txt").write_text("fresh") + + filesystem.rollback_atomic_move( + str(tmp_path / "source"), str(target), str(tmp_path / "staging"), str(tmp_path / "backup") + ) + + assert (target / "fresh.txt").read_text() == "fresh" + + +def test_rollback_atomic_move_swallows_inner_exceptions(tmp_path: Path): + target = tmp_path / "target" + target.mkdir() + backup = tmp_path / "backup" + backup.mkdir() + + with patch("src.filesystem.shutil.rmtree", side_effect=OSError("nope")): + filesystem.rollback_atomic_move(str(tmp_path / "source"), str(target), str(tmp_path / "staging"), str(backup)) + + +def test_move_index_calls_atomic_and_writes_marker(fake_dirs: Path): + temp_photon = Path(config.TEMP_DIR) / "photon_data" + temp_photon.mkdir(parents=True) + (temp_photon / "node_1").mkdir() + (temp_photon / "node_1" / "data.bin").write_text("payload") + + assert filesystem.move_index() is True + + marker = Path(config.DATA_DIR) / ".photon-index-updated" + assert marker.exists() + target = Path(config.PHOTON_DATA_DIR) + assert (target / "node_1" / "data.bin").read_text() == "payload" + + +def test_move_index_returns_false_when_atomic_returns_false(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(filesystem, "move_index_atomic", lambda *_: False) + assert filesystem.move_index() is False + marker = Path(config.DATA_DIR) / ".photon-index-updated" + assert not marker.exists() + + +def test_extract_index_runs_lbzip2_command(fake_dirs: Path): + index_file = Path(config.TEMP_DIR).parent / "index.tar.bz2" + 
index_file.parent.mkdir(parents=True, exist_ok=True) + index_file.write_bytes(b"x") + + completed = subprocess.CompletedProcess(args="cmd", returncode=0, stdout="ok", stderr="") + with patch("src.filesystem.subprocess.run", return_value=completed) as run: + filesystem.extract_index(str(index_file)) + + args, kwargs = run.call_args + assert "lbzip2 -d -c" in args[0] + assert str(index_file) in args[0] + assert kwargs["shell"] is True + assert kwargs["check"] is True + assert Path(config.TEMP_DIR).exists() + + +def test_extract_index_creates_temp_dir_when_missing(fake_dirs: Path): + index_file = Path(config.DATA_DIR) / "index.tar.bz2" + index_file.write_bytes(b"x") + + assert not Path(config.TEMP_DIR).exists() + completed = subprocess.CompletedProcess(args="cmd", returncode=0, stdout="", stderr="") + with patch("src.filesystem.subprocess.run", return_value=completed): + filesystem.extract_index(str(index_file)) + + assert Path(config.TEMP_DIR).exists() + + +def test_extract_index_propagates_called_process_error(fake_dirs: Path): + index_file = Path(config.DATA_DIR) / "index.tar.bz2" + index_file.write_bytes(b"x") + err = subprocess.CalledProcessError(returncode=1, cmd="lbzip2 ...", output="", stderr="boom") + with patch("src.filesystem.subprocess.run", side_effect=err), pytest.raises(subprocess.CalledProcessError): + filesystem.extract_index(str(index_file)) + + +def test_extract_index_propagates_unexpected_error(fake_dirs: Path): + index_file = Path(config.DATA_DIR) / "index.tar.bz2" + index_file.write_bytes(b"x") + with patch("src.filesystem.subprocess.run", side_effect=RuntimeError("nope")), pytest.raises(RuntimeError): + filesystem.extract_index(str(index_file)) diff --git a/tests/test_process_manager.py b/tests/test_process_manager.py new file mode 100644 index 00000000..29286ba9 --- /dev/null +++ b/tests/test_process_manager.py @@ -0,0 +1,441 @@ +import signal +import subprocess +from pathlib import Path +from unittest.mock import MagicMock, patch + +import 
pytest +import schedule +from requests.exceptions import RequestException + +from src import process_manager +from src.utils import config + + +@pytest.fixture(autouse=True) +def _no_sleep(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(process_manager.time, "sleep", lambda *_: None) + + +@pytest.fixture(autouse=True) +def _stub_signal(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(process_manager.signal, "signal", lambda *_: None) + + +@pytest.fixture(autouse=True) +def _clear_schedule(): + schedule.clear() + yield + schedule.clear() + + +@pytest.fixture +def manager() -> process_manager.PhotonManager: + return process_manager.PhotonManager() + + +def _ok_response(): + resp = MagicMock() + resp.status_code = 200 + return resp + + +def test_check_photon_health_returns_true_on_200(): + with patch("src.process_manager.requests.get", return_value=_ok_response()): + assert process_manager.check_photon_health(timeout=1, max_retries=1) is True + + +def test_check_photon_health_returns_false_after_retries(): + bad = MagicMock() + bad.status_code = 500 + with patch("src.process_manager.requests.get", return_value=bad): + assert process_manager.check_photon_health(timeout=1, max_retries=2) is False + + +def test_check_photon_health_handles_request_exception(): + with patch("src.process_manager.requests.get", side_effect=RequestException("nope")): + assert process_manager.check_photon_health(timeout=1, max_retries=2) is False + + +def test_wait_for_photon_ready_true_when_health_ok(): + with patch("src.process_manager.check_photon_health", return_value=True): + assert process_manager.wait_for_photon_ready(timeout=1) is True + + +def test_wait_for_photon_ready_false_on_timeout(monkeypatch: pytest.MonkeyPatch): + times = iter([0, 0, 999]) + + def fake_time(): + return next(times) + + monkeypatch.setattr(process_manager.time, "time", fake_time) + with patch("src.process_manager.check_photon_health", return_value=False): + assert 
process_manager.wait_for_photon_ready(timeout=1) is False + + +def test_handle_shutdown_sets_exit_and_calls_shutdown(manager: process_manager.PhotonManager): + with patch.object(manager, "shutdown") as shutdown: + manager.handle_shutdown(signal.SIGTERM, None) + assert manager.should_exit is True + shutdown.assert_called_once() + + +def test_run_initial_setup_exits_on_failure(manager: process_manager.PhotonManager): + completed = subprocess.CompletedProcess(args=[], returncode=1) + with patch("src.process_manager.subprocess.run", return_value=completed), pytest.raises(SystemExit) as exc: + manager.run_initial_setup() + assert exc.value.code == 1 + + +def test_run_initial_setup_succeeds_on_zero_exit(manager: process_manager.PhotonManager): + completed = subprocess.CompletedProcess(args=[], returncode=0) + with patch("src.process_manager.subprocess.run", return_value=completed): + manager.run_initial_setup() + + +def test_start_photon_builds_full_command(manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "ENABLE_METRICS", True) + monkeypatch.setattr(config, "JAVA_PARAMS", "-Xmx4g") + monkeypatch.setattr(config, "PHOTON_PARAMS", "-cors-any") + monkeypatch.setattr(config, "PHOTON_LISTEN_IP", "127.0.0.1") + monkeypatch.setattr(config, "DATA_DIR", "/data") + + fake_proc = MagicMock() + fake_proc.pid = 1234 + with ( + patch("src.process_manager.subprocess.Popen", return_value=fake_proc) as popen, + patch("src.process_manager.wait_for_photon_ready", return_value=True), + ): + assert manager.start_photon(max_startup_retries=1) is True + + cmd = popen.call_args.args[0] + assert cmd[0] == "java" + assert "-Xmx4g" in cmd + assert "-cors-any" in cmd + assert "/photon/photon.jar" in cmd + assert "-listen-ip" in cmd and "127.0.0.1" in cmd + assert "-data-dir" in cmd and "/data" in cmd + assert "-metrics-enable" in cmd and "prometheus" in cmd + + +def test_start_photon_retries_until_failure(manager: 
process_manager.PhotonManager): + fake_proc = MagicMock() + fake_proc.pid = 1 + with ( + patch("src.process_manager.subprocess.Popen", return_value=fake_proc) as popen, + patch("src.process_manager.wait_for_photon_ready", return_value=False), + patch.object(manager, "stop_photon"), + ): + assert manager.start_photon(max_startup_retries=3) is False + assert popen.call_count == 3 + + +def test_stop_photon_no_op_when_no_process(manager: process_manager.PhotonManager): + manager.photon_process = None + manager.stop_photon() + + +def test_stop_photon_sigterm_path(manager: process_manager.PhotonManager): + fake_proc = MagicMock() + fake_proc.pid = 4321 + fake_proc.wait = MagicMock() + manager.photon_process = fake_proc + with ( + patch("src.process_manager.os.killpg") as killpg, + patch("src.process_manager.os.getpgid", return_value=99), + patch.object(manager, "cleanup_orphaned_photon_processes"), + patch.object(manager, "_cleanup_lock_files"), + ): + manager.stop_photon() + killpg.assert_called_once_with(99, signal.SIGTERM) + assert manager.photon_process is None + + +def test_stop_photon_force_kills_on_timeout(manager: process_manager.PhotonManager): + fake_proc = MagicMock() + fake_proc.pid = 4321 + fake_proc.wait.side_effect = [subprocess.TimeoutExpired(cmd="x", timeout=30), None] + manager.photon_process = fake_proc + + with ( + patch("src.process_manager.os.killpg") as killpg, + patch("src.process_manager.os.getpgid", return_value=99), + patch.object(manager, "cleanup_orphaned_photon_processes"), + patch.object(manager, "_cleanup_lock_files"), + ): + manager.stop_photon() + signals = [c.args[1] for c in killpg.call_args_list] + assert signal.SIGTERM in signals and signal.SIGKILL in signals + + +def test_stop_photon_handles_lookup_error(manager: process_manager.PhotonManager): + fake_proc = MagicMock() + fake_proc.pid = 4321 + manager.photon_process = fake_proc + with ( + patch("src.process_manager.os.killpg", side_effect=ProcessLookupError), + 
patch("src.process_manager.os.getpgid", return_value=99), + patch.object(manager, "cleanup_orphaned_photon_processes"), + patch.object(manager, "_cleanup_lock_files"), + ): + manager.stop_photon() + assert manager.photon_process is None + + +def test_cleanup_orphaned_photon_processes_terminates_matches(manager: process_manager.PhotonManager): + proc_a = MagicMock() + proc_a.info = {"pid": 1, "name": "java", "cmdline": ["java", "-jar", "/photon/photon.jar"]} + proc_b = MagicMock() + proc_b.info = {"pid": 2, "name": "python", "cmdline": ["python", "x"]} + proc_c = MagicMock() + proc_c.info = {"pid": 3, "name": "java", "cmdline": ["java", "-jar", "other.jar"]} + + with patch("src.process_manager.psutil.process_iter", return_value=[proc_a, proc_b, proc_c]): + manager.cleanup_orphaned_photon_processes() + + proc_a.terminate.assert_called_once() + proc_b.terminate.assert_not_called() + proc_c.terminate.assert_not_called() + + +def test_cleanup_orphaned_photon_processes_kills_on_timeout(manager: process_manager.PhotonManager): + import psutil + + proc = MagicMock() + proc.info = {"pid": 1, "name": "java", "cmdline": ["java", "-jar", "/photon/photon.jar"]} + proc.wait.side_effect = psutil.TimeoutExpired(seconds=5) + + with patch("src.process_manager.psutil.process_iter", return_value=[proc]): + manager.cleanup_orphaned_photon_processes() + proc.kill.assert_called_once() + + +def test_cleanup_orphaned_photon_processes_swallows_exceptions(manager: process_manager.PhotonManager): + with patch("src.process_manager.psutil.process_iter", side_effect=RuntimeError("nope")): + manager.cleanup_orphaned_photon_processes() + + +def test_cleanup_lock_files_removes_existing( + manager: process_manager.PhotonManager, tmp_path: Path, monkeypatch: pytest.MonkeyPatch +): + node = tmp_path / "node_1" + node.mkdir() + (node / "node.lock").write_text("") + (node / "data").mkdir() + (node / "data" / "node.lock").write_text("") + + monkeypatch.setattr(config, "OS_NODE_DIR", str(node)) + 
manager._cleanup_lock_files() + assert not (node / "node.lock").exists() + assert not (node / "data" / "node.lock").exists() + + +def test_cleanup_lock_files_swallows_remove_errors( + manager: process_manager.PhotonManager, tmp_path: Path, monkeypatch: pytest.MonkeyPatch +): + node = tmp_path / "node_1" + node.mkdir() + (node / "node.lock").write_text("") + monkeypatch.setattr(config, "OS_NODE_DIR", str(node)) + with patch("src.process_manager.os.remove", side_effect=OSError("locked")): + manager._cleanup_lock_files() + + +def test_run_update_skips_when_disabled(manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "UPDATE_STRATEGY", "DISABLED") + with patch("src.process_manager.subprocess.run") as run: + manager.run_update() + run.assert_not_called() + + +def test_run_update_no_op_when_index_up_to_date( + manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch +): + monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") + with ( + patch("src.process_manager.compare_mtime", return_value=False), + patch("src.process_manager.subprocess.run") as run, + ): + manager.run_update() + run.assert_not_called() + assert manager.state == process_manager.AppState.RUNNING + + +def test_run_update_parallel_path(manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "UPDATE_STRATEGY", "PARALLEL") + completed = subprocess.CompletedProcess(args=[], returncode=0) + with ( + patch("src.process_manager.compare_mtime", return_value=True), + patch("src.process_manager.subprocess.run", return_value=completed), + patch.object(manager, "stop_photon") as stop, + patch.object(manager, "start_photon", return_value=True) as start, + patch("src.process_manager.cleanup_backup_after_verification") as cleanup, + ): + manager.run_update() + stop.assert_called_once() + start.assert_called_once() + cleanup.assert_called_once() + + +def 
test_run_update_parallel_logs_failure_when_health_check_fails( + manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch +): + monkeypatch.setattr(config, "UPDATE_STRATEGY", "PARALLEL") + completed = subprocess.CompletedProcess(args=[], returncode=0) + with ( + patch("src.process_manager.compare_mtime", return_value=True), + patch("src.process_manager.subprocess.run", return_value=completed), + patch.object(manager, "stop_photon"), + patch.object(manager, "start_photon", return_value=False), + patch("src.process_manager.cleanup_backup_after_verification") as cleanup, + ): + manager.run_update() + cleanup.assert_not_called() + + +def test_run_update_sequential_path(manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") + completed = subprocess.CompletedProcess(args=[], returncode=0) + with ( + patch("src.process_manager.compare_mtime", return_value=True), + patch("src.process_manager.subprocess.run", return_value=completed), + patch.object(manager, "stop_photon") as stop, + patch.object(manager, "start_photon", return_value=True) as start, + patch("src.process_manager.cleanup_backup_after_verification") as cleanup, + ): + manager.run_update() + stop.assert_called_once() + start.assert_called_once() + cleanup.assert_called_once() + + +def test_run_update_sequential_restarts_photon_after_failed_update( + manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch +): + monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") + completed = subprocess.CompletedProcess(args=[], returncode=1) + manager.photon_process = None + with ( + patch("src.process_manager.compare_mtime", return_value=True), + patch("src.process_manager.subprocess.run", return_value=completed), + patch.object(manager, "stop_photon"), + patch.object(manager, "start_photon", return_value=True) as start, + ): + manager.run_update() + start.assert_called_once() + + 
+@pytest.mark.parametrize(("interval", "expected_unit"), [("3d", "days"), ("12h", "hours"), ("30m", "minutes")]) +def test_schedule_updates_parses_intervals( + manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch, interval: str, expected_unit: str +): + monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") + monkeypatch.setattr(config, "UPDATE_INTERVAL", interval) + monkeypatch.setattr(process_manager.threading, "Thread", lambda **_: MagicMock(start=lambda: None)) + manager.schedule_updates() + jobs = schedule.get_jobs() + assert len(jobs) == 1 + assert jobs[0].unit == expected_unit + + +def test_schedule_updates_falls_back_to_daily_on_invalid_interval( + manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch +): + monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") + monkeypatch.setattr(config, "UPDATE_INTERVAL", "garbage") + monkeypatch.setattr(process_manager.threading, "Thread", lambda **_: MagicMock(start=lambda: None)) + manager.schedule_updates() + jobs = schedule.get_jobs() + assert len(jobs) == 1 + assert jobs[0].unit == "days" + + +def test_schedule_updates_skipped_when_disabled( + manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch +): + monkeypatch.setattr(config, "UPDATE_STRATEGY", "DISABLED") + monkeypatch.setattr(process_manager.threading, "Thread", lambda **_: MagicMock(start=lambda: None)) + manager.schedule_updates() + assert schedule.get_jobs() == [] + + +def test_monitor_photon_restarts_on_unexpected_exit(manager: process_manager.PhotonManager): + fake_proc = MagicMock() + fake_proc.poll.side_effect = [1, None] + manager.photon_process = fake_proc + manager.state = process_manager.AppState.RUNNING + + call_count = {"n": 0} + + def restart(): + call_count["n"] += 1 + manager.should_exit = True + return True + + with patch.object(manager, "start_photon", side_effect=restart): + manager.monitor_photon() + assert call_count["n"] == 1 + + +def 
test_monitor_photon_logs_failed_restart(manager: process_manager.PhotonManager): + fake_proc = MagicMock() + fake_proc.poll.return_value = 1 + manager.photon_process = fake_proc + manager.state = process_manager.AppState.RUNNING + + def restart(): + manager.should_exit = True + return False + + with patch.object(manager, "start_photon", side_effect=restart): + manager.monitor_photon() + + +def test_shutdown_calls_stop_and_exits(manager: process_manager.PhotonManager): + with patch.object(manager, "stop_photon") as stop, pytest.raises(SystemExit) as exc: + manager.shutdown() + stop.assert_called_once() + assert exc.value.code == 0 + + +def test_run_skips_setup_when_index_present( + manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch, tmp_path: Path +): + node = tmp_path / "node_1" + node.mkdir() + monkeypatch.setattr(config, "OS_NODE_DIR", str(node)) + monkeypatch.setattr(config, "FORCE_UPDATE", False) + with ( + patch.object(manager, "run_initial_setup") as setup, + patch.object(manager, "start_photon", return_value=True), + patch.object(manager, "schedule_updates"), + patch.object(manager, "monitor_photon"), + ): + manager.run() + setup.assert_not_called() + + +def test_run_invokes_initial_setup_when_no_index( + manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch, tmp_path: Path +): + monkeypatch.setattr(config, "OS_NODE_DIR", str(tmp_path / "missing")) + monkeypatch.setattr(config, "FORCE_UPDATE", False) + with ( + patch.object(manager, "run_initial_setup") as setup, + patch.object(manager, "start_photon", return_value=True), + patch.object(manager, "schedule_updates"), + patch.object(manager, "monitor_photon"), + ): + manager.run() + setup.assert_called_once() + + +def test_run_exits_when_photon_fails_to_start( + manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch, tmp_path: Path +): + node = tmp_path / "node_1" + node.mkdir() + monkeypatch.setattr(config, "OS_NODE_DIR", str(node)) + 
monkeypatch.setattr(config, "FORCE_UPDATE", False) + with patch.object(manager, "start_photon", return_value=False), pytest.raises(SystemExit) as exc: + manager.run() + assert exc.value.code == 1 diff --git a/tests/test_updater.py b/tests/test_updater.py new file mode 100644 index 00000000..2e6794db --- /dev/null +++ b/tests/test_updater.py @@ -0,0 +1,57 @@ +from unittest.mock import patch + +import pytest + +from src import updater +from src.utils import config + + +def test_updater_main_runs_parallel(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "UPDATE_STRATEGY", "PARALLEL") + with ( + patch("src.updater.parallel_update") as parallel, + patch("src.updater.sequential_update") as sequential, + patch("src.updater.send_notification") as notify, + ): + updater.main() + parallel.assert_called_once_with() + sequential.assert_not_called() + notify.assert_called_once_with("Photon Index Updated Successfully") + + +def test_updater_main_runs_sequential(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") + with ( + patch("src.updater.parallel_update") as parallel, + patch("src.updater.sequential_update") as sequential, + patch("src.updater.send_notification"), + ): + updater.main() + parallel.assert_not_called() + sequential.assert_called_once_with() + + +def test_updater_main_exits_on_unknown_strategy(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "UPDATE_STRATEGY", "BOGUS") + with ( + patch("src.updater.parallel_update"), + patch("src.updater.sequential_update"), + patch("src.updater.send_notification"), + pytest.raises(SystemExit) as exc, + ): + updater.main() + assert exc.value.code == 1 + + +def test_updater_main_notifies_on_failure(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "UPDATE_STRATEGY", "PARALLEL") + with ( + patch("src.updater.parallel_update", side_effect=RuntimeError("boom")), + patch("src.updater.send_notification") as notify, + pytest.raises(SystemExit) as 
exc, + ): + updater.main() + assert exc.value.code == 1 + + args = [call.args[0] for call in notify.call_args_list] + assert any("Photon Update Failed" in a for a in args) diff --git a/tests/utils/test_logger.py b/tests/utils/test_logger.py new file mode 100644 index 00000000..37302988 --- /dev/null +++ b/tests/utils/test_logger.py @@ -0,0 +1,71 @@ +import contextlib +import logging +from pathlib import Path + +import pytest + +from src.utils import config, logger + + +@contextlib.contextmanager +def _empty_root_handlers(): + root = logging.getLogger() + saved = root.handlers[:] + saved_level = root.level + root.handlers = [] + try: + yield root + finally: + root.handlers = saved + root.level = saved_level + + +def test_setup_logging_attaches_console_and_file_handlers(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "DATA_DIR", str(tmp_path)) + monkeypatch.setattr(config, "LOG_LEVEL", "DEBUG") + + with _empty_root_handlers() as root: + logger.setup_logging() + handler_types = {type(h).__name__ for h in root.handlers} + assert "StreamHandler" in handler_types + assert "RotatingFileHandler" in handler_types + + assert (tmp_path / "logs" / "photon.log").exists() + + +def test_setup_logging_is_idempotent(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "DATA_DIR", str(tmp_path)) + monkeypatch.setattr(config, "LOG_LEVEL", "INFO") + + with _empty_root_handlers() as root: + logger.setup_logging() + first_count = len(root.handlers) + logger.setup_logging() + assert len(root.handlers) == first_count + + +def test_setup_logging_swallows_oserror_on_log_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "DATA_DIR", str(tmp_path)) + monkeypatch.setattr(config, "LOG_LEVEL", "INFO") + + real_mkdir = Path.mkdir + + def fake_mkdir(self, *args, **kwargs): + if self.name == "logs": + raise PermissionError("nope") + return real_mkdir(self, *args, **kwargs) + + monkeypatch.setattr(Path, 
"mkdir", fake_mkdir) + with _empty_root_handlers() as root: + logger.setup_logging() + handler_types = {type(h).__name__ for h in root.handlers} + assert "StreamHandler" in handler_types + assert "RotatingFileHandler" not in handler_types + + +def test_get_logger_returns_root_when_no_name(): + assert logger.get_logger() is logging.getLogger() + + +def test_get_logger_returns_named_logger(): + assert logger.get_logger("foo").name == "foo" diff --git a/tests/utils/test_notify.py b/tests/utils/test_notify.py new file mode 100644 index 00000000..db754255 --- /dev/null +++ b/tests/utils/test_notify.py @@ -0,0 +1,65 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from src.utils import config, notify + + +class _FakeApprise: + def __init__(self, valid_count=1, notify_result=True): + self._added = [] + self._valid_count = valid_count + self._notify_result = notify_result + self.notify = MagicMock(return_value=notify_result) + + def add(self, url): + self._added.append(url) + + def __len__(self): + return self._valid_count + + +def test_send_notification_skips_when_no_urls(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture): + import logging as _logging + + monkeypatch.setattr(config, "APPRISE_URLS", "") + caplog.set_level(_logging.INFO, logger="root") + with patch("src.utils.notify.apprise.Apprise") as factory: + notify.send_notification("hello") + factory.assert_not_called() + assert any("skipping notification" in r.message for r in caplog.records) + + +def test_send_notification_sends_to_each_url(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "APPRISE_URLS", "tgram://abc, slack://xyz , ") + fake = _FakeApprise(valid_count=2) + with patch("src.utils.notify.apprise.Apprise", return_value=fake): + notify.send_notification("hello", title="Title") + + assert fake._added == ["tgram://abc", "slack://xyz"] + fake.notify.assert_called_once_with(body="hello", title="Title") + + +def 
test_send_notification_warns_when_all_invalid(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture): + import logging as _logging + + monkeypatch.setattr(config, "APPRISE_URLS", "garbage") + fake = _FakeApprise(valid_count=0) + caplog.set_level(_logging.WARNING, logger="root") + with patch("src.utils.notify.apprise.Apprise", return_value=fake): + notify.send_notification("hello") + assert any("No valid Apprise URLs" in r.message for r in caplog.records) + fake.notify.assert_not_called() + + +def test_send_notification_logs_error_when_apprise_fails( + monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture +): + import logging as _logging + + monkeypatch.setattr(config, "APPRISE_URLS", "tgram://abc") + fake = _FakeApprise(valid_count=1, notify_result=False) + caplog.set_level(_logging.ERROR, logger="root") + with patch("src.utils.notify.apprise.Apprise", return_value=fake): + notify.send_notification("hello") + assert any("Failed to send notification" in r.message for r in caplog.records) From 79fcf197312bc1f3308f42ab0ef447db767827f2 Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Thu, 2 Apr 2026 14:04:24 +0200 Subject: [PATCH 05/27] single region jsonl implementation --- pyproject.toml | 27 +++--- src/entrypoint.py | 52 ++++++++--- src/importer.py | 64 +++++++++++++ src/jsonl/__init__.py | 3 + src/jsonl/decompressor.py | 7 ++ src/jsonl/downloader.py | 46 ++++++++++ src/process_manager.py | 8 ++ src/updater.py | 4 + src/utils/config.py | 27 ++++++ src/utils/regions.py | 95 +++++++++++++++----- src/utils/validate_config.py | 30 ++++++- tests/jsonl/test_downloader.py | 6 ++ tests/test_entrypoint.py | 35 ++++++++ tests/test_importer.py | 135 ++++++++++++++++++++++++++++ tests/utils/test_regions.py | 32 ++++++- tests/utils/test_validate_config.py | 44 +++++++++ 16 files changed, 563 insertions(+), 52 deletions(-) create mode 100644 src/importer.py create mode 100644 src/jsonl/__init__.py create mode 100644 src/jsonl/decompressor.py create mode 
100644 src/jsonl/downloader.py create mode 100644 tests/jsonl/test_downloader.py create mode 100644 tests/test_importer.py diff --git a/pyproject.toml b/pyproject.toml index 3b39cfb0..d06ff83a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ dependencies = [ "schedule>=1.2.2", "tqdm==4.67.3", "urllib3==2.7.0", + "zstandard>=0.23.0", ] [dependency-groups] @@ -37,14 +38,14 @@ line-length = 120 [tool.ruff.lint] # select = ["ALL"] ignore = [ - "ANN", # flake8-annotations - "COM", # flake8-commas - "DJ", # django - "EXE", # flake8-executable - "BLE", # blind except - "PTH", # flake8-pathlib - "T10", # debugger - "TID", # flake8-tidy-imports + "ANN", # flake8-annotations + "COM", # flake8-commas + "DJ", # django + "EXE", # flake8-executable + "BLE", # blind except + "PTH", # flake8-pathlib + "T10", # debugger + "TID", # flake8-tidy-imports "D100", "D101", "D102", @@ -54,18 +55,18 @@ ignore = [ "D106", "D107", "D101", - "D107", # missing docstring in public module - "D102", # missing docstring in public class - "D104", # missing docstring in public package + "D107", # missing docstring in public module + "D102", # missing docstring in public class + "D104", # missing docstring in public package "D213", "D203", "D400", "D415", "G004", "PLR2004", - "E501", # line too long + "E501", # line too long "TRY", - "SIM105", # faster without contextlib + "SIM105", # faster without contextlib ] extend-select = ["B", "S", "SIM", "T20", "C901", "RUF"] fixable = ["ALL"] diff --git a/src/entrypoint.py b/src/entrypoint.py index ac6e930d..308d939d 100644 --- a/src/entrypoint.py +++ b/src/entrypoint.py @@ -3,6 +3,7 @@ from src.check_remote import check_index_age from src.downloader import InsufficientSpaceError, parallel_update, sequential_update +from src.importer import run_jsonl_import from src.utils import config from src.utils.logger import get_logger, setup_logging from src.utils.notify import send_notification @@ -12,14 +13,15 @@ logger = get_logger() -def main(): - 
send_notification("Photon-Docker Initializing") - - logger.debug("Entrypoint setup called") +def log_config() -> None: logger.info("=== CONFIG VARIABLES ===") + logger.info(f"IMPORT_MODE: {config.IMPORT_MODE}") logger.info(f"UPDATE_STRATEGY: {config.UPDATE_STRATEGY}") logger.info(f"UPDATE_INTERVAL: {config.UPDATE_INTERVAL}") logger.info(f"REGION: {config.REGION}") + logger.info(f"LANGUAGES: {config.LANGUAGES}") + logger.info(f"EXTRA_TAGS: {config.EXTRA_TAGS}") + logger.info(f"IMPORT_GEOMETRIES: {config.IMPORT_GEOMETRIES}") logger.info(f"FORCE_UPDATE: {config.FORCE_UPDATE}") logger.info(f"DOWNLOAD_MAX_RETRIES: {config.DOWNLOAD_MAX_RETRIES}") logger.info(f"FILE_URL (sanitized): {sanitize_url(config.FILE_URL)}") @@ -39,6 +41,32 @@ def main(): logger.info("=== END CONFIG VARIABLES ===") + +def run_update_or_import(force_update: bool = False) -> None: + if config.IMPORT_MODE == "jsonl": + action = "forced JSONL import" if force_update else "initial JSONL import" + logger.info(f"Starting {action}") + run_jsonl_import() + return + + if not force_update: + logger.info("Starting initial download using sequential strategy") + logger.info("Note: Initial download will use sequential strategy regardless of config setting") + sequential_update() + return + + if config.UPDATE_STRATEGY == "PARALLEL": + parallel_update() + else: + sequential_update() + + +def main(): + send_notification("Photon-Docker Initializing") + + logger.debug("Entrypoint setup called") + log_config() + try: validate_config() except ValueError as e: @@ -51,10 +79,7 @@ def main(): if config.FORCE_UPDATE: logger.info("Starting forced update") try: - if config.UPDATE_STRATEGY == "PARALLEL": - parallel_update() - else: - sequential_update() + run_update_or_import(force_update=True) except InsufficientSpaceError as e: logger.error(f"Cannot proceed with force update: {e}") send_notification(f"Photon-Docker force update failed: {e}") @@ -66,17 +91,22 @@ def main(): if not config.INITIAL_DOWNLOAD: 
logger.warning("Initial download is disabled but no existing Photon index was found. ") return - logger.info("Starting initial download using sequential strategy") - logger.info("Note: Initial download will use sequential strategy regardless of config setting") try: - sequential_update() + run_update_or_import(force_update=False) except InsufficientSpaceError as e: logger.error(f"Cannot proceed: {e}") send_notification(f"Photon-Docker cannot start: {e}") sys.exit(75) + except Exception: + logger.error("Initial setup failed") + raise else: logger.info("Existing index found, skipping download") + if config.IMPORT_MODE == "jsonl": + logger.info("JSONL mode with existing index found, skipping automatic rebuild during setup") + return + if config.MIN_INDEX_DATE and check_index_age(): logger.info("Index is older than minimum required date, starting sequential update") try: diff --git a/src/importer.py b/src/importer.py new file mode 100644 index 00000000..da082b2f --- /dev/null +++ b/src/importer.py @@ -0,0 +1,64 @@ +import os +import shlex +import subprocess + +from src.filesystem import clear_temp_dir +from src.jsonl.decompressor import stream_decompress +from src.jsonl.downloader import download_jsonl +from src.utils import config +from src.utils.logger import get_logger +from src.utils.regions import get_regions_for_jsonl + +logger = get_logger(__name__) + + +def run_jsonl_import() -> None: + regions = get_regions_for_jsonl(config.get_jsonl_regions()) + if len(regions) != 1: + raise ValueError("JSONL mode currently supports exactly one region.") + + region = regions[0] + + try: + jsonl_path = download_jsonl(region) + import_proc = _start_photon_import("-") + try: + if import_proc.stdin is None: + raise RuntimeError("Photon import process stdin is unavailable") + for chunk in stream_decompress(jsonl_path): + import_proc.stdin.write(chunk) + + import_proc.stdin.close() + return_code = import_proc.wait() + if return_code != 0: + raise RuntimeError(f"Photon JSONL import 
failed with exit code {return_code}") + except Exception: + import_proc.kill() + import_proc.wait() + raise + finally: + clear_temp_dir() + + +def _start_photon_import(input_source: str) -> subprocess.Popen: + os.makedirs(config.DATA_DIR, exist_ok=True) + + cmd = ["java"] + if config.JAVA_PARAMS: + cmd.extend(shlex.split(config.JAVA_PARAMS)) + + cmd.extend(["-jar", "/photon/photon.jar", "import", "-import-file", input_source, "-data-dir", config.DATA_DIR]) + + languages = config.get_languages() + if languages: + cmd.extend(["-languages", ",".join(languages)]) + + extra_tags = config.get_extra_tags() + if extra_tags: + cmd.extend(["-extra-tags", ",".join(extra_tags)]) + + if config.IMPORT_GEOMETRIES: + cmd.append("-full-geometries") + + logger.info(f"Starting Photon JSONL import for region(s): {', '.join(config.get_jsonl_regions())}") + return subprocess.Popen(cmd, cwd=config.PHOTON_DIR, stdin=subprocess.PIPE) # noqa: S603 diff --git a/src/jsonl/__init__.py b/src/jsonl/__init__.py new file mode 100644 index 00000000..54590dd7 --- /dev/null +++ b/src/jsonl/__init__.py @@ -0,0 +1,3 @@ +from src.jsonl.downloader import download_jsonl + +__all__ = ["download_jsonl"] diff --git a/src/jsonl/decompressor.py b/src/jsonl/decompressor.py new file mode 100644 index 00000000..7384534e --- /dev/null +++ b/src/jsonl/decompressor.py @@ -0,0 +1,7 @@ +import zstandard as zstd + + +def stream_decompress(input_path: str, read_size: int = 65536): + dctx = zstd.ZstdDecompressor() + with open(input_path, "rb") as file_handle: + yield from dctx.read_to_iter(file_handle, read_size=read_size) diff --git a/src/jsonl/downloader.py b/src/jsonl/downloader.py new file mode 100644 index 00000000..b8f37930 --- /dev/null +++ b/src/jsonl/downloader.py @@ -0,0 +1,46 @@ +import os + +from src.downloader import download_file +from src.utils import config +from src.utils.logger import get_logger +from src.utils.regions import get_jsonl_filename, get_region_info, normalize_region + +logger = 
get_logger(__name__) + + +def get_jsonl_url(region: str) -> str: + normalized_region = normalize_region(region) + if normalized_region is None: + raise ValueError(f"Unknown region: {region}") + + region_info = get_region_info(normalized_region) + if not region_info: + raise ValueError(f"Unknown region: {region}") + + filename = get_jsonl_filename(normalized_region, config.JSONL_FILE_EXTENSION, config.JSONL_RELEASE_CHANNEL) + + if region_info["type"] == "planet": + return f"{config.BASE_URL}/{filename}" + if region_info["type"] == "continent": + return f"{config.BASE_URL}/{normalized_region}/{filename}" + + continent = region_info["continent"] + return f"{config.BASE_URL}/{continent}/{normalized_region}/{filename}" + + +def download_jsonl(region: str) -> str: + os.makedirs(config.TEMP_DIR, exist_ok=True) + + normalized_region = normalize_region(region) + if normalized_region is None: + raise ValueError(f"Unknown region: {region}") + + download_url = get_jsonl_url(normalized_region) + output_path = os.path.join(config.TEMP_DIR, f"{normalized_region}.{config.JSONL_FILE_EXTENSION}") + + logger.info(f"Downloading JSONL dump for {normalized_region} from {download_url}") + + if not download_file(download_url, output_path): + raise RuntimeError(f"Failed to download JSONL dump from {download_url}") + + return output_path diff --git a/src/process_manager.py b/src/process_manager.py index f9204182..dee435f2 100644 --- a/src/process_manager.py +++ b/src/process_manager.py @@ -205,6 +205,10 @@ def run_update(self): logger.info("Updates disabled, skipping") return + if config.IMPORT_MODE == "jsonl": + logger.info("Scheduled JSONL rebuilds are not implemented yet, skipping") + return + self.state = AppState.UPDATING logger.info(f"Running {config.UPDATE_STRATEGY.lower()} update...") update_start = time.time() @@ -257,6 +261,10 @@ def schedule_updates(self): logger.info("Updates disabled, not scheduling") return + if config.IMPORT_MODE == "jsonl": + logger.info("Skipping scheduled 
updates in JSONL mode until rebuild support is implemented") + return + interval = config.UPDATE_INTERVAL.lower() if interval.endswith("d"): diff --git a/src/updater.py b/src/updater.py index f11bf548..0a20f735 100644 --- a/src/updater.py +++ b/src/updater.py @@ -12,6 +12,10 @@ def main(): logger.info("Starting update process...") try: + if config.IMPORT_MODE == "jsonl": + logger.info("Scheduled JSONL rebuilds are not implemented yet, skipping updater run") + return + if config.UPDATE_STRATEGY == "PARALLEL": logger.info("Running parallel update...") parallel_update() diff --git a/src/utils/config.py b/src/utils/config.py index 2d79a205..73e580aa 100644 --- a/src/utils/config.py +++ b/src/utils/config.py @@ -1,9 +1,13 @@ import os # USER CONFIG +IMPORT_MODE = os.getenv("IMPORT_MODE", "db") UPDATE_STRATEGY = os.getenv("UPDATE_STRATEGY", "SEQUENTIAL") UPDATE_INTERVAL = os.getenv("UPDATE_INTERVAL", "30d") REGION = os.getenv("REGION") +LANGUAGES = os.getenv("LANGUAGES") +EXTRA_TAGS = os.getenv("EXTRA_TAGS") +IMPORT_GEOMETRIES = os.getenv("IMPORT_GEOMETRIES", "False").lower() in ("true", "1", "t") FORCE_UPDATE = os.getenv("FORCE_UPDATE", "False").lower() in ("true", "1", "t") DOWNLOAD_MAX_RETRIES = os.getenv("DOWNLOAD_MAX_RETRIES", "3") FILE_URL = os.getenv("FILE_URL") @@ -23,6 +27,8 @@ # APP CONFIG INDEX_DB_VERSION = "1.0" INDEX_FILE_EXTENSION = "tar.bz2" +JSONL_FILE_EXTENSION = "jsonl.zst" +JSONL_RELEASE_CHANNEL = "master" PHOTON_DIR = "/photon" DATA_DIR = "/photon/data" @@ -30,6 +36,27 @@ TEMP_DIR = os.path.join(DATA_DIR, "temp") OS_NODE_DIR = os.path.join(PHOTON_DATA_DIR, "node_1") + +def get_languages() -> list[str] | None: + return _get_csv_values(LANGUAGES) + + +def get_extra_tags() -> list[str] | None: + return _get_csv_values(EXTRA_TAGS) + + +def get_jsonl_regions() -> list[str]: + return _get_csv_values(REGION) or [] + + +def _get_csv_values(value: str | None) -> list[str] | None: + if not value: + return None + + values = [item.strip() for item in 
value.split(",") if item.strip()] + return values or None + + if FILE_URL: UPDATE_STRATEGY = "DISABLED" if not MD5_URL: diff --git a/src/utils/regions.py b/src/utils/regions.py index 74e17a9b..ba38b65c 100644 --- a/src/utils/regions.py +++ b/src/utils/regions.py @@ -1,27 +1,32 @@ REGION_MAPPING = { - "planet": {"type": "planet", "continent": None, "available": True}, - "africa": {"type": "continent", "continent": "africa", "available": True}, - "asia": {"type": "continent", "continent": "asia", "available": True}, - "australia-oceania": {"type": "continent", "continent": "australia-oceania", "available": True}, - "europe": {"type": "continent", "continent": "europe", "available": True}, - "north-america": {"type": "continent", "continent": "north-america", "available": True}, - "south-america": {"type": "continent", "continent": "south-america", "available": True}, - "india": {"type": "sub-region", "continent": "asia", "available": True}, - "japan": {"type": "sub-region", "continent": "asia", "available": True}, - "andorra": {"type": "sub-region", "continent": "europe", "available": True}, - "austria": {"type": "sub-region", "continent": "europe", "available": True}, - "denmark": {"type": "sub-region", "continent": "europe", "available": True}, - "france-monacco": {"type": "sub-region", "continent": "europe", "available": True}, - "germany": {"type": "sub-region", "continent": "europe", "available": True}, - "luxemburg": {"type": "sub-region", "continent": "europe", "available": True}, - "netherlands": {"type": "sub-region", "continent": "europe", "available": True}, - "russia": {"type": "sub-region", "continent": "europe", "available": True}, - "slovakia": {"type": "sub-region", "continent": "europe", "available": True}, - "spain": {"type": "sub-region", "continent": "europe", "available": True}, - "canada": {"type": "sub-region", "continent": "north-america", "available": True}, - "mexico": {"type": "sub-region", "continent": "north-america", "available": True}, 
- "usa": {"type": "sub-region", "continent": "north-america", "available": True}, - "argentina": {"type": "sub-region", "continent": "south-america", "available": True}, + "planet": {"type": "planet", "continent": None, "db_available": True, "jsonl_available": True}, + "africa": {"type": "continent", "continent": "africa", "db_available": True, "jsonl_available": True}, + "asia": {"type": "continent", "continent": "asia", "db_available": True, "jsonl_available": True}, + "australia-oceania": { + "type": "continent", + "continent": "australia-oceania", + "db_available": True, + "jsonl_available": True, + }, + "europe": {"type": "continent", "continent": "europe", "db_available": True, "jsonl_available": True}, + "north-america": {"type": "continent", "continent": "north-america", "db_available": True, "jsonl_available": True}, + "south-america": {"type": "continent", "continent": "south-america", "db_available": True, "jsonl_available": True}, + "india": {"type": "sub-region", "continent": "asia", "db_available": True, "jsonl_available": True}, + "japan": {"type": "sub-region", "continent": "asia", "db_available": True, "jsonl_available": True}, + "andorra": {"type": "sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, + "austria": {"type": "sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, + "denmark": {"type": "sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, + "france-monacco": {"type": "sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, + "germany": {"type": "sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, + "luxemburg": {"type": "sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, + "netherlands": {"type": "sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, + "russia": {"type": "sub-region", "continent": "europe", "db_available": 
True, "jsonl_available": True}, + "slovakia": {"type": "sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, + "spain": {"type": "sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, + "canada": {"type": "sub-region", "continent": "north-america", "db_available": True, "jsonl_available": True}, + "mexico": {"type": "sub-region", "continent": "north-america", "db_available": True, "jsonl_available": True}, + "usa": {"type": "sub-region", "continent": "north-america", "db_available": True, "jsonl_available": True}, + "argentina": {"type": "sub-region", "continent": "south-america", "db_available": True, "jsonl_available": True}, } REGION_ALIASES = { @@ -82,6 +87,10 @@ def get_index_filename(region_name: str, db_version: str, extension: str) -> str return f"photon-db-{region_name}-{db_version}-latest.{extension}" +def get_jsonl_filename(region_name: str, extension: str, channel: str = "master") -> str: + return f"photon-dump-{region_name}-{channel}-latest.{extension}" + + def get_index_url_path(region: str | None, db_version: str, extension: str) -> str: if region: normalized = normalize_region(region) @@ -106,3 +115,43 @@ def get_index_url_path(region: str | None, db_version: str, extension: str) -> s raise ValueError(f"Invalid region type: {region_type}") return f"/{get_index_filename('planet', db_version, extension)}" + + +def get_jsonl_url_path(region: str, extension: str) -> str: + normalized = normalize_region(region) + if normalized is None: + raise ValueError(f"Unknown region: {region}") + + region_info = get_region_info(region) + if not region_info: + raise ValueError(f"Unknown region: {region}") + + if not region_info.get("jsonl_available", False): + raise ValueError(f"JSONL not available for region: {region}") + + filename = get_jsonl_filename(normalized, extension) + region_type = region_info["type"] + + if region_type == "planet": + return f"/{filename}" + if region_type == "continent": + 
return f"/{normalized}/{filename}" + if region_type == "sub-region": + continent = region_info["continent"] + return f"/{continent}/{normalized}/{filename}" + + raise ValueError(f"Invalid region type: {region_type}") + + +def get_regions_for_jsonl(regions: list[str]) -> list[str]: + validated_regions = [] + + for region in regions: + region_info = get_region_info(region) + if not region_info: + raise ValueError(f"Unknown region: {region}") + if not region_info.get("jsonl_available", False): + raise ValueError(f"JSONL not available for region: {region}") + validated_regions.append(normalize_region(region)) + + return [region for region in validated_regions if region] diff --git a/src/utils/validate_config.py b/src/utils/validate_config.py index d5337e10..9fc846be 100644 --- a/src/utils/validate_config.py +++ b/src/utils/validate_config.py @@ -2,7 +2,7 @@ from src.utils import config from src.utils.logger import get_logger -from src.utils.regions import is_valid_region +from src.utils.regions import get_regions_for_jsonl, is_valid_region logging = get_logger() @@ -11,6 +11,10 @@ def validate_config(): logging.info("Validating environment variables...") error_messages = [] + valid_import_modes = ["db", "jsonl"] + if config.IMPORT_MODE not in valid_import_modes: + error_messages.append(f"Invalid IMPORT_MODE: '{config.IMPORT_MODE}'. Must be one of {valid_import_modes}.") + valid_strategies = ["SEQUENTIAL", "PARALLEL", "DISABLED"] if config.UPDATE_STRATEGY not in valid_strategies: error_messages.append( @@ -22,8 +26,28 @@ def validate_config(): f"Invalid UPDATE_INTERVAL format: '{config.UPDATE_INTERVAL}'. Expected format like '30d', '12h', or '30m'." ) - if config.REGION and not is_valid_region(config.REGION): - error_messages.append(f"Invalid REGION: '{config.REGION}'. 
Must be a valid continent, sub-region, or 'planet'.") + if config.IMPORT_MODE == "db": + if config.REGION and not is_valid_region(config.REGION): + error_messages.append( + f"Invalid REGION: '{config.REGION}'. Must be a valid continent, sub-region, or 'planet'." + ) + if config.REGION and len(config.get_jsonl_regions()) > 1: + error_messages.append("DB mode supports exactly one region in REGION.") + + if config.IMPORT_MODE == "jsonl": + if config.FILE_URL: + error_messages.append("FILE_URL is not supported when IMPORT_MODE=jsonl.") + if config.MD5_URL: + error_messages.append("MD5_URL is not supported when IMPORT_MODE=jsonl.") + if not config.get_jsonl_regions(): + error_messages.append("REGION is required when IMPORT_MODE=jsonl.") + else: + try: + validated_regions = get_regions_for_jsonl(config.get_jsonl_regions()) + if len(validated_regions) != 1: + error_messages.append("JSONL mode currently supports exactly one region.") + except ValueError as exc: + error_messages.append(str(exc)) if error_messages: full_error_message = "Configuration validation failed:\n" + "\n".join(error_messages) diff --git a/tests/jsonl/test_downloader.py b/tests/jsonl/test_downloader.py new file mode 100644 index 00000000..17ac8a41 --- /dev/null +++ b/tests/jsonl/test_downloader.py @@ -0,0 +1,6 @@ +from src.jsonl.downloader import get_jsonl_url +from src.utils import config + + +def test_get_jsonl_url_uses_base_url(): + assert get_jsonl_url("germany") == f"{config.BASE_URL}/europe/germany/photon-dump-germany-master-latest.jsonl.zst" diff --git a/tests/test_entrypoint.py b/tests/test_entrypoint.py index 7ff51d4e..be5c39da 100644 --- a/tests/test_entrypoint.py +++ b/tests/test_entrypoint.py @@ -12,6 +12,7 @@ def base_config(monkeypatch: pytest.MonkeyPatch, tmp_path: Path): os_node_dir = tmp_path / "node_1" monkeypatch.setattr(config, "OS_NODE_DIR", str(os_node_dir)) + monkeypatch.setattr(config, "IMPORT_MODE", "db") monkeypatch.setattr(config, "FORCE_UPDATE", False) 
monkeypatch.setattr(config, "INITIAL_DOWNLOAD", True) monkeypatch.setattr(config, "MIN_INDEX_DATE", None) @@ -206,3 +207,37 @@ def test_entrypoint_logs_apprise_redacted_when_set( with patch("src.entrypoint.send_notification"), patch("src.entrypoint.validate_config"): entrypoint.main() assert any("APPRISE_URLS: REDACTED" in r.message for r in caplog.records) + + +def test_entrypoint_runs_jsonl_import_when_no_index(base_config: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "IMPORT_MODE", "jsonl") + notify, validate = _patch_common() + with notify, validate, patch("src.entrypoint.run_jsonl_import") as imp: + entrypoint.main() + imp.assert_called_once() + + +def test_entrypoint_skips_jsonl_rebuild_when_index_present(base_config: Path, monkeypatch: pytest.MonkeyPatch): + base_config.mkdir() + monkeypatch.setattr(config, "IMPORT_MODE", "jsonl") + notify, validate = _patch_common() + with notify, validate, patch("src.entrypoint.run_jsonl_import") as imp: + entrypoint.main() + imp.assert_not_called() + + +def test_entrypoint_force_update_runs_jsonl_import(base_config: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "IMPORT_MODE", "jsonl") + monkeypatch.setattr(config, "FORCE_UPDATE", True) + notify, validate = _patch_common() + with ( + notify, + validate, + patch("src.entrypoint.run_jsonl_import") as imp, + patch("src.entrypoint.sequential_update") as seq, + patch("src.entrypoint.parallel_update") as par, + ): + entrypoint.main() + imp.assert_called_once() + seq.assert_not_called() + par.assert_not_called() diff --git a/tests/test_importer.py b/tests/test_importer.py new file mode 100644 index 00000000..80f2c186 --- /dev/null +++ b/tests/test_importer.py @@ -0,0 +1,135 @@ +import io + +import pytest + +from src import importer +from src.utils import config + + +def _noop_makedirs(path: str, exist_ok: bool = False) -> None: + _ = (path, exist_ok) + + +def test_start_photon_import_builds_expected_command(monkeypatch): + commands 
= [] + + class DummyProcess: + def __init__(self): + self.stdin = io.BytesIO() + + def fake_popen(cmd, cwd, stdin): + commands.append({"cmd": cmd, "cwd": cwd, "stdin": stdin}) + return DummyProcess() + + monkeypatch.setattr(config, "JAVA_PARAMS", "-Xmx2g") + monkeypatch.setattr(config, "LANGUAGES", "en,de") + monkeypatch.setattr(config, "EXTRA_TAGS", "website,phone") + monkeypatch.setattr(config, "IMPORT_GEOMETRIES", True) + monkeypatch.setattr(importer.os, "makedirs", _noop_makedirs) + monkeypatch.setattr(importer.subprocess, "Popen", fake_popen) + + importer._start_photon_import("-") + + assert commands == [ + { + "cmd": [ + "java", + "-Xmx2g", + "-jar", + "/photon/photon.jar", + "import", + "-import-file", + "-", + "-data-dir", + config.DATA_DIR, + "-languages", + "en,de", + "-extra-tags", + "website,phone", + "-full-geometries", + ], + "cwd": config.PHOTON_DIR, + "stdin": importer.subprocess.PIPE, + } + ] + + +class RecordingProcess: + def __init__(self, wait_return_code: int = 0): + self.stdin = RecordingStdin() + self.wait_calls = 0 + self.kill_calls = 0 + self.wait_return_code = wait_return_code + + def wait(self): + self.wait_calls += 1 + return self.wait_return_code + + def kill(self): + self.kill_calls += 1 + + +class RecordingStdin(io.BytesIO): + def close(self): + self.was_closed = True + + +def test_run_jsonl_import_streams_data_and_cleans_up(monkeypatch): + process = RecordingProcess() + cleanup_calls = [] + fake_path = "/photon/data/temp/andorra.jsonl.zst" + + monkeypatch.setattr(config, "REGION", "andorra") + monkeypatch.setattr(importer, "download_jsonl", lambda region: fake_path) + monkeypatch.setattr(importer, "stream_decompress", lambda path: [b'{"type":"Place"}\n', b'{"type":"Place2"}\n']) + monkeypatch.setattr(importer, "_start_photon_import", lambda input_source: process) + monkeypatch.setattr(importer, "clear_temp_dir", lambda: cleanup_calls.append(True)) + + importer.run_jsonl_import() + + assert process.stdin.getvalue() == 
b'{"type":"Place"}\n{"type":"Place2"}\n' + assert process.wait_calls == 1 + assert process.kill_calls == 0 + assert cleanup_calls == [True] + + +def test_run_jsonl_import_kills_process_and_cleans_up_on_stream_failure(monkeypatch): + process = RecordingProcess() + cleanup_calls = [] + fake_path = "/photon/data/temp/andorra.jsonl.zst" + + def broken_stream(path): + yield b'{"type":"Place"}\n' + raise RuntimeError("boom") + + monkeypatch.setattr(config, "REGION", "andorra") + monkeypatch.setattr(importer, "download_jsonl", lambda region: fake_path) + monkeypatch.setattr(importer, "stream_decompress", broken_stream) + monkeypatch.setattr(importer, "_start_photon_import", lambda input_source: process) + monkeypatch.setattr(importer, "clear_temp_dir", lambda: cleanup_calls.append(True)) + + with pytest.raises(RuntimeError, match="boom"): + importer.run_jsonl_import() + + assert process.kill_calls == 1 + assert process.wait_calls == 1 + assert cleanup_calls == [True] + + +def test_run_jsonl_import_raises_when_import_process_fails(monkeypatch): + process = RecordingProcess(wait_return_code=2) + cleanup_calls = [] + fake_path = "/photon/data/temp/andorra.jsonl.zst" + + monkeypatch.setattr(config, "REGION", "andorra") + monkeypatch.setattr(importer, "download_jsonl", lambda region: fake_path) + monkeypatch.setattr(importer, "stream_decompress", lambda path: [b'{"type":"Place"}\n']) + monkeypatch.setattr(importer, "_start_photon_import", lambda input_source: process) + monkeypatch.setattr(importer, "clear_temp_dir", lambda: cleanup_calls.append(True)) + + with pytest.raises(RuntimeError, match="exit code 2"): + importer.run_jsonl_import() + + assert process.kill_calls == 1 + assert process.wait_calls == 2 + assert cleanup_calls == [True] diff --git a/tests/utils/test_regions.py b/tests/utils/test_regions.py index ae428c0b..fb88a9aa 100644 --- a/tests/utils/test_regions.py +++ b/tests/utils/test_regions.py @@ -1,6 +1,13 @@ import pytest -from src.utils.regions import 
get_index_url_path, get_region_info, is_valid_region, normalize_region +from src.utils.regions import ( + get_index_url_path, + get_jsonl_url_path, + get_region_info, + get_regions_for_jsonl, + is_valid_region, + normalize_region, +) @pytest.mark.parametrize( @@ -28,7 +35,12 @@ def test_is_valid_region(region: str, expected: bool): def test_get_region_info_for_alias_returns_canonical_region_metadata(): - assert get_region_info("us") == {"type": "sub-region", "continent": "north-america", "available": True} + assert get_region_info("us") == { + "type": "sub-region", + "continent": "north-america", + "db_available": True, + "jsonl_available": True, + } @pytest.mark.parametrize( @@ -48,3 +60,19 @@ def test_get_index_url_path(region: str | None, expected: str): def test_get_index_url_path_raises_for_unknown_region(): with pytest.raises(ValueError, match="Unknown region: atlantis"): get_index_url_path("atlantis", "1.0", "tar.bz2") + + +@pytest.mark.parametrize( + ("region", "expected"), + [ + ("planet", "/photon-dump-planet-master-latest.jsonl.zst"), + ("europe", "/europe/photon-dump-europe-master-latest.jsonl.zst"), + ("us", "/north-america/usa/photon-dump-usa-master-latest.jsonl.zst"), + ], +) +def test_get_jsonl_url_path(region: str, expected: str): + assert get_jsonl_url_path(region, "jsonl.zst") == expected + + +def test_get_regions_for_jsonl_normalizes_aliases(): + assert get_regions_for_jsonl(["DE"]) == ["germany"] diff --git a/tests/utils/test_validate_config.py b/tests/utils/test_validate_config.py index 678b9fe1..63e423b1 100644 --- a/tests/utils/test_validate_config.py +++ b/tests/utils/test_validate_config.py @@ -5,9 +5,12 @@ def _set_base_config(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "IMPORT_MODE", "db") monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") monkeypatch.setattr(config, "UPDATE_INTERVAL", "30d") monkeypatch.setattr(config, "REGION", None) + monkeypatch.setattr(config, "FILE_URL", None) + 
monkeypatch.setattr(config, "MD5_URL", None) def test_validate_config_accepts_valid_configuration(monkeypatch: pytest.MonkeyPatch): @@ -42,6 +45,47 @@ def test_validate_config_rejects_invalid_region(monkeypatch: pytest.MonkeyPatch) validate_config() +def test_validate_config_rejects_invalid_import_mode(monkeypatch: pytest.MonkeyPatch): + _set_base_config(monkeypatch) + monkeypatch.setattr(config, "IMPORT_MODE", "archive") + + with pytest.raises(ValueError, match="Invalid IMPORT_MODE: 'archive'"): + validate_config() + + +def test_validate_config_accepts_jsonl_single_region(monkeypatch: pytest.MonkeyPatch): + _set_base_config(monkeypatch) + monkeypatch.setattr(config, "IMPORT_MODE", "jsonl") + monkeypatch.setattr(config, "REGION", "de") + + validate_config() + + +def test_validate_config_requires_regions_for_jsonl(monkeypatch: pytest.MonkeyPatch): + _set_base_config(monkeypatch) + monkeypatch.setattr(config, "IMPORT_MODE", "jsonl") + + with pytest.raises(ValueError, match="REGION is required when IMPORT_MODE=jsonl"): + validate_config() + + +def test_validate_config_rejects_multiple_jsonl_regions_for_now(monkeypatch: pytest.MonkeyPatch): + _set_base_config(monkeypatch) + monkeypatch.setattr(config, "IMPORT_MODE", "jsonl") + monkeypatch.setattr(config, "REGION", "de,fr") + + with pytest.raises(ValueError, match="currently supports exactly one region"): + validate_config() + + +def test_validate_config_rejects_multiple_db_regions(monkeypatch: pytest.MonkeyPatch): + _set_base_config(monkeypatch) + monkeypatch.setattr(config, "REGION", "germany,andorra") + + with pytest.raises(ValueError, match="DB mode supports exactly one region"): + validate_config() + + def test_validate_config_reports_multiple_errors(monkeypatch: pytest.MonkeyPatch): _set_base_config(monkeypatch) monkeypatch.setattr(config, "UPDATE_STRATEGY", "WRONG") From 0cbf91997bbea9a912bcf195ad8fa0d34dc47f4c Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Thu, 2 Apr 2026 14:04:32 +0200 Subject: [PATCH 06/27] 
jsonl e2e test --- .github/workflows/full-test-jsonl.yml | 103 ++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 .github/workflows/full-test-jsonl.yml diff --git a/.github/workflows/full-test-jsonl.yml b/.github/workflows/full-test-jsonl.yml new file mode 100644 index 00000000..9b79cd19 --- /dev/null +++ b/.github/workflows/full-test-jsonl.yml @@ -0,0 +1,103 @@ +name: Container Test + +on: + pull_request: + branches: + - main + - dev + paths: + - "Dockerfile" + - "src/**" + - "docker-compose*.yml" + - ".last_release" + - "pyproject.toml" + - "uv.lock" + +jobs: + test-container-jsonl: + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@v6 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Read Photon version from .last_release + id: photon_version + run: | + PHOTON_VERSION=$(cat .last_release | tr -d '[:space:]') + if [[ -z "$PHOTON_VERSION" || ! "$PHOTON_VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + echo "Error: .last_release is missing, empty, or contains an invalid version: '$PHOTON_VERSION'" + exit 1 + fi + echo "PHOTON_VERSION=$PHOTON_VERSION" >> "$GITHUB_ENV" + echo "Photon Version: $PHOTON_VERSION" + + - name: Build test image + uses: docker/build-push-action@v6 + with: + context: . + file: ./Dockerfile + build-args: | + PHOTON_VERSION=${{ env.PHOTON_VERSION }} + push: false + load: true + tags: photon-test:pr-${{ github.event.pull_request.number }} + platforms: linux/amd64 + cache-from: type=gha + cache-to: type=gha,mode=max + + - name: Start container + run: | + docker run -d \ + --name photon-test-pr-${{ github.event.pull_request.number }} \ + -e REGION=andorra \ + -e IMPORT_MODE=jsonl \ + -e UPDATE_STRATEGY=DISABLED \ + photon-test:pr-${{ github.event.pull_request.number }} + + - name: Wait for container to be healthy + run: | + echo "Waiting for container to become healthy (timeout: 6 minutes)..." 
+ CONTAINER_NAME=photon-test-pr-${{ github.event.pull_request.number }} + + docker logs -f $CONTAINER_NAME & + LOGS_PID=$! + + SECONDS=0 + TIMEOUT=360 + + while [ $SECONDS -lt $TIMEOUT ]; do + HEALTH_STATUS=$(docker inspect --format='{{.State.Health.Status}}' $CONTAINER_NAME 2>/dev/null || echo "unknown") + + if [ "$HEALTH_STATUS" = "healthy" ]; then + echo "Container is healthy after $SECONDS seconds" + kill $LOGS_PID 2>/dev/null || true + exit 0 + fi + + echo "Health status: $HEALTH_STATUS (elapsed: ${SECONDS}s)" + sleep 10 + SECONDS=$((SECONDS + 10)) + done + + kill $LOGS_PID 2>/dev/null || true + echo "Container failed to become healthy within $TIMEOUT seconds" + docker logs $CONTAINER_NAME + exit 1 + + - name: Cleanup + if: always() + run: | + docker stop photon-test-pr-${{ github.event.pull_request.number }} || true + docker rm photon-test-pr-${{ github.event.pull_request.number }} || true + docker rmi photon-test:pr-${{ github.event.pull_request.number }} || true + + - name: Output summary + if: always() + run: | + echo "## Container Test Summary" >> $GITHUB_STEP_SUMMARY + echo "- **PR Number:** ${{ github.event.pull_request.number }}" >> $GITHUB_STEP_SUMMARY + echo "- **Photon Version:** ${{ env.PHOTON_VERSION }}" >> $GITHUB_STEP_SUMMARY + echo "- **Status:** ${{ job.status }}" >> $GITHUB_STEP_SUMMARY From ae0729d6297be324a9f294d023dc02a5520f28cf Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Mon, 27 Apr 2026 22:43:07 +0200 Subject: [PATCH 07/27] feat: add multi-region JSONL support with country codes --- src/importer.py | 18 +- src/utils/regions.py | 398 +++++++++++++++++++++++++--- src/utils/validate_config.py | 4 +- tests/test_importer.py | 79 +++++- tests/utils/test_regions.py | 56 ++++ tests/utils/test_validate_config.py | 5 +- 6 files changed, 514 insertions(+), 46 deletions(-) diff --git a/src/importer.py b/src/importer.py index da082b2f..d878642e 100644 --- a/src/importer.py +++ b/src/importer.py @@ -7,21 +7,22 @@ from src.jsonl.downloader import 
download_jsonl from src.utils import config from src.utils.logger import get_logger -from src.utils.regions import get_regions_for_jsonl +from src.utils.regions import get_country_codes_for_regions, get_jsonl_parent_region, get_regions_for_jsonl logger = get_logger(__name__) def run_jsonl_import() -> None: regions = get_regions_for_jsonl(config.get_jsonl_regions()) - if len(regions) != 1: - raise ValueError("JSONL mode currently supports exactly one region.") + if not regions: + raise ValueError("JSONL mode requires at least one region.") - region = regions[0] + parent_region = get_jsonl_parent_region(regions) + country_codes = get_country_codes_for_regions(regions) if len(regions) > 1 else None try: - jsonl_path = download_jsonl(region) - import_proc = _start_photon_import("-") + jsonl_path = download_jsonl(parent_region) + import_proc = _start_photon_import("-", country_codes=country_codes) try: if import_proc.stdin is None: raise RuntimeError("Photon import process stdin is unavailable") @@ -40,7 +41,7 @@ def run_jsonl_import() -> None: clear_temp_dir() -def _start_photon_import(input_source: str) -> subprocess.Popen: +def _start_photon_import(input_source: str, country_codes: list[str] | None = None) -> subprocess.Popen: os.makedirs(config.DATA_DIR, exist_ok=True) cmd = ["java"] @@ -57,6 +58,9 @@ def _start_photon_import(input_source: str) -> subprocess.Popen: if extra_tags: cmd.extend(["-extra-tags", ",".join(extra_tags)]) + if country_codes: + cmd.extend(["-country-codes", ",".join(country_codes)]) + if config.IMPORT_GEOMETRIES: cmd.append("-full-geometries") diff --git a/src/utils/regions.py b/src/utils/regions.py index ba38b65c..8b13298d 100644 --- a/src/utils/regions.py +++ b/src/utils/regions.py @@ -1,32 +1,298 @@ -REGION_MAPPING = { - "planet": {"type": "planet", "continent": None, "db_available": True, "jsonl_available": True}, - "africa": {"type": "continent", "continent": "africa", "db_available": True, "jsonl_available": True}, - "asia": {"type": 
"continent", "continent": "asia", "db_available": True, "jsonl_available": True}, - "australia-oceania": { - "type": "continent", - "continent": "australia-oceania", - "db_available": True, +PLANET_COUNTRY_CODES = [ + "DZ", + "EG", + "EH", + "LY", + "MA", + "SD", + "TN", + "BJ", + "BF", + "CV", + "GH", + "GN", + "GW", + "CI", + "LR", + "ML", + "MR", + "NE", + "SN", + "GM", + "SL", + "TG", + "NG", + "AO", + "CM", + "CF", + "TD", + "CG", + "CD", + "GQ", + "GA", + "ST", + "BI", + "KE", + "MW", + "MZ", + "RW", + "TZ", + "UG", + "SS", + "ZM", + "ZW", + "ER", + "ET", + "DJ", + "SO", + "MG", + "MU", + "SC", + "KM", + "BW", + "LS", + "NA", + "ZA", + "SZ", + "SH", + "KZ", + "KG", + "TJ", + "TM", + "UZ", + "BH", + "KW", + "OM", + "QA", + "SA", + "AE", + "YE", + "MV", + "LK", + "IO", + "AM", + "AZ", + "IR", + "IQ", + "IL", + "JO", + "LB", + "PS", + "SY", + "AF", + "BD", + "BT", + "NP", + "PK", + "CN", + "MN", + "KP", + "KR", + "TW", + "BN", + "KH", + "ID", + "LA", + "MY", + "MM", + "PH", + "SG", + "TH", + "TL", + "VN", + "IN", + "JP", + "AU", + "NZ", + "CK", + "FJ", + "KI", + "MH", + "FM", + "NR", + "NU", + "PW", + "PG", + "PN", + "WS", + "SB", + "TK", + "TO", + "TV", + "VU", + "AL", + "BY", + "BE", + "BA", + "BG", + "HR", + "CY", + "CZ", + "EE", + "LT", + "LV", + "GB", + "IM", + "GG", + "JE", + "IS", + "FO", + "IE", + "FI", + "GE", + "GR", + "HU", + "IT", + "VA", + "SM", + "XK", + "CH", + "LI", + "MK", + "MT", + "MD", + "ME", + "NO", + "PL", + "PT", + "RO", + "RS", + "SI", + "SE", + "TR", + "UA", + "AD", + "AT", + "DK", + "ES", + "GI", + "FR", + "MC", + "DE", + "LU", + "NL", + "RU", + "SK", + "BZ", + "CR", + "SV", + "GT", + "HN", + "NI", + "PA", + "BS", + "CU", + "HT", + "DO", + "JM", + "AG", + "AI", + "BB", + "DM", + "GD", + "KN", + "KY", + "LC", + "MS", + "TC", + "TT", + "VC", + "VG", + "GL", + "BM", + "CA", + "US", + "MX", + "CL", + "BR", + "BO", + "CO", + "EC", + "PY", + "PE", + "UY", + "GY", + "VE", + "SR", + "FK", + "GS", + "AR", +] + +AFRICA_COUNTRY_CODES = 
PLANET_COUNTRY_CODES[:56] +ASIA_COUNTRY_CODES = PLANET_COUNTRY_CODES[56:104] +AUSTRALIA_OCEANIA_COUNTRY_CODES = PLANET_COUNTRY_CODES[104:122] +EUROPE_COUNTRY_CODES = PLANET_COUNTRY_CODES[122:176] +NORTH_AMERICA_COUNTRY_CODES = PLANET_COUNTRY_CODES[176:207] +SOUTH_AMERICA_COUNTRY_CODES = PLANET_COUNTRY_CODES[207:220] + + +def _region(region_type: str, continent: str | None, db_available: bool, country_codes: list[str]) -> dict: + return { + "type": region_type, + "continent": continent, + "db_available": db_available, "jsonl_available": True, - }, - "europe": {"type": "continent", "continent": "europe", "db_available": True, "jsonl_available": True}, - "north-america": {"type": "continent", "continent": "north-america", "db_available": True, "jsonl_available": True}, - "south-america": {"type": "continent", "continent": "south-america", "db_available": True, "jsonl_available": True}, - "india": {"type": "sub-region", "continent": "asia", "db_available": True, "jsonl_available": True}, - "japan": {"type": "sub-region", "continent": "asia", "db_available": True, "jsonl_available": True}, - "andorra": {"type": "sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, - "austria": {"type": "sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, - "denmark": {"type": "sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, - "france-monacco": {"type": "sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, - "germany": {"type": "sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, - "luxemburg": {"type": "sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, - "netherlands": {"type": "sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, - "russia": {"type": "sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, - "slovakia": {"type": 
"sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, - "spain": {"type": "sub-region", "continent": "europe", "db_available": True, "jsonl_available": True}, - "canada": {"type": "sub-region", "continent": "north-america", "db_available": True, "jsonl_available": True}, - "mexico": {"type": "sub-region", "continent": "north-america", "db_available": True, "jsonl_available": True}, - "usa": {"type": "sub-region", "continent": "north-america", "db_available": True, "jsonl_available": True}, - "argentina": {"type": "sub-region", "continent": "south-america", "db_available": True, "jsonl_available": True}, + "country_codes": country_codes, + } + + +REGION_MAPPING = { + "planet": _region("planet", None, True, PLANET_COUNTRY_CODES), + "africa": _region("continent", "africa", True, AFRICA_COUNTRY_CODES), + "asia": _region("continent", "asia", True, ASIA_COUNTRY_CODES), + "australia-oceania": _region("continent", "australia-oceania", True, AUSTRALIA_OCEANIA_COUNTRY_CODES), + "europe": _region("continent", "europe", True, EUROPE_COUNTRY_CODES), + "north-america": _region("continent", "north-america", True, NORTH_AMERICA_COUNTRY_CODES), + "south-america": _region("continent", "south-america", True, SOUTH_AMERICA_COUNTRY_CODES), + "india": _region("sub-region", "asia", True, ["IN"]), + "japan": _region("sub-region", "asia", True, ["JP"]), + "andorra": _region("sub-region", "europe", True, ["AD"]), + "austria": _region("sub-region", "europe", True, ["AT"]), + "albania": _region("sub-region", "europe", False, ["AL"]), + "baltics": _region("sub-region", "europe", False, ["EE", "LT", "LV"]), + "belarus": _region("sub-region", "europe", False, ["BY"]), + "belgium": _region("sub-region", "europe", False, ["BE"]), + "bosnia-herzegovina": _region("sub-region", "europe", False, ["BA"]), + "british-islands": _region("sub-region", "europe", False, ["GB", "IM", "GG", "JE"]), + "bulgaria": _region("sub-region", "europe", False, ["BG"]), + "croatia": 
_region("sub-region", "europe", False, ["HR"]), + "cyprus": _region("sub-region", "europe", False, ["CY"]), + "czech-republic": _region("sub-region", "europe", False, ["CZ"]), + "denmark": _region("sub-region", "europe", True, ["DK"]), + "finland": _region("sub-region", "europe", False, ["FI"]), + "france-monacco": _region("sub-region", "europe", True, ["FR", "MC"]), + "georgia": _region("sub-region", "europe", False, ["GE"]), + "germany": _region("sub-region", "europe", True, ["DE"]), + "greece": _region("sub-region", "europe", False, ["GR"]), + "hungary": _region("sub-region", "europe", False, ["HU"]), + "iceland-faroe": _region("sub-region", "europe", False, ["IS", "FO"]), + "ireland": _region("sub-region", "europe", False, ["IE"]), + "italy": _region("sub-region", "europe", False, ["IT", "VA", "SM"]), + "kosovo": _region("sub-region", "europe", False, ["XK"]), + "luxemburg": _region("sub-region", "europe", True, ["LU"]), + "macedonia": _region("sub-region", "europe", False, ["MK"]), + "malta": _region("sub-region", "europe", False, ["MT"]), + "moldova": _region("sub-region", "europe", False, ["MD"]), + "montenegro": _region("sub-region", "europe", False, ["ME"]), + "netherlands": _region("sub-region", "europe", True, ["NL"]), + "norway": _region("sub-region", "europe", False, ["NO"]), + "poland": _region("sub-region", "europe", False, ["PL"]), + "portugal": _region("sub-region", "europe", False, ["PT"]), + "romania": _region("sub-region", "europe", False, ["RO"]), + "russia": _region("sub-region", "europe", True, ["RU"]), + "serbia": _region("sub-region", "europe", False, ["RS"]), + "slovakia": _region("sub-region", "europe", True, ["SK"]), + "slovenia": _region("sub-region", "europe", False, ["SI"]), + "spain": _region("sub-region", "europe", True, ["ES", "GI"]), + "sweden": _region("sub-region", "europe", False, ["SE"]), + "switzerland-liechtenstein": _region("sub-region", "europe", False, ["CH", "LI"]), + "turkey": _region("sub-region", "europe", False, 
["TR"]), + "ukraine": _region("sub-region", "europe", False, ["UA"]), + "canada": _region("sub-region", "north-america", True, ["CA"]), + "mexico": _region("sub-region", "north-america", True, ["MX"]), + "usa": _region("sub-region", "north-america", True, ["US"]), + "argentina": _region("sub-region", "south-america", True, ["AR"]), } REGION_ALIASES = { @@ -34,14 +300,43 @@ "jp": "japan", "ad": "andorra", "at": "austria", + "al": "albania", + "by": "belarus", + "be": "belgium", + "ba": "bosnia-herzegovina", + "bg": "bulgaria", + "hr": "croatia", + "cy": "cyprus", + "cz": "czech-republic", "dk": "denmark", + "fi": "finland", "fr": "france-monacco", "de": "germany", + "gr": "greece", + "hu": "hungary", + "is": "iceland-faroe", + "ie": "ireland", + "it": "italy", + "xk": "kosovo", "lu": "luxemburg", + "mk": "macedonia", + "mt": "malta", + "md": "moldova", + "me": "montenegro", "nl": "netherlands", + "no": "norway", + "pl": "poland", + "pt": "portugal", + "ro": "romania", "ru": "russia", + "rs": "serbia", "sk": "slovakia", + "si": "slovenia", "es": "spain", + "se": "sweden", + "ch": "switzerland-liechtenstein", + "tr": "turkey", + "ua": "ukraine", "ca": "canada", "mx": "mexico", "us": "usa", @@ -56,6 +351,9 @@ "holland": "netherlands", "espana": "spain", "españa": "spain", + "czechia": "czech-republic", + "uk": "british-islands", + "great britain": "british-islands", } @@ -152,6 +450,46 @@ def get_regions_for_jsonl(regions: list[str]) -> list[str]: raise ValueError(f"Unknown region: {region}") if not region_info.get("jsonl_available", False): raise ValueError(f"JSONL not available for region: {region}") - validated_regions.append(normalize_region(region)) - return [region for region in validated_regions if region] + normalized_region = normalize_region(region) + if normalized_region and normalized_region not in validated_regions: + validated_regions.append(normalized_region) + + return validated_regions + + +def get_country_codes_for_regions(regions: list[str]) -> 
list[str]: + country_codes: list[str] = [] + + for region in get_regions_for_jsonl(regions): + region_info = get_region_info(region) + if not region_info: + raise ValueError(f"Unknown region: {region}") + + for country_code in region_info["country_codes"]: + if country_code not in country_codes: + country_codes.append(country_code) + + return country_codes + + +def get_jsonl_parent_region(regions: list[str]) -> str: + normalized_regions = get_regions_for_jsonl(regions) + if not normalized_regions: + raise ValueError("At least one region is required") + if len(normalized_regions) == 1: + return normalized_regions[0] + + if "planet" in normalized_regions: + return "planet" + + continents = { + REGION_MAPPING[region]["continent"] + for region in normalized_regions + if REGION_MAPPING[region]["type"] in {"continent", "sub-region"} + } + + if len(continents) == 1: + return continents.pop() + + return "planet" diff --git a/src/utils/validate_config.py b/src/utils/validate_config.py index 9fc846be..0e4fa23c 100644 --- a/src/utils/validate_config.py +++ b/src/utils/validate_config.py @@ -43,9 +43,7 @@ def validate_config(): error_messages.append("REGION is required when IMPORT_MODE=jsonl.") else: try: - validated_regions = get_regions_for_jsonl(config.get_jsonl_regions()) - if len(validated_regions) != 1: - error_messages.append("JSONL mode currently supports exactly one region.") + get_regions_for_jsonl(config.get_jsonl_regions()) except ValueError as exc: error_messages.append(str(exc)) diff --git a/tests/test_importer.py b/tests/test_importer.py index 80f2c186..01034847 100644 --- a/tests/test_importer.py +++ b/tests/test_importer.py @@ -54,6 +54,79 @@ def fake_popen(cmd, cwd, stdin): ] +def test_start_photon_import_includes_country_codes(monkeypatch): + commands = [] + + class DummyProcess: + def __init__(self): + self.stdin = io.BytesIO() + + def fake_popen(cmd, cwd, stdin): + commands.append({"cmd": cmd, "cwd": cwd, "stdin": stdin}) + return DummyProcess() + + 
monkeypatch.setattr(config, "JAVA_PARAMS", "-Xmx2g") + monkeypatch.setattr(config, "LANGUAGES", None) + monkeypatch.setattr(config, "EXTRA_TAGS", None) + monkeypatch.setattr(config, "IMPORT_GEOMETRIES", False) + monkeypatch.setattr(importer.os, "makedirs", _noop_makedirs) + monkeypatch.setattr(importer.subprocess, "Popen", fake_popen) + + importer._start_photon_import("-", country_codes=["AD", "LU"]) + + assert commands[0]["cmd"] == [ + "java", + "-Xmx2g", + "-jar", + "/photon/photon.jar", + "import", + "-import-file", + "-", + "-data-dir", + config.DATA_DIR, + "-country-codes", + "AD,LU", + ] + + +def test_run_jsonl_import_uses_parent_region_and_country_codes_for_multi_region(monkeypatch): + process = RecordingProcess() + download_args = [] + + def fake_download(region): + download_args.append(region) + return "/photon/data/temp/europe.jsonl.zst" + + monkeypatch.setattr(config, "REGION", "andorra,luxemburg") + monkeypatch.setattr(importer, "download_jsonl", fake_download) + monkeypatch.setattr(importer, "stream_decompress", lambda path: [b'{"type":"Place"}\n']) + monkeypatch.setattr(importer, "_start_photon_import", lambda input_source, country_codes=None: process) + monkeypatch.setattr(importer, "clear_temp_dir", lambda: None) + + importer.run_jsonl_import() + + assert download_args == ["europe"] + + +def test_run_jsonl_import_uses_single_region_without_country_codes(monkeypatch): + process = RecordingProcess() + import_args = [] + + def fake_start_import(input_source, country_codes=None): + import_args.append({"input_source": input_source, "country_codes": country_codes}) + return process + + monkeypatch.setattr(config, "REGION", "andorra") + monkeypatch.setattr(importer, "download_jsonl", lambda region: "/photon/data/temp/andorra.jsonl.zst") + monkeypatch.setattr(importer, "stream_decompress", lambda path: [b'{"type":"Place"}\n']) + monkeypatch.setattr(importer, "_start_photon_import", fake_start_import) + monkeypatch.setattr(importer, "clear_temp_dir", lambda: 
None) + + importer.run_jsonl_import() + + assert import_args[0]["country_codes"] is None + + class RecordingProcess: def __init__(self, wait_return_code: int = 0): self.stdin = RecordingStdin() @@ -82,7 +155,7 @@ def test_run_jsonl_import_streams_data_and_cleans_up(monkeypatch): monkeypatch.setattr(config, "REGION", "andorra") monkeypatch.setattr(importer, "download_jsonl", lambda region: fake_path) monkeypatch.setattr(importer, "stream_decompress", lambda path: [b'{"type":"Place"}\n', b'{"type":"Place2"}\n']) - monkeypatch.setattr(importer, "_start_photon_import", lambda input_source: process) + monkeypatch.setattr(importer, "_start_photon_import", lambda input_source, country_codes=None: process) monkeypatch.setattr(importer, "clear_temp_dir", lambda: cleanup_calls.append(True)) importer.run_jsonl_import() @@ -105,7 +178,7 @@ def broken_stream(path): monkeypatch.setattr(config, "REGION", "andorra") monkeypatch.setattr(importer, "download_jsonl", lambda region: fake_path) monkeypatch.setattr(importer, "stream_decompress", broken_stream) - monkeypatch.setattr(importer, "_start_photon_import", lambda input_source: process) + monkeypatch.setattr(importer, "_start_photon_import", lambda input_source, country_codes=None: process) monkeypatch.setattr(importer, "clear_temp_dir", lambda: cleanup_calls.append(True)) with pytest.raises(RuntimeError, match="boom"): @@ -124,7 +197,7 @@ def test_run_jsonl_import_raises_when_import_process_fails(monkeypatch): monkeypatch.setattr(config, "REGION", "andorra") monkeypatch.setattr(importer, "download_jsonl", lambda region: fake_path) monkeypatch.setattr(importer, "stream_decompress", lambda path: [b'{"type":"Place"}\n']) - monkeypatch.setattr(importer, "_start_photon_import", lambda input_source: process) + monkeypatch.setattr(importer, "_start_photon_import", lambda input_source, country_codes=None: process) monkeypatch.setattr(importer, "clear_temp_dir", lambda: cleanup_calls.append(True)) with pytest.raises(RuntimeError, 
match="exit code 2"): diff --git a/tests/utils/test_regions.py b/tests/utils/test_regions.py index fb88a9aa..291093ab 100644 --- a/tests/utils/test_regions.py +++ b/tests/utils/test_regions.py @@ -1,7 +1,9 @@ import pytest from src.utils.regions import ( + get_country_codes_for_regions, get_index_url_path, + get_jsonl_parent_region, get_jsonl_url_path, get_region_info, get_regions_for_jsonl, @@ -40,6 +42,7 @@ def test_get_region_info_for_alias_returns_canonical_region_metadata(): "continent": "north-america", "db_available": True, "jsonl_available": True, + "country_codes": ["US"], } @@ -76,3 +79,56 @@ def test_get_jsonl_url_path(region: str, expected: str): def test_get_regions_for_jsonl_normalizes_aliases(): assert get_regions_for_jsonl(["DE"]) == ["germany"] + + +def test_get_regions_for_jsonl_deduplicates(): + result = get_regions_for_jsonl(["de", "germany", "DE"]) + assert result == ["germany"] + + +def test_get_regions_for_jsonl_multiple_regions(): + result = get_regions_for_jsonl(["andorra", "luxemburg"]) + assert result == ["andorra", "luxemburg"] + + +def test_get_regions_for_jsonl_rejects_unknown(): + with pytest.raises(ValueError, match="Unknown region"): + get_regions_for_jsonl(["germany", "atlantis"]) + + +def test_get_country_codes_for_regions_single(): + assert get_country_codes_for_regions(["andorra"]) == ["AD"] + + +def test_get_country_codes_for_regions_multiple_deduplicates(): + codes = get_country_codes_for_regions(["andorra", "luxemburg"]) + assert codes == ["AD", "LU"] + + +def test_get_country_codes_for_regions_overlapping(): + codes = get_country_codes_for_regions(["france-monacco", "monaco"]) + assert "FR" in codes + assert "MC" in codes + assert codes.count("FR") == 1 + assert codes.count("MC") == 1 + + +def test_get_jsonl_parent_region_single(): + assert get_jsonl_parent_region(["andorra"]) == "andorra" + + +def test_get_jsonl_parent_region_same_continent(): + assert get_jsonl_parent_region(["andorra", "luxemburg"]) == "europe" + + +def 
test_get_jsonl_parent_region_across_continents(): + assert get_jsonl_parent_region(["germany", "japan"]) == "planet" + + +def test_get_jsonl_parent_region_with_planet(): + assert get_jsonl_parent_region(["planet", "germany"]) == "planet" + + +def test_get_jsonl_parent_region_rejects_empty(): + with pytest.raises(ValueError, match="At least one region"): + get_jsonl_parent_region([]) diff --git a/tests/utils/test_validate_config.py b/tests/utils/test_validate_config.py index 63e423b1..6ca29812 100644 --- a/tests/utils/test_validate_config.py +++ b/tests/utils/test_validate_config.py @@ -69,13 +69,12 @@ def test_validate_config_requires_regions_for_jsonl(monkeypatch: pytest.MonkeyPa validate_config() -def test_validate_config_rejects_multiple_jsonl_regions_for_now(monkeypatch: pytest.MonkeyPatch): +def test_validate_config_accepts_multiple_jsonl_regions(monkeypatch: pytest.MonkeyPatch): _set_base_config(monkeypatch) monkeypatch.setattr(config, "IMPORT_MODE", "jsonl") monkeypatch.setattr(config, "REGION", "de,fr") - with pytest.raises(ValueError, match="currently supports exactly one region"): - validate_config() + validate_config() def test_validate_config_rejects_multiple_db_regions(monkeypatch: pytest.MonkeyPatch): From 94999f5b53c20ef2da2f35e80db61c642fc7ecb6 Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Tue, 9 Jun 2026 17:20:16 +0200 Subject: [PATCH 08/27] fix: prevent serving empty index after a failed JSONL import --- src/entrypoint.py | 3 +++ src/filesystem.py | 43 ++++++++++++++++++++++++++++++++++++ src/importer.py | 5 ++++- src/process_manager.py | 4 +++- tests/test_filesystem.py | 47 ++++++++++++++++++++++++++++++++++++++++ tests/test_importer.py | 44 +++++++++++++++++++++++++++++++++++++ 6 files changed, 144 insertions(+), 2 deletions(-) diff --git a/src/entrypoint.py b/src/entrypoint.py index 308d939d..ff2d81b0 100644 --- a/src/entrypoint.py +++ b/src/entrypoint.py @@ -3,6 +3,7 @@ from src.check_remote import check_index_age from src.downloader 
import InsufficientSpaceError, parallel_update, sequential_update +from src.filesystem import reconcile_interrupted_import from src.importer import run_jsonl_import from src.utils import config from src.utils.logger import get_logger, setup_logging @@ -76,6 +77,8 @@ def main(): if config.MIN_INDEX_DATE: logger.info(f"MIN_INDEX_DATE: {config.MIN_INDEX_DATE}") + reconcile_interrupted_import() + if config.FORCE_UPDATE: logger.info("Starting forced update") try: diff --git a/src/filesystem.py b/src/filesystem.py index 5bd1fcc8..70dbf31f 100644 --- a/src/filesystem.py +++ b/src/filesystem.py @@ -207,3 +207,46 @@ def update_timestamp_marker(): logging.info(f"Updated timestamp marker: {marker_file}") except Exception as e: logging.warning(f"Failed to update timestamp marker: {e}") + + +def _import_in_progress_marker() -> str: + return os.path.join(config.DATA_DIR, ".photon-import-in-progress") + + +def mark_import_in_progress(): + marker_file = _import_in_progress_marker() + try: + Path(marker_file).touch() + logging.debug(f"Marked import in progress: {marker_file}") + except Exception as e: + logging.warning(f"Failed to write import-in-progress marker: {e}") + + +def clear_import_in_progress(): + marker_file = _import_in_progress_marker() + try: + Path(marker_file).unlink(missing_ok=True) + except Exception as e: + logging.warning(f"Failed to clear import-in-progress marker: {e}") + + +def import_was_interrupted() -> bool: + return os.path.exists(_import_in_progress_marker()) + + +def remove_incomplete_index(): + if os.path.isdir(config.PHOTON_DATA_DIR): + logging.warning(f"Removing incomplete index at {config.PHOTON_DATA_DIR}") + shutil.rmtree(config.PHOTON_DATA_DIR) + + +def reconcile_interrupted_import(): + if not import_was_interrupted(): + return + + logging.warning( + "Detected an interrupted import (in-progress marker present). " + "Removing the partial index so a clean import can run." 
+ ) + remove_incomplete_index() + clear_import_in_progress() diff --git a/src/importer.py b/src/importer.py index d878642e..c0fda198 100644 --- a/src/importer.py +++ b/src/importer.py @@ -2,7 +2,7 @@ import shlex import subprocess -from src.filesystem import clear_temp_dir +from src.filesystem import clear_import_in_progress, clear_temp_dir, mark_import_in_progress, update_timestamp_marker from src.jsonl.decompressor import stream_decompress from src.jsonl.downloader import download_jsonl from src.utils import config @@ -20,6 +20,7 @@ def run_jsonl_import() -> None: parent_region = get_jsonl_parent_region(regions) country_codes = get_country_codes_for_regions(regions) if len(regions) > 1 else None + mark_import_in_progress() try: jsonl_path = download_jsonl(parent_region) import_proc = _start_photon_import("-", country_codes=country_codes) @@ -37,6 +38,8 @@ def run_jsonl_import() -> None: import_proc.kill() import_proc.wait() raise + update_timestamp_marker() + clear_import_in_progress() finally: clear_temp_dir() diff --git a/src/process_manager.py b/src/process_manager.py index dee435f2..ae26f881 100644 --- a/src/process_manager.py +++ b/src/process_manager.py @@ -14,7 +14,7 @@ from requests.exceptions import RequestException from src.check_remote import compare_mtime -from src.filesystem import cleanup_backup_after_verification +from src.filesystem import cleanup_backup_after_verification, reconcile_interrupted_import from src.utils import config from src.utils.logger import get_logger, setup_logging @@ -310,6 +310,8 @@ def shutdown(self): def run(self): logger.info("Photon Manager starting...") + reconcile_interrupted_import() + if not config.FORCE_UPDATE and os.path.isdir(config.OS_NODE_DIR): logger.info("Existing index found, skipping initial setup") else: diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index b871ebd7..67376023 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -96,6 +96,53 @@ def 
test_update_timestamp_marker_swallows_errors(fake_dirs: Path): filesystem.update_timestamp_marker() +def test_import_in_progress_marker_roundtrip(fake_dirs: Path): + assert filesystem.import_was_interrupted() is False + + filesystem.mark_import_in_progress() + assert filesystem.import_was_interrupted() is True + + filesystem.clear_import_in_progress() + assert filesystem.import_was_interrupted() is False + + +def test_clear_import_in_progress_is_idempotent(fake_dirs: Path): + filesystem.clear_import_in_progress() + assert filesystem.import_was_interrupted() is False + + +def test_remove_incomplete_index_removes_photon_data(fake_dirs: Path): + node_dir = Path(config.OS_NODE_DIR) + node_dir.mkdir(parents=True) + (node_dir / "segment.bin").write_text("partial") + + filesystem.remove_incomplete_index() + + assert not Path(config.PHOTON_DATA_DIR).exists() + + +def test_reconcile_interrupted_import_cleans_partial_index(fake_dirs: Path): + node_dir = Path(config.OS_NODE_DIR) + node_dir.mkdir(parents=True) + (node_dir / "segment.bin").write_text("partial") + filesystem.mark_import_in_progress() + + filesystem.reconcile_interrupted_import() + + assert not Path(config.PHOTON_DATA_DIR).exists() + assert filesystem.import_was_interrupted() is False + + +def test_reconcile_interrupted_import_noop_without_marker(fake_dirs: Path): + node_dir = Path(config.OS_NODE_DIR) + node_dir.mkdir(parents=True) + (node_dir / "segment.bin").write_text("complete") + + filesystem.reconcile_interrupted_import() + + assert Path(config.OS_NODE_DIR).exists() + + def test_cleanup_staging_and_temp_backup_removes_both(tmp_path: Path): staging = tmp_path / "staging" backup = tmp_path / "backup" diff --git a/tests/test_importer.py b/tests/test_importer.py index 01034847..16da5aff 100644 --- a/tests/test_importer.py +++ b/tests/test_importer.py @@ -6,6 +6,13 @@ from src.utils import config +@pytest.fixture(autouse=True) +def _stub_index_markers(monkeypatch): + monkeypatch.setattr(importer, 
"mark_import_in_progress", lambda: None) + monkeypatch.setattr(importer, "update_timestamp_marker", lambda: None) + monkeypatch.setattr(importer, "clear_import_in_progress", lambda: None) + + def _noop_makedirs(path: str, exist_ok: bool = False) -> None: _ = (path, exist_ok) @@ -206,3 +213,40 @@ def test_run_jsonl_import_raises_when_import_process_fails(monkeypatch): assert process.kill_calls == 1 assert process.wait_calls == 2 assert cleanup_calls == [True] + + +def test_run_jsonl_import_marks_then_clears_progress_on_success(monkeypatch): + process = RecordingProcess() + events = [] + + monkeypatch.setattr(config, "REGION", "andorra") + monkeypatch.setattr(importer, "download_jsonl", lambda region: "/photon/data/temp/andorra.jsonl.zst") + monkeypatch.setattr(importer, "stream_decompress", lambda path: [b'{"type":"Place"}\n']) + monkeypatch.setattr(importer, "_start_photon_import", lambda input_source, country_codes=None: process) + monkeypatch.setattr(importer, "clear_temp_dir", lambda: None) + monkeypatch.setattr(importer, "mark_import_in_progress", lambda: events.append("mark")) + monkeypatch.setattr(importer, "update_timestamp_marker", lambda: events.append("timestamp")) + monkeypatch.setattr(importer, "clear_import_in_progress", lambda: events.append("clear")) + + importer.run_jsonl_import() + + assert events == ["mark", "timestamp", "clear"] + + +def test_run_jsonl_import_leaves_progress_marker_on_failure(monkeypatch): + process = RecordingProcess(wait_return_code=2) + events = [] + + monkeypatch.setattr(config, "REGION", "andorra") + monkeypatch.setattr(importer, "download_jsonl", lambda region: "/photon/data/temp/andorra.jsonl.zst") + monkeypatch.setattr(importer, "stream_decompress", lambda path: [b'{"type":"Place"}\n']) + monkeypatch.setattr(importer, "_start_photon_import", lambda input_source, country_codes=None: process) + monkeypatch.setattr(importer, "clear_temp_dir", lambda: None) + monkeypatch.setattr(importer, "mark_import_in_progress", lambda: 
events.append("mark")) + monkeypatch.setattr(importer, "update_timestamp_marker", lambda: events.append("timestamp")) + monkeypatch.setattr(importer, "clear_import_in_progress", lambda: events.append("clear")) + + with pytest.raises(RuntimeError, match="exit code 2"): + importer.run_jsonl_import() + + assert events == ["mark"] From d959484e47e72b1240f61d9ad55015184d63095a Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Tue, 9 Jun 2026 17:32:49 +0200 Subject: [PATCH 09/27] ci: use uv native caching, pin to .python-version file --- .github/workflows/lint.yml | 94 ++++++++------------------------------ 1 file changed, 19 insertions(+), 75 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 51300155..27e436ca 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -10,109 +10,53 @@ on: workflow_dispatch: jobs: - setup: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 - with: - persist-credentials: false - - name: "Set up Python" - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 - with: - python-version-file: "pyproject.toml" - - name: Install uv - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8 - with: - enable-cache: true - version: 0.11.* - - name: Install dependencies - run: uv sync --locked - - name: Cache dependencies - uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5 - with: - path: | - .venv - ~/.cache/uv - key: ${{ runner.os }}-uv-${{ hashFiles('uv.lock') }} - lint: runs-on: ubuntu-latest - needs: setup steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 with: persist-credentials: false - - name: "Set up Python" - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 with: - python-version-file: "pyproject.toml" - - name: Install uv - uses: 
astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8 + python-version-file: ".python-version" + + - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8 with: enable-cache: true version: 0.11.* - - name: Restore dependencies - uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5 - with: - path: | - .venv - ~/.cache/uv - key: ${{ runner.os }}-uv-${{ hashFiles('uv.lock') }} - fail-on-cache-miss: true - - name: Run linting - run: | - uv run ruff check --fix - uv run ruff format + - run: uv sync --locked + - run: | + uv run ruff check + uv run ruff format --check typecheck: runs-on: ubuntu-latest - needs: setup steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 with: persist-credentials: false - - name: "Set up Python" - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 with: - python-version-file: "pyproject.toml" - - name: Install uv - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8 + python-version-file: ".python-version" + - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8 with: enable-cache: true version: 0.11.* - - name: Restore dependencies - uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5 - with: - path: | - .venv - ~/.cache/uv - key: ${{ runner.os }}-uv-${{ hashFiles('uv.lock') }} - fail-on-cache-miss: true - - name: Run type checking - run: uv run ty check + - run: uv sync --locked + - run: uv run ty check + vulture: runs-on: ubuntu-latest - needs: setup steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 with: persist-credentials: false - - name: "Set up Python" - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 with: - python-version-file: "pyproject.toml" - - name: Install uv - uses: 
astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8 + python-version-file: ".python-version" + - uses: astral-sh/setup-uv@08807647e7069bb48b6ef5acd8ec9567f424441b # v8 with: enable-cache: true version: 0.11.* - - name: Restore dependencies - uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5 - with: - path: | - .venv - ~/.cache/uv - key: ${{ runner.os }}-uv-${{ hashFiles('uv.lock') }} - fail-on-cache-miss: true - - name: Run vulture - run: uv run vulture --min-confidence 100 --exclude ".venv" . + - run: uv sync --locked + - run: uv run vulture --min-confidence 100 --exclude ".venv" . From 53d2f68b0d9b406bf74cc4e2f68004b368904c30 Mon Sep 17 00:00:00 2001 From: "koalabot-rt[bot]" <292142184+koalabot-rt[bot]@users.noreply.github.com> Date: Fri, 12 Jun 2026 21:34:40 +0000 Subject: [PATCH 10/27] chore(deps): update dependency pytest to v9.0.3 [security] --- uv.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/uv.lock b/uv.lock index c1d027b7..90d70b3c 100644 --- a/uv.lock +++ b/uv.lock @@ -331,7 +331,7 @@ wheels = [ [[package]] name = "pytest" -version = "9.0.2" +version = "9.0.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -340,9 +340,9 @@ dependencies = [ { name = "pluggy" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/0d/549bd94f1a0a402dc8cf64563a117c0f3765662e2e668477624baeec44d5/pytest-9.0.3.tar.gz", hash = "sha256:b86ada508af81d19edeb213c681b1d48246c1a91d304c6c81a427674c17eb91c", size = 1572165, upload-time = "2026-04-07T17:16:18.027Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, + { url = "https://files.pythonhosted.org/packages/d4/24/a372aaf5c9b7208e7112038812994107bc65a84cd00e0354a88c2c77a617/pytest-9.0.3-py3-none-any.whl", hash = "sha256:2c5efc453d45394fdd706ade797c0a81091eccd1d6e4bccfcd476e2b8e0ab5d9", size = 375249, upload-time = "2026-04-07T17:16:16.13Z" }, ] [[package]] From 827ffaff1bc21468e473a64e448ed4ff34722d47 Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Wed, 10 Jun 2026 19:48:33 +0200 Subject: [PATCH 11/27] refactor: split filesystem/updater into index and update modules --- src/__init__.py | 3 - src/check_remote.py | 20 +- src/downloader.py | 202 ++-------- src/entrypoint.py | 73 ++-- src/filesystem.py | 252 ------------- src/importer.py | 8 +- src/index.py | 151 ++++++++ src/process_manager.py | 104 +++--- src/update.py | 168 +++++++++ src/updater.py | 41 --- ...downloader.py => test_jsonl_downloader.py} | 0 tests/test_check_remote.py | 18 - tests/test_downloader.py | 204 +++-------- tests/test_entrypoint.py | 206 ++++++----- tests/test_filesystem.py | 346 ------------------ tests/test_importer.py | 21 +- tests/test_index.py | 266 ++++++++++++++ tests/test_process_manager.py | 180 ++++++--- tests/test_update.py | 330 +++++++++++++++++ tests/test_updater.py | 57 --- 20 files changed, 1302 insertions(+), 1348 deletions(-) delete mode 100644 src/filesystem.py create mode 100644 src/index.py create mode 100644 src/update.py delete mode 100644 src/updater.py rename tests/jsonl/{test_downloader.py => test_jsonl_downloader.py} (100%) delete mode 100644 tests/test_filesystem.py create mode 100644 tests/test_index.py create mode 100644 tests/test_update.py delete mode 100644 tests/test_updater.py diff --git a/src/__init__.py b/src/__init__.py index 
a9e9ede0..e69de29b 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,3 +0,0 @@ -from src.downloader import InsufficientSpaceError - -__all__ = ["InsufficientSpaceError"] diff --git a/src/check_remote.py b/src/check_remote.py index 84400019..f35b6ead 100644 --- a/src/check_remote.py +++ b/src/check_remote.py @@ -1,10 +1,10 @@ import datetime -import os import requests from dateutil.parser import parse as parsedate from requests.exceptions import RequestException +from src import index from src.utils import config from src.utils.logger import get_logger from src.utils.regions import get_index_url_path @@ -54,16 +54,6 @@ def get_remote_time(remote_url: str): return None -def get_local_time(local_path: str): - marker_file = os.path.join(config.DATA_DIR, ".photon-index-updated") - if os.path.exists(marker_file): - return os.path.getmtime(marker_file) - - if not os.path.exists(local_path): - return 0.0 - return os.path.getmtime(local_path) - - def compare_mtime() -> bool: try: index_path = get_index_url_path(config.REGION, config.INDEX_DB_VERSION, config.INDEX_FILE_EXTENSION) @@ -79,11 +69,9 @@ def compare_mtime() -> bool: logging.warning("Could not determine remote time. Assuming no update is needed.") return False - marker_file = os.path.join(config.DATA_DIR, ".photon-index-updated") - using_marker_file = os.path.exists(marker_file) + using_marker_file = index.has_update_timestamp() - local_timestamp = get_local_time(config.OS_NODE_DIR) - local_dt = datetime.datetime.fromtimestamp(local_timestamp, tz=datetime.UTC) + local_dt = datetime.datetime.fromtimestamp(index.last_updated(), tz=datetime.UTC) logging.debug(f"Remote index time: {remote_dt}") logging.debug(f"Local index time: {local_dt}") @@ -107,7 +95,7 @@ def check_index_age() -> bool: logging.warning(f"Invalid MIN_INDEX_DATE format: {config.MIN_INDEX_DATE}. 
Expected DD.MM.YY") return True - local_timestamp = get_local_time(config.OS_NODE_DIR) + local_timestamp = index.last_updated() if local_timestamp == 0.0: logging.info("No local index found, update required") return True diff --git a/src/downloader.py b/src/downloader.py index ad458630..12517e5c 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -8,17 +8,8 @@ from requests.exceptions import RequestException from tqdm import tqdm -from src.check_remote import RemoteFileSizeError, get_local_time, get_remote_file_size -from src.filesystem import clear_temp_dir, extract_index, move_index, verify_checksum from src.utils import config from src.utils.logger import get_logger -from src.utils.regions import get_index_url_path -from src.utils.sanitize import sanitize_url - - -class InsufficientSpaceError(Exception): - pass - logging = get_logger() @@ -83,6 +74,29 @@ def check_disk_space_requirements(download_size: int, is_parallel: bool = True) return True +def prepare_temp_dir(): + if os.path.isdir(config.TEMP_DIR): + logging.debug(f"Temporary directory {config.TEMP_DIR} exists. 
Attempting to remove it.") + try: + shutil.rmtree(config.TEMP_DIR) + except Exception as e: + logging.error(f"Failed to remove existing TEMP_DIR: {e}") + raise + + logging.debug(f"Creating temporary directory: {config.TEMP_DIR}") + os.makedirs(config.TEMP_DIR, exist_ok=True) + + +def clear_temp_dir(): + logging.info("Removing TEMP dir") + if not os.path.exists(config.TEMP_DIR): + return + try: + shutil.rmtree(config.TEMP_DIR) + except Exception: + logging.exception("Failed to Remove TEMP_DIR") + + def get_download_state_file(destination: str) -> str: return destination + ".download_state" @@ -151,172 +165,6 @@ def supports_range_requests(url: str) -> bool: return False -def get_download_url() -> str: - if config.FILE_URL: - logging.info("Using custom FILE_URL for download: %s", sanitize_url(config.FILE_URL)) - return config.FILE_URL - - index_path = get_index_url_path(config.REGION, config.INDEX_DB_VERSION, config.INDEX_FILE_EXTENSION) - download_url = config.BASE_URL + index_path - logging.info("Using constructed location for download: %s", download_url) - return download_url - - -def parallel_update(): - logging.info("Starting parallel update process...") - - try: - if os.path.isdir(config.TEMP_DIR): - logging.debug(f"Temporary directory {config.TEMP_DIR} exists. 
Attempting to remove it.") - try: - shutil.rmtree(config.TEMP_DIR) - logging.debug(f"Successfully removed directory: {config.TEMP_DIR}") - except Exception as e: - logging.error(f"Failed to remove existing TEMP_DIR: {e}") - raise - - logging.debug(f"Creating temporary directory: {config.TEMP_DIR}") - os.makedirs(config.TEMP_DIR, exist_ok=True) - - download_url = get_download_url() - - try: - file_size = get_remote_file_size(download_url) - if not check_disk_space_requirements(file_size, is_parallel=True): - logging.error("Insufficient disk space for parallel update") - raise InsufficientSpaceError("Insufficient disk space for parallel update") - except RemoteFileSizeError as e: - if config.SKIP_SPACE_CHECK: - logging.warning(f"{e}") - logging.warning("SKIP_SPACE_CHECK is enabled, proceeding without space check") - else: - logging.error(f"{e}") - logging.error( - "Cannot proceed without verifying disk space. " - "Set SKIP_SPACE_CHECK=true to bypass this check (not recommended)." - ) - raise - - logging.info("Downloading index") - - index_file = download_index() - - extract_index(index_file) - - if not config.SKIP_MD5_CHECK: - md5_file = download_md5() - - logging.info("Verifying checksum...") - verify_checksum(md5_file, index_file) - - logging.debug("Checksum verification successful.") - - logging.info("Moving Index") - move_index() - clear_temp_dir() - - logging.info("Parallel update process completed successfully.") - - except Exception as e: - logging.error(f"FATAL: Update process failed with an error: {e}") - logging.error("Aborting script.") - sys.exit(1) - - -def sequential_update(): - logging.info("Starting sequential download process...") - - try: - if os.path.isdir(config.TEMP_DIR): - logging.debug(f"Temporary directory {config.TEMP_DIR} exists. 
Attempting to remove it.") - try: - shutil.rmtree(config.TEMP_DIR) - logging.debug(f"Successfully removed directory: {config.TEMP_DIR}") - except Exception as e: - logging.error(f"Failed to remove existing TEMP_DIR: {e}") - raise - - logging.debug(f"Creating temporary directory: {config.TEMP_DIR}") - os.makedirs(config.TEMP_DIR, exist_ok=True) - - download_url = get_download_url() - - try: - file_size = get_remote_file_size(download_url) - if not check_disk_space_requirements(file_size, is_parallel=False): - logging.error("Insufficient disk space for sequential update") - raise InsufficientSpaceError("Insufficient disk space for sequential update") - except RemoteFileSizeError as e: - if config.SKIP_SPACE_CHECK: - logging.warning(f"{e}") - logging.warning("SKIP_SPACE_CHECK is enabled, proceeding without space check") - else: - logging.error(f"{e}") - logging.error( - "Cannot proceed without verifying disk space. " - "Set SKIP_SPACE_CHECK=true to bypass this check (not recommended)." - ) - raise - - logging.info("Downloading new index and MD5 checksum...") - index_file = download_index() - extract_index(index_file) - - if not config.SKIP_MD5_CHECK: - md5_file = download_md5() - - logging.info("Verifying checksum...") - verify_checksum(md5_file, index_file) - - logging.debug("Checksum verification successful.") - - logging.info("Moving new index into place...") - move_index() - - clear_temp_dir() - - logging.info("Sequential download process completed successfully.") - - except Exception as e: - logging.critical(f"FATAL: Update process failed with an error: {e}") - logging.critical("Aborting script.") - sys.exit(1) - - -def download_index() -> str: - output_file = f"photon-db-latest.{config.INDEX_FILE_EXTENSION}" - download_url = get_download_url() - - output = os.path.join(config.TEMP_DIR, output_file) - - if not download_file(download_url, output): - raise Exception(f"Failed to download index from {download_url}") - - local_timestamp = 
get_local_time(config.OS_NODE_DIR) - - logging.debug(f"New index timestamp: {local_timestamp}") - return output - - -def download_md5(): - if config.MD5_URL: - # MD5 URL provided, use it directly. - logging.info("Using custom MD5_URL for checksum: %s", sanitize_url(config.MD5_URL)) - download_url = config.MD5_URL - else: - md5_path = get_index_url_path(config.REGION, config.INDEX_DB_VERSION, config.INDEX_FILE_EXTENSION) + ".md5" - download_url = config.BASE_URL + md5_path - logging.info("Using constructed URL for checksum: %s", download_url) - - output_file = f"photon-db-latest.{config.INDEX_FILE_EXTENSION}.md5" - output = os.path.join(config.TEMP_DIR, output_file) - - if not download_file(download_url, output): - raise Exception(f"Failed to download MD5 checksum from {sanitize_url(download_url)}") - - return output - - def _prepare_download(url, destination): """Prepare download parameters including resume position.""" state = load_download_state(destination) @@ -502,6 +350,10 @@ def download_file(url, destination): logging.exception(f"Download failed after {max_retries} attempts") return False + except OSError: + logging.exception("Download failed due to a local filesystem error") + raise + except Exception: logging.exception("Download failed") return False diff --git a/src/entrypoint.py b/src/entrypoint.py index ff2d81b0..f6e56888 100644 --- a/src/entrypoint.py +++ b/src/entrypoint.py @@ -1,12 +1,9 @@ -import os -import sys - +from src import index from src.check_remote import check_index_age -from src.downloader import InsufficientSpaceError, parallel_update, sequential_update -from src.filesystem import reconcile_interrupted_import from src.importer import run_jsonl_import +from src.update import InsufficientSpaceError, run_update from src.utils import config -from src.utils.logger import get_logger, setup_logging +from src.utils.logger import get_logger from src.utils.notify import send_notification from src.utils.sanitize import sanitize_url from 
src.utils.validate_config import validate_config @@ -53,31 +50,22 @@ def run_update_or_import(force_update: bool = False) -> None: if not force_update: logger.info("Starting initial download using sequential strategy") logger.info("Note: Initial download will use sequential strategy regardless of config setting") - sequential_update() + run_update("SEQUENTIAL") return - if config.UPDATE_STRATEGY == "PARALLEL": - parallel_update() - else: - sequential_update() + run_update("PARALLEL" if config.UPDATE_STRATEGY == "PARALLEL" else "SEQUENTIAL") -def main(): +def run_setup() -> None: send_notification("Photon-Docker Initializing") - logger.debug("Entrypoint setup called") log_config() - - try: - validate_config() - except ValueError as e: - logger.error(f"Stopping due to invalid configuration.\n{e}") - sys.exit(1) + validate_config() if config.MIN_INDEX_DATE: logger.info(f"MIN_INDEX_DATE: {config.MIN_INDEX_DATE}") - reconcile_interrupted_import() + index.reconcile() if config.FORCE_UPDATE: logger.info("Starting forced update") @@ -86,11 +74,10 @@ def main(): except InsufficientSpaceError as e: logger.error(f"Cannot proceed with force update: {e}") send_notification(f"Photon-Docker force update failed: {e}") - sys.exit(75) - except Exception: - logger.error("Force update failed") raise - elif not os.path.isdir(config.OS_NODE_DIR): + return + + if not index.is_present(): if not config.INITIAL_DOWNLOAD: logger.warning("Initial download is disabled but no existing Photon index was found. 
") return @@ -99,30 +86,20 @@ def main(): except InsufficientSpaceError as e: logger.error(f"Cannot proceed: {e}") send_notification(f"Photon-Docker cannot start: {e}") - sys.exit(75) - except Exception: - logger.error("Initial setup failed") raise - else: - logger.info("Existing index found, skipping download") + return - if config.IMPORT_MODE == "jsonl": - logger.info("JSONL mode with existing index found, skipping automatic rebuild during setup") - return + logger.info("Existing index found, skipping download") + + if config.IMPORT_MODE == "jsonl": + logger.info("JSONL mode with existing index found, skipping automatic rebuild during setup") + return - if config.MIN_INDEX_DATE and check_index_age(): - logger.info("Index is older than minimum required date, starting sequential update") - try: - sequential_update() - except InsufficientSpaceError as e: - logger.error(f"Cannot proceed with minimum date update: {e}") - send_notification(f"Photon-Docker minimum date update failed: {e}") - sys.exit(75) - except Exception: - logger.error("Minimum date update failed") - raise - - -if __name__ == "__main__": - setup_logging() - main() + if config.MIN_INDEX_DATE and check_index_age(): + logger.info("Index is older than minimum required date, starting sequential update") + try: + run_update("SEQUENTIAL") + except InsufficientSpaceError as e: + logger.error(f"Cannot proceed with minimum date update: {e}") + send_notification(f"Photon-Docker minimum date update failed: {e}") + raise diff --git a/src/filesystem.py b/src/filesystem.py deleted file mode 100644 index 70dbf31f..00000000 --- a/src/filesystem.py +++ /dev/null @@ -1,252 +0,0 @@ -import hashlib -import os -import shutil -import subprocess -from pathlib import Path - -from src.utils import config -from src.utils.logger import get_logger - -logging = get_logger() - - -def extract_index(index_file: str): - logging.info("Extracting Index") - logging.debug(f"Index file: {index_file}") - logging.debug(f"Index file exists: 
{os.path.exists(index_file)}") - logging.debug(f"Index file size: {os.path.getsize(index_file) if os.path.exists(index_file) else 'N/A'}") - logging.debug(f"Temp directory: {config.TEMP_DIR}") - logging.debug(f"Temp directory exists: {os.path.exists(config.TEMP_DIR)}") - - if not os.path.exists(config.TEMP_DIR): - logging.debug(f"Creating temp directory: {config.TEMP_DIR}") - os.makedirs(config.TEMP_DIR, exist_ok=True) - - install_command = f"lbzip2 -d -c {index_file} | tar x -o -C {config.TEMP_DIR}" - logging.debug(f"Extraction command: {install_command}") - - try: - logging.debug("Starting extraction process...") - result = subprocess.run(install_command, shell=True, capture_output=True, text=True, check=True) # noqa S602 - logging.debug("Extraction process completed successfully") - - if result.stdout: - logging.debug(f"Extraction stdout: {result.stdout}") - if result.stderr: - logging.debug(f"Extraction stderr: {result.stderr}") - - logging.debug(f"Contents of {config.TEMP_DIR} after extraction:") - try: - for item in os.listdir(config.TEMP_DIR): - item_path = os.path.join(config.TEMP_DIR, item) - if os.path.isdir(item_path): - logging.debug(f" DIR: {item}") - try: - sub_items = os.listdir(item_path) - logging.debug(f" Contains {len(sub_items)} items") - for sub_item in sub_items[:5]: - logging.debug(f" {sub_item}") - if len(sub_items) > 5: - logging.debug(f" ... 
and {len(sub_items) - 5} more items") - except Exception as e: - logging.debug(f" Could not list subdirectory contents: {e}") - else: - logging.debug(f" FILE: {item} ({os.path.getsize(item_path)} bytes)") - except Exception as e: - logging.debug(f"Could not list contents of {config.TEMP_DIR}: {e}") - - except subprocess.CalledProcessError as e: - logging.error(f"Index extraction failed with return code {e.returncode}") - logging.error(f"Command: {e.cmd}") - logging.error(f"Stdout: {e.stdout}") - logging.error(f"Stderr: {e.stderr}") - raise - except Exception: - logging.exception("Index extraction failed") - raise - - -def move_index(): - temp_photon_dir = os.path.join(config.TEMP_DIR, "photon_data") - target_node_dir = os.path.join(config.PHOTON_DATA_DIR) - - logging.info(f"Moving index from {temp_photon_dir} to {target_node_dir}") - result = move_index_atomic(temp_photon_dir, target_node_dir) - - if result: - update_timestamp_marker() - - return result - - -def move_index_atomic(source_dir: str, target_dir: str) -> bool: - try: - logging.info("Starting atomic index move operation") - - os.makedirs(os.path.dirname(target_dir), exist_ok=True) - - staging_dir = target_dir + ".staging" - backup_dir = target_dir + ".backup" - - cleanup_staging_and_temp_backup(staging_dir, backup_dir) - - shutil.move(source_dir, staging_dir) - - if os.path.exists(target_dir): - os.rename(target_dir, backup_dir) - - os.rename(staging_dir, target_dir) - logging.info("Atomic index move completed successfully") - - return True - - except Exception as e: - logging.error(f"Atomic move failed: {e}") - rollback_atomic_move(source_dir, target_dir, staging_dir, backup_dir) - raise - - -def rollback_atomic_move(original_source: str, target_dir: str, staging_dir: str, backup_dir: str): - logging.error("Rolling back atomic move operation") - - try: - if os.path.exists(target_dir) and not os.path.exists(backup_dir): - logging.debug("New index was successfully moved, keeping it") - return - - if 
os.path.exists(target_dir): - shutil.rmtree(target_dir) - - if os.path.exists(backup_dir): - logging.info("Restoring backup after failed atomic move") - os.rename(backup_dir, target_dir) - - if os.path.exists(staging_dir): - shutil.move(staging_dir, original_source) - - logging.info("Rollback completed successfully") - - except Exception as rollback_error: - logging.critical(f"Rollback failed: {rollback_error}") - - -def cleanup_staging_and_temp_backup(staging_dir: str, backup_dir: str): - for dir_path in [staging_dir, backup_dir]: - if os.path.exists(dir_path): - try: - shutil.rmtree(dir_path) - except Exception as e: - logging.warning(f"Failed to cleanup {dir_path}: {e}") - - -def cleanup_backup_after_verification(target_dir: str) -> bool: - backup_dir = target_dir + ".backup" - if os.path.exists(backup_dir): - try: - logging.info("Removing backup after successful verification") - shutil.rmtree(backup_dir) - return True - except Exception as e: - logging.warning(f"Failed to cleanup backup: {e}") - return False - return True - - -def verify_checksum(md5_file, index_file): - hash_md5 = hashlib.md5() # noqa S303 - try: - with open(index_file, "rb") as f: - for chunk in iter(lambda: f.read(4096), b""): - hash_md5.update(chunk) - dl_sum = hash_md5.hexdigest() - except FileNotFoundError: - logging.error(f"Index file not found for checksum generation: {index_file}") - raise - - try: - with open(md5_file) as f: - md5_sum = f.read().split()[0].strip() - except FileNotFoundError: - logging.error(f"MD5 file not found: {md5_file}") - raise - except IndexError: - logging.error(f"MD5 file is empty or malformed: {md5_file}") - raise - - if dl_sum == md5_sum: - logging.info("Checksum verified successfully.") - return True - - raise Exception(f"Checksum mismatch for {index_file}. 
Expected: {md5_sum}, Got: {dl_sum}") - - -def clear_temp_dir(): - logging.info("Removing TEMP dir") - if os.path.exists(config.TEMP_DIR): - logging.debug(f"Contents of TEMP directory {config.TEMP_DIR}:") - try: - for item in os.listdir(config.TEMP_DIR): - item_path = os.path.join(config.TEMP_DIR, item) - if os.path.isdir(item_path): - logging.debug(f" DIR: {item}") - else: - logging.debug(f" FILE: {item}") - except Exception as e: - logging.debug(f"Could not list contents of {config.TEMP_DIR}: {e}") - - try: - shutil.rmtree(config.TEMP_DIR) - except Exception: - logging.exception("Failed to Remove TEMP_DIR") - - -def update_timestamp_marker(): - marker_file = os.path.join(config.DATA_DIR, ".photon-index-updated") - try: - Path(marker_file).touch() - logging.info(f"Updated timestamp marker: {marker_file}") - except Exception as e: - logging.warning(f"Failed to update timestamp marker: {e}") - - -def _import_in_progress_marker() -> str: - return os.path.join(config.DATA_DIR, ".photon-import-in-progress") - - -def mark_import_in_progress(): - marker_file = _import_in_progress_marker() - try: - Path(marker_file).touch() - logging.debug(f"Marked import in progress: {marker_file}") - except Exception as e: - logging.warning(f"Failed to write import-in-progress marker: {e}") - - -def clear_import_in_progress(): - marker_file = _import_in_progress_marker() - try: - Path(marker_file).unlink(missing_ok=True) - except Exception as e: - logging.warning(f"Failed to clear import-in-progress marker: {e}") - - -def import_was_interrupted() -> bool: - return os.path.exists(_import_in_progress_marker()) - - -def remove_incomplete_index(): - if os.path.isdir(config.PHOTON_DATA_DIR): - logging.warning(f"Removing incomplete index at {config.PHOTON_DATA_DIR}") - shutil.rmtree(config.PHOTON_DATA_DIR) - - -def reconcile_interrupted_import(): - if not import_was_interrupted(): - return - - logging.warning( - "Detected an interrupted import (in-progress marker present). 
" - "Removing the partial index so a clean import can run." - ) - remove_incomplete_index() - clear_import_in_progress() diff --git a/src/importer.py b/src/importer.py index c0fda198..ba072abf 100644 --- a/src/importer.py +++ b/src/importer.py @@ -2,7 +2,8 @@ import shlex import subprocess -from src.filesystem import clear_import_in_progress, clear_temp_dir, mark_import_in_progress, update_timestamp_marker +from src.downloader import clear_temp_dir +from src.index import begin_import, complete_import from src.jsonl.decompressor import stream_decompress from src.jsonl.downloader import download_jsonl from src.utils import config @@ -20,7 +21,7 @@ def run_jsonl_import() -> None: parent_region = get_jsonl_parent_region(regions) country_codes = get_country_codes_for_regions(regions) if len(regions) > 1 else None - mark_import_in_progress() + begin_import() try: jsonl_path = download_jsonl(parent_region) import_proc = _start_photon_import("-", country_codes=country_codes) @@ -38,8 +39,7 @@ def run_jsonl_import() -> None: import_proc.kill() import_proc.wait() raise - update_timestamp_marker() - clear_import_in_progress() + complete_import() finally: clear_temp_dir() diff --git a/src/index.py b/src/index.py new file mode 100644 index 00000000..7eb844f0 --- /dev/null +++ b/src/index.py @@ -0,0 +1,151 @@ +import os +import shutil +from pathlib import Path + +from src.utils import config +from src.utils.logger import get_logger + +logging = get_logger() + + +class IndexRollbackError(Exception): + pass + + +def _updated_marker() -> str: + return os.path.join(config.DATA_DIR, ".photon-index-updated") + + +def _import_in_progress_marker() -> str: + return os.path.join(config.DATA_DIR, ".photon-import-in-progress") + + +def is_present() -> bool: + return os.path.isdir(config.OS_NODE_DIR) + + +def has_update_timestamp() -> bool: + return os.path.exists(_updated_marker()) + + +def last_updated() -> float: + marker_file = _updated_marker() + if os.path.exists(marker_file): + return 
os.path.getmtime(marker_file) + + if not os.path.exists(config.OS_NODE_DIR): + return 0.0 + return os.path.getmtime(config.OS_NODE_DIR) + + +def mark_updated(): + marker_file = _updated_marker() + try: + Path(marker_file).touch() + logging.info(f"Updated timestamp marker: {marker_file}") + except Exception as e: + logging.warning(f"Failed to update timestamp marker: {e}") + + +def begin_import(): + marker_file = _import_in_progress_marker() + Path(marker_file).touch() + logging.debug(f"Marked import in progress: {marker_file}") + + +def complete_import(): + mark_updated() + _clear_import_marker() + + +def import_was_interrupted() -> bool: + return os.path.exists(_import_in_progress_marker()) + + +def reconcile(): + if not import_was_interrupted(): + return + + logging.warning( + "Detected an interrupted import (in-progress marker present). " + "Removing the partial index so a clean import can run." + ) + if os.path.isdir(config.PHOTON_DATA_DIR): + logging.warning(f"Removing incomplete index at {config.PHOTON_DATA_DIR}") + shutil.rmtree(config.PHOTON_DATA_DIR) + _clear_import_marker() + + +def activate(source_dir: str): + target_dir = config.PHOTON_DATA_DIR + staging_dir = target_dir + ".staging" + backup_dir = target_dir + ".backup" + + try: + logging.info(f"Activating new index from {source_dir}") + + os.makedirs(os.path.dirname(target_dir), exist_ok=True) + + _cleanup_staging_and_stale_backup(staging_dir, backup_dir) + + shutil.move(source_dir, staging_dir) + + if os.path.exists(target_dir): + os.rename(target_dir, backup_dir) + + os.rename(staging_dir, target_dir) + logging.info("Atomic index move completed successfully") + + except Exception as e: + logging.error(f"Index activation failed: {e}") + _rollback_activation(source_dir, target_dir, staging_dir, backup_dir) + raise + + mark_updated() + + +def drop_backup() -> bool: + backup_dir = config.PHOTON_DATA_DIR + ".backup" + if os.path.exists(backup_dir): + try: + logging.info("Removing backup after successful 
verification") + shutil.rmtree(backup_dir) + return True + except Exception as e: + logging.warning(f"Failed to cleanup backup: {e}") + return False + return True + + +def _clear_import_marker(): + Path(_import_in_progress_marker()).unlink(missing_ok=True) + + +def _cleanup_staging_and_stale_backup(staging_dir: str, backup_dir: str): + for dir_path in [staging_dir, backup_dir]: + if os.path.exists(dir_path): + logging.info(f"Removing leftover directory before activation: {dir_path}") + shutil.rmtree(dir_path) + + +def _rollback_activation(original_source: str, target_dir: str, staging_dir: str, backup_dir: str): + logging.error("Rolling back index activation") + + try: + if os.path.exists(backup_dir): + if os.path.exists(target_dir): + shutil.rmtree(target_dir) + logging.info("Restoring backup after failed activation") + os.rename(backup_dir, target_dir) + + if os.path.exists(staging_dir) and not os.path.exists(original_source): + shutil.move(staging_dir, original_source) + + logging.info("Rollback completed successfully") + + except Exception as rollback_error: + logging.critical(f"Rollback failed, index state may be inconsistent: {rollback_error}") + raise IndexRollbackError( + f"Rollback after failed index activation also failed: {rollback_error}. " + "Index state may be inconsistent and require manual intervention." 
+ ) from rollback_error diff --git a/src/process_manager.py b/src/process_manager.py index ae26f881..5674436a 100644 --- a/src/process_manager.py +++ b/src/process_manager.py @@ -13,10 +13,12 @@ import schedule from requests.exceptions import RequestException +from src import index, update from src.check_remote import compare_mtime -from src.filesystem import cleanup_backup_after_verification, reconcile_interrupted_import +from src.entrypoint import run_setup from src.utils import config from src.utils.logger import get_logger, setup_logging +from src.utils.notify import send_notification logger = get_logger() @@ -77,14 +79,6 @@ def handle_shutdown(self, signum, _frame): self.should_exit = True self.shutdown() - def run_initial_setup(self): - logger.info("Running initial setup...") - result = subprocess.run([sys.executable, "-m", "src.entrypoint", "setup"], check=False, cwd="/photon") # noqa S603 - - if result.returncode != 0: - logger.error("Setup failed!") - sys.exit(1) - def start_photon(self, max_startup_retries=3): for attempt in range(max_startup_retries): logger.info(f"Starting Photon (attempt {attempt + 1}/{max_startup_retries})...") @@ -213,48 +207,44 @@ def run_update(self): logger.info(f"Running {config.UPDATE_STRATEGY.lower()} update...") update_start = time.time() - if not compare_mtime(): - update_duration = time.time() - update_start - logger.info(f"Index already up to date - no restart needed ({update_duration:.1f}s)") - self.state = AppState.RUNNING - return + try: + if not compare_mtime(): + update_duration = time.time() - update_start + logger.info(f"Index already up to date - no restart needed ({update_duration:.1f}s)") + return - if config.UPDATE_STRATEGY == "SEQUENTIAL": - self.stop_photon() + if config.UPDATE_STRATEGY == "SEQUENTIAL": + self.stop_photon() - result = subprocess.run([sys.executable, "-m", "src.updater"], check=False, cwd="/photon") # noqa S603 + try: + update.run_update(config.UPDATE_STRATEGY) + except Exception as e: + 
update_duration = time.time() - update_start + logger.error(f"Update failed: {e} ({update_duration:.1f}s)") + send_notification(f"Photon Update Failed - {e}") + if not self.photon_process: + logger.info("Attempting to restart Photon after failed update") + if not self.start_photon(): + logger.error("Failed to restart Photon after update failure") + return - if result.returncode == 0: logger.info("Update process completed, verifying Photon health...") - if config.UPDATE_STRATEGY == "PARALLEL": - self.stop_photon() - if self.start_photon(): - update_duration = time.time() - update_start - logger.info(f"Update completed successfully - Photon healthy ({update_duration:.1f}s)") - target_node_dir = os.path.join(config.PHOTON_DATA_DIR) - cleanup_backup_after_verification(target_node_dir) - else: - update_duration = time.time() - update_start - logger.error(f"Update failed - Photon health check failed after restart ({update_duration:.1f}s)") - elif config.UPDATE_STRATEGY == "SEQUENTIAL": - if self.start_photon(): - update_duration = time.time() - update_start - logger.info(f"Update completed successfully - Photon healthy ({update_duration:.1f}s)") - target_node_dir = os.path.join(config.PHOTON_DATA_DIR) - cleanup_backup_after_verification(target_node_dir) - else: - update_duration = time.time() - update_start - logger.error(f"Update failed - Photon health check failed after restart ({update_duration:.1f}s)") - else: - update_duration = time.time() - update_start - logger.error(f"Update process failed with code {result.returncode} ({update_duration:.1f}s)") - if config.UPDATE_STRATEGY == "SEQUENTIAL" and not self.photon_process: - logger.info("Attempting to restart Photon after failed update") - if not self.start_photon(): - logger.error("Failed to restart Photon after update failure") - - self.state = AppState.RUNNING + self.stop_photon() + if self.start_photon(): + update_duration = time.time() - update_start + logger.info(f"Update completed successfully - Photon healthy 
({update_duration:.1f}s)") + send_notification("Photon Index Updated Successfully") + index.drop_backup() + else: + update_duration = time.time() - update_start + logger.error(f"Update failed - Photon health check failed after restart ({update_duration:.1f}s)") + send_notification("Photon Update Failed - health check failed after index swap, service may be down") + except Exception: + logger.exception("Update run failed unexpectedly") + send_notification("Photon Update Failed - unexpected error during update run") + finally: + self.state = AppState.RUNNING def schedule_updates(self): if config.UPDATE_STRATEGY == "DISABLED": @@ -285,12 +275,18 @@ def schedule_updates(self): def scheduler_loop(): while not self.should_exit: - schedule.run_pending() + self._run_pending_jobs() time.sleep(1) thread = threading.Thread(target=scheduler_loop, daemon=True) thread.start() + def _run_pending_jobs(self): + try: + schedule.run_pending() + except Exception: + logger.exception("Scheduled job raised unexpectedly; scheduler continues running") + def monitor_photon(self): while not self.should_exit: if self.photon_process and self.state == AppState.RUNNING: @@ -310,12 +306,14 @@ def shutdown(self): def run(self): logger.info("Photon Manager starting...") - reconcile_interrupted_import() - - if not config.FORCE_UPDATE and os.path.isdir(config.OS_NODE_DIR): - logger.info("Existing index found, skipping initial setup") - else: - self.run_initial_setup() + try: + run_setup() + except update.InsufficientSpaceError: + logger.error("Setup failed: insufficient disk space") + sys.exit(75) + except Exception: + logger.exception("Setup failed!") + sys.exit(1) if not self.start_photon(): logger.error("Failed to start Photon during initial startup") diff --git a/src/update.py b/src/update.py new file mode 100644 index 00000000..627e015e --- /dev/null +++ b/src/update.py @@ -0,0 +1,168 @@ +import hashlib +import os +import subprocess + +from src import index +from src.check_remote import 
RemoteFileSizeError, get_remote_file_size +from src.downloader import check_disk_space_requirements, clear_temp_dir, download_file, prepare_temp_dir +from src.utils import config +from src.utils.logger import get_logger +from src.utils.regions import get_index_url_path +from src.utils.sanitize import sanitize_url + +logging = get_logger() + + +class UpdateError(Exception): + pass + + +class InsufficientSpaceError(UpdateError): + pass + + +class DownloadError(UpdateError): + pass + + +class ExtractionError(UpdateError): + pass + + +class ChecksumMismatchError(UpdateError): + pass + + +def get_download_url() -> str: + if config.FILE_URL: + logging.info("Using custom FILE_URL for download: %s", sanitize_url(config.FILE_URL)) + return config.FILE_URL + + index_path = get_index_url_path(config.REGION, config.INDEX_DB_VERSION, config.INDEX_FILE_EXTENSION) + download_url = config.BASE_URL + index_path + logging.info("Using constructed location for download: %s", download_url) + return download_url + + +def download_index() -> str: + output_file = f"photon-db-latest.{config.INDEX_FILE_EXTENSION}" + download_url = get_download_url() + + output = os.path.join(config.TEMP_DIR, output_file) + + if not download_file(download_url, output): + raise DownloadError(f"Failed to download index from {sanitize_url(download_url)}") + + return output + + +def download_md5() -> str: + if config.MD5_URL: + logging.info("Using custom MD5_URL for checksum: %s", sanitize_url(config.MD5_URL)) + download_url = config.MD5_URL + else: + md5_path = get_index_url_path(config.REGION, config.INDEX_DB_VERSION, config.INDEX_FILE_EXTENSION) + ".md5" + download_url = config.BASE_URL + md5_path + logging.info("Using constructed URL for checksum: %s", download_url) + + output_file = f"photon-db-latest.{config.INDEX_FILE_EXTENSION}.md5" + output = os.path.join(config.TEMP_DIR, output_file) + + if not download_file(download_url, output): + raise DownloadError(f"Failed to download MD5 checksum from 
{sanitize_url(download_url)}") + + return output + + +def extract_index(index_file: str): + logging.info("Extracting Index") + logging.debug(f"Index file: {index_file}") + + if not os.path.exists(config.TEMP_DIR): + os.makedirs(config.TEMP_DIR, exist_ok=True) + + install_command = f"lbzip2 -d -c {index_file} | tar x -o -C {config.TEMP_DIR}" + logging.debug(f"Extraction command: {install_command}") + + try: + result = subprocess.run( # noqa: S603 + ["/bin/bash", "-o", "pipefail", "-c", install_command], capture_output=True, text=True, check=True + ) + if result.stderr: + logging.debug(f"Extraction stderr: {result.stderr}") + except subprocess.CalledProcessError as e: + logging.error(f"Index extraction failed with return code {e.returncode}") + logging.error(f"Stderr: {e.stderr}") + raise ExtractionError(f"Index extraction failed with return code {e.returncode}") from e + + +def verify_checksum(md5_file: str, index_file: str) -> bool: + hash_md5 = hashlib.md5() # noqa S303 + try: + with open(index_file, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hash_md5.update(chunk) + dl_sum = hash_md5.hexdigest() + except FileNotFoundError: + logging.error(f"Index file not found for checksum generation: {index_file}") + raise + + try: + with open(md5_file) as f: + md5_sum = f.read().split()[0].strip() + except FileNotFoundError: + logging.error(f"MD5 file not found: {md5_file}") + raise + except IndexError: + logging.error(f"MD5 file is empty or malformed: {md5_file}") + raise + + if dl_sum == md5_sum: + logging.info("Checksum verified successfully.") + return True + + raise ChecksumMismatchError(f"Checksum mismatch for {index_file}. 
Expected: {md5_sum}, Got: {dl_sum}") + + +def _ensure_disk_space(download_url: str, parallel: bool): + try: + file_size = get_remote_file_size(download_url) + except RemoteFileSizeError as e: + if config.SKIP_SPACE_CHECK: + logging.warning(f"{e}") + logging.warning("SKIP_SPACE_CHECK is enabled, proceeding without space check") + return + logging.error(f"{e}") + logging.error( + "Cannot proceed without verifying disk space. " + "Set SKIP_SPACE_CHECK=true to bypass this check (not recommended)." + ) + raise UpdateError(str(e)) from e + + if not check_disk_space_requirements(file_size, is_parallel=parallel): + raise InsufficientSpaceError("Insufficient disk space for update") + + +def run_update(strategy: str): + logging.info(f"Starting {strategy.lower()} update pipeline...") + + prepare_temp_dir() + + download_url = get_download_url() + _ensure_disk_space(download_url, parallel=strategy == "PARALLEL") + + logging.info("Downloading index") + index_file = download_index() + + extract_index(index_file) + + if not config.SKIP_MD5_CHECK: + md5_file = download_md5() + logging.info("Verifying checksum...") + verify_checksum(md5_file, index_file) + + logging.info("Activating new index") + index.activate(os.path.join(config.TEMP_DIR, "photon_data")) + clear_temp_dir() + + logging.info("Update pipeline completed successfully.") diff --git a/src/updater.py b/src/updater.py deleted file mode 100644 index 0a20f735..00000000 --- a/src/updater.py +++ /dev/null @@ -1,41 +0,0 @@ -import sys - -from src.downloader import parallel_update, sequential_update -from src.utils import config -from src.utils.logger import get_logger, setup_logging -from src.utils.notify import send_notification - -logger = get_logger() - - -def main(): - logger.info("Starting update process...") - - try: - if config.IMPORT_MODE == "jsonl": - logger.info("Scheduled JSONL rebuilds are not implemented yet, skipping updater run") - return - - if config.UPDATE_STRATEGY == "PARALLEL": - logger.info("Running 
parallel update...") - parallel_update() - elif config.UPDATE_STRATEGY == "SEQUENTIAL": - logger.info("Running sequential update...") - sequential_update() - else: - logger.error(f"Unknown update strategy: {config.UPDATE_STRATEGY}") - sys.exit(1) - - logger.info("Update completed successfully") - send_notification("Photon Index Updated Successfully") - - except Exception as e: - error_msg = f"Update failed: {e!s}" - logger.exception(error_msg) - send_notification(f"Photon Update Failed - {error_msg}") - sys.exit(1) - - -if __name__ == "__main__": - setup_logging() - main() diff --git a/tests/jsonl/test_downloader.py b/tests/jsonl/test_jsonl_downloader.py similarity index 100% rename from tests/jsonl/test_downloader.py rename to tests/jsonl/test_jsonl_downloader.py diff --git a/tests/test_check_remote.py b/tests/test_check_remote.py index 9c71d00b..08350f59 100644 --- a/tests/test_check_remote.py +++ b/tests/test_check_remote.py @@ -98,24 +98,6 @@ def test_get_remote_time_returns_none_on_request_error(): assert check_remote.get_remote_time("https://example.com") is None -def test_get_local_time_returns_marker_mtime_when_present(fake_dirs: Path): - marker = fake_dirs / ".photon-index-updated" - marker.write_text("") - os.utime(marker, (1_000_000, 1_000_000)) - assert check_remote.get_local_time(str(fake_dirs / "missing")) == 1_000_000 - - -def test_get_local_time_returns_path_mtime_when_no_marker(fake_dirs: Path): - target = fake_dirs / "node_1" - target.mkdir() - os.utime(target, (2_000_000, 2_000_000)) - assert check_remote.get_local_time(str(target)) == 2_000_000 - - -def test_get_local_time_returns_zero_when_path_missing(fake_dirs: Path): - assert check_remote.get_local_time(str(fake_dirs / "missing")) == 0.0 - - def test_compare_mtime_returns_false_when_remote_time_unknown(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): monkeypatch.setattr(config, "REGION", None) monkeypatch.setattr(config, "BASE_URL", "https://example.com") diff --git 
a/tests/test_downloader.py b/tests/test_downloader.py index c55f8e8f..22863e70 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -1,5 +1,4 @@ import json -import os from pathlib import Path from unittest.mock import MagicMock, patch @@ -159,19 +158,45 @@ def test_supports_range_requests_false_on_error(): assert downloader.supports_range_requests("https://example.com/x") is False -def test_get_download_url_uses_file_url_when_set(monkeypatch: pytest.MonkeyPatch): - monkeypatch.setattr(config, "FILE_URL", "https://override.example/file.tar.bz2") - assert downloader.get_download_url() == "https://override.example/file.tar.bz2" +def test_prepare_temp_dir_creates_fresh_dir(fake_dirs: Path): + assert not Path(config.TEMP_DIR).exists() + downloader.prepare_temp_dir() + assert Path(config.TEMP_DIR).exists() + + +def test_prepare_temp_dir_replaces_existing_dir(fake_dirs: Path): + temp = Path(config.TEMP_DIR) + temp.mkdir() + (temp / "stale.txt").write_text("stale") + + downloader.prepare_temp_dir() + + assert temp.exists() + assert not (temp / "stale.txt").exists() + + +def test_prepare_temp_dir_raises_when_removal_fails(fake_dirs: Path): + temp = Path(config.TEMP_DIR) + temp.mkdir() + with patch("src.downloader.shutil.rmtree", side_effect=OSError("locked")), pytest.raises(OSError, match="locked"): + downloader.prepare_temp_dir() -def test_get_download_url_constructs_from_region_and_base(monkeypatch: pytest.MonkeyPatch): - monkeypatch.setattr(config, "FILE_URL", None) - monkeypatch.setattr(config, "BASE_URL", "https://example.com/public") - monkeypatch.setattr(config, "REGION", "europe") - monkeypatch.setattr(config, "INDEX_DB_VERSION", "1.0") - monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") - url = downloader.get_download_url() - assert url == "https://example.com/public/europe/photon-db-europe-1.0-latest.tar.bz2" +def test_clear_temp_dir_removes_existing_temp(fake_dirs: Path): + temp = Path(config.TEMP_DIR) + temp.mkdir() + (temp / 
"file.txt").write_text("x") + (temp / "sub").mkdir() + (temp / "sub" / "nested").write_text("y") + + downloader.clear_temp_dir() + + assert not temp.exists() + + +def test_clear_temp_dir_handles_missing_temp_dir(fake_dirs: Path): + assert not Path(config.TEMP_DIR).exists() + downloader.clear_temp_dir() def test_prepare_download_no_state(tmp_path: Path): @@ -340,154 +365,11 @@ def test_download_file_returns_false_on_unexpected_exception(tmp_path: Path, mon assert downloader.download_file("https://example.com/x", str(dest)) is False -def test_download_index_returns_path(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): - monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") - monkeypatch.setattr(downloader, "get_download_url", lambda: "https://example.com/x") - Path(config.TEMP_DIR).mkdir(parents=True, exist_ok=True) - - def fake_download(_url, output): - Path(output).write_bytes(b"x") - return True - - with patch("src.downloader.download_file", side_effect=fake_download): - out = downloader.download_index() - - assert out == os.path.join(config.TEMP_DIR, "photon-db-latest.tar.bz2") - assert Path(out).exists() - - -def test_download_index_raises_on_failure(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): - monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") - monkeypatch.setattr(downloader, "get_download_url", lambda: "https://example.com/x") - Path(config.TEMP_DIR).mkdir(parents=True, exist_ok=True) - with ( - patch("src.downloader.download_file", return_value=False), - pytest.raises(Exception, match="Failed to download index"), - ): - downloader.download_index() - - -def test_download_md5_uses_explicit_url(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): - monkeypatch.setattr(config, "MD5_URL", "https://example.com/custom.md5") - monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") - Path(config.TEMP_DIR).mkdir(parents=True, exist_ok=True) - - captured = {} - - def fake_download(url, output): - captured["url"] = url - 
captured["output"] = output - Path(output).write_text("md5") - return True - - with patch("src.downloader.download_file", side_effect=fake_download): - out = downloader.download_md5() - - assert captured["url"] == "https://example.com/custom.md5" - assert out.endswith("photon-db-latest.tar.bz2.md5") - - -def test_download_md5_constructs_url_when_unset(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): - monkeypatch.setattr(config, "MD5_URL", None) - monkeypatch.setattr(config, "FILE_URL", None) - monkeypatch.setattr(config, "BASE_URL", "https://example.com/public") - monkeypatch.setattr(config, "REGION", None) - monkeypatch.setattr(config, "INDEX_DB_VERSION", "1.0") - monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") - Path(config.TEMP_DIR).mkdir(parents=True, exist_ok=True) - - captured = {} - - def fake_download(url, output): - captured["url"] = url - Path(output).write_text("md5") - return True - - with patch("src.downloader.download_file", side_effect=fake_download): - downloader.download_md5() - - assert captured["url"] == "https://example.com/public/photon-db-planet-1.0-latest.tar.bz2.md5" - - -def test_download_md5_raises_on_failure(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): - monkeypatch.setattr(config, "MD5_URL", "https://example.com/x.md5") - monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") - Path(config.TEMP_DIR).mkdir(parents=True, exist_ok=True) +def test_download_file_propagates_oserror(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): + dest = tmp_path / "out.bin" + monkeypatch.setattr(config, "DOWNLOAD_MAX_RETRIES", "1") with ( - patch("src.downloader.download_file", return_value=False), - pytest.raises(Exception, match="Failed to download MD5"), + patch("src.downloader.requests.get", side_effect=OSError("disk full")), + pytest.raises(OSError, match="disk full"), ): - downloader.download_md5() - - -def _make_orchestrator_patches(monkeypatch: pytest.MonkeyPatch): - fake_index = str(Path(config.TEMP_DIR) / 
"index.tar.bz2") - fake_md5 = fake_index + ".md5" - monkeypatch.setattr(downloader, "get_download_url", lambda: "https://example.com/x") - monkeypatch.setattr(downloader, "get_remote_file_size", lambda _: 1024) - monkeypatch.setattr(downloader, "check_disk_space_requirements", lambda *_, **__: True) - monkeypatch.setattr(downloader, "download_index", lambda: fake_index) - monkeypatch.setattr(downloader, "download_md5", lambda: fake_md5) - monkeypatch.setattr(downloader, "extract_index", lambda _: None) - monkeypatch.setattr(downloader, "verify_checksum", lambda *_: True) - monkeypatch.setattr(downloader, "move_index", lambda: True) - monkeypatch.setattr(downloader, "clear_temp_dir", lambda: None) - - -def test_parallel_update_happy_path(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): - monkeypatch.setattr(config, "SKIP_MD5_CHECK", False) - _make_orchestrator_patches(monkeypatch) - downloader.parallel_update() - assert Path(config.TEMP_DIR).exists() - - -def test_parallel_update_skips_md5_when_configured(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): - monkeypatch.setattr(config, "SKIP_MD5_CHECK", True) - _make_orchestrator_patches(monkeypatch) - - md5_called = {"n": 0} - - def fake_md5(): - md5_called["n"] += 1 - return str(Path(config.TEMP_DIR) / "x.md5") - - monkeypatch.setattr(downloader, "download_md5", fake_md5) - downloader.parallel_update() - assert md5_called["n"] == 0 - - -def test_parallel_update_raises_on_insufficient_space(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): - _make_orchestrator_patches(monkeypatch) - monkeypatch.setattr(downloader, "check_disk_space_requirements", lambda *_, **__: False) - with pytest.raises(SystemExit): - downloader.parallel_update() - - -def test_parallel_update_skip_space_check_proceeds_on_size_error(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): - monkeypatch.setattr(config, "SKIP_SPACE_CHECK", True) - monkeypatch.setattr(config, "SKIP_MD5_CHECK", True) - _make_orchestrator_patches(monkeypatch) - - def 
boom(_url): - raise downloader.RemoteFileSizeError("no size") - - monkeypatch.setattr(downloader, "get_remote_file_size", boom) - downloader.parallel_update() - - -def test_sequential_update_happy_path(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): - monkeypatch.setattr(config, "SKIP_MD5_CHECK", False) - _make_orchestrator_patches(monkeypatch) - downloader.sequential_update() - - -def test_sequential_update_raises_on_size_error_without_skip(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): - monkeypatch.setattr(config, "SKIP_SPACE_CHECK", False) - _make_orchestrator_patches(monkeypatch) - - def boom(_url): - raise downloader.RemoteFileSizeError("no size") - - monkeypatch.setattr(downloader, "get_remote_file_size", boom) - with pytest.raises(SystemExit): - downloader.sequential_update() + downloader.download_file("https://example.com/x", str(dest)) diff --git a/tests/test_entrypoint.py b/tests/test_entrypoint.py index be5c39da..52eca057 100644 --- a/tests/test_entrypoint.py +++ b/tests/test_entrypoint.py @@ -4,13 +4,18 @@ import pytest from src import entrypoint -from src.downloader import InsufficientSpaceError +from src.update import InsufficientSpaceError from src.utils import config @pytest.fixture def base_config(monkeypatch: pytest.MonkeyPatch, tmp_path: Path): - os_node_dir = tmp_path / "node_1" + data_dir = tmp_path / "data" + photon_data_dir = data_dir / "photon_data" + os_node_dir = photon_data_dir / "node_1" + data_dir.mkdir() + monkeypatch.setattr(config, "DATA_DIR", str(data_dir)) + monkeypatch.setattr(config, "PHOTON_DATA_DIR", str(photon_data_dir)) monkeypatch.setattr(config, "OS_NODE_DIR", str(os_node_dir)) monkeypatch.setattr(config, "IMPORT_MODE", "db") monkeypatch.setattr(config, "FORCE_UPDATE", False) @@ -20,224 +25,217 @@ def base_config(monkeypatch: pytest.MonkeyPatch, tmp_path: Path): monkeypatch.setattr(config, "FILE_URL", None) monkeypatch.setattr(config, "MD5_URL", None) monkeypatch.setattr(config, "APPRISE_URLS", None) - return 
os_node_dir + return Path(config.OS_NODE_DIR) def _patch_common(): return (patch("src.entrypoint.send_notification"), patch("src.entrypoint.validate_config")) -def test_entrypoint_skips_download_when_index_present(base_config: Path): - base_config.mkdir() +def test_setup_skips_download_when_index_present(base_config: Path): + base_config.mkdir(parents=True) notify, validate = _patch_common() - with ( - notify as n, - validate as v, - patch("src.entrypoint.sequential_update") as seq, - patch("src.entrypoint.parallel_update") as par, - ): - entrypoint.main() + with notify as n, validate as v, patch("src.entrypoint.run_update") as run: + entrypoint.run_setup() n.assert_called() v.assert_called_once() - seq.assert_not_called() - par.assert_not_called() + run.assert_not_called() -def test_entrypoint_runs_initial_sequential_when_no_index(base_config: Path): +def test_setup_runs_initial_sequential_when_no_index(base_config: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "UPDATE_STRATEGY", "PARALLEL") notify, validate = _patch_common() - with notify, validate, patch("src.entrypoint.sequential_update") as seq: - entrypoint.main() - seq.assert_called_once() + with notify, validate, patch("src.entrypoint.run_update") as run: + entrypoint.run_setup() + run.assert_called_once_with("SEQUENTIAL") -def test_entrypoint_skips_initial_when_disabled(base_config: Path, monkeypatch: pytest.MonkeyPatch): +def test_setup_skips_initial_when_disabled(base_config: Path, monkeypatch: pytest.MonkeyPatch): monkeypatch.setattr(config, "INITIAL_DOWNLOAD", False) notify, validate = _patch_common() - with notify, validate, patch("src.entrypoint.sequential_update") as seq: - entrypoint.main() - seq.assert_not_called() + with notify, validate, patch("src.entrypoint.run_update") as run: + entrypoint.run_setup() + run.assert_not_called() -def test_entrypoint_force_update_uses_parallel_when_set(base_config: Path, monkeypatch: pytest.MonkeyPatch): +def 
test_setup_force_update_uses_parallel_when_set(base_config: Path, monkeypatch: pytest.MonkeyPatch): monkeypatch.setattr(config, "FORCE_UPDATE", True) monkeypatch.setattr(config, "UPDATE_STRATEGY", "PARALLEL") notify, validate = _patch_common() - with ( - notify, - validate, - patch("src.entrypoint.parallel_update") as par, - patch("src.entrypoint.sequential_update") as seq, - ): - entrypoint.main() - par.assert_called_once() - seq.assert_not_called() + with notify, validate, patch("src.entrypoint.run_update") as run: + entrypoint.run_setup() + run.assert_called_once_with("PARALLEL") -def test_entrypoint_force_update_uses_sequential_when_not_parallel(base_config: Path, monkeypatch: pytest.MonkeyPatch): +def test_setup_force_update_uses_sequential_when_not_parallel(base_config: Path, monkeypatch: pytest.MonkeyPatch): monkeypatch.setattr(config, "FORCE_UPDATE", True) monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") notify, validate = _patch_common() - with ( - notify, - validate, - patch("src.entrypoint.sequential_update") as seq, - patch("src.entrypoint.parallel_update") as par, - ): - entrypoint.main() - seq.assert_called_once() - par.assert_not_called() + with notify, validate, patch("src.entrypoint.run_update") as run: + entrypoint.run_setup() + run.assert_called_once_with("SEQUENTIAL") -def test_entrypoint_force_update_exits_on_insufficient_space(base_config: Path, monkeypatch: pytest.MonkeyPatch): +def test_setup_force_update_raises_and_notifies_on_insufficient_space( + base_config: Path, monkeypatch: pytest.MonkeyPatch +): monkeypatch.setattr(config, "FORCE_UPDATE", True) - monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") notify, validate = _patch_common() with ( - notify, + notify as n, validate, - patch("src.entrypoint.sequential_update", side_effect=InsufficientSpaceError("no space")), - pytest.raises(SystemExit) as exc, + patch("src.entrypoint.run_update", side_effect=InsufficientSpaceError("no space")), + 
pytest.raises(InsufficientSpaceError), ): - entrypoint.main() - assert exc.value.code == 75 + entrypoint.run_setup() + + messages = [call.args[0] for call in n.call_args_list] + assert any("force update failed" in m for m in messages) -def test_entrypoint_force_update_propagates_unexpected_error(base_config: Path, monkeypatch: pytest.MonkeyPatch): +def test_setup_force_update_propagates_unexpected_error(base_config: Path, monkeypatch: pytest.MonkeyPatch): monkeypatch.setattr(config, "FORCE_UPDATE", True) - monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") notify, validate = _patch_common() with ( notify, validate, - patch("src.entrypoint.sequential_update", side_effect=RuntimeError("boom")), + patch("src.entrypoint.run_update", side_effect=RuntimeError("boom")), pytest.raises(RuntimeError), ): - entrypoint.main() + entrypoint.run_setup() -def test_entrypoint_initial_download_exits_on_insufficient_space(base_config: Path): +def test_setup_initial_download_raises_and_notifies_on_insufficient_space(base_config: Path): notify, validate = _patch_common() with ( - notify, + notify as n, validate, - patch("src.entrypoint.sequential_update", side_effect=InsufficientSpaceError("no space")), - pytest.raises(SystemExit) as exc, + patch("src.entrypoint.run_update", side_effect=InsufficientSpaceError("no space")), + pytest.raises(InsufficientSpaceError), ): - entrypoint.main() - assert exc.value.code == 75 + entrypoint.run_setup() + + messages = [call.args[0] for call in n.call_args_list] + assert any("cannot start" in m for m in messages) -def test_entrypoint_validate_config_failure_exits(base_config: Path): - base_config.mkdir() +def test_setup_validate_config_failure_raises(base_config: Path): + base_config.mkdir(parents=True) with ( patch("src.entrypoint.send_notification"), patch("src.entrypoint.validate_config", side_effect=ValueError("bad")), - pytest.raises(SystemExit) as exc, + pytest.raises(ValueError, match="bad"), ): - entrypoint.main() - assert 
exc.value.code == 1 + entrypoint.run_setup() + + +def test_setup_reconciles_interrupted_import(base_config: Path): + base_config.mkdir(parents=True) + (base_config / "segment.bin").write_text("partial") + (Path(config.DATA_DIR) / ".photon-import-in-progress").write_text("") + + notify, validate = _patch_common() + with notify, validate, patch("src.entrypoint.run_update_or_import"): + entrypoint.run_setup() + assert not Path(config.PHOTON_DATA_DIR).exists() -def test_entrypoint_min_date_triggers_update(base_config: Path, monkeypatch: pytest.MonkeyPatch): - base_config.mkdir() + +def test_setup_min_date_triggers_update(base_config: Path, monkeypatch: pytest.MonkeyPatch): + base_config.mkdir(parents=True) monkeypatch.setattr(config, "MIN_INDEX_DATE", "01.01.26") notify, validate = _patch_common() with ( notify, validate, patch("src.entrypoint.check_index_age", return_value=True), - patch("src.entrypoint.sequential_update") as seq, + patch("src.entrypoint.run_update") as run, ): - entrypoint.main() - seq.assert_called_once() + entrypoint.run_setup() + run.assert_called_once_with("SEQUENTIAL") -def test_entrypoint_min_date_skips_when_index_recent(base_config: Path, monkeypatch: pytest.MonkeyPatch): - base_config.mkdir() +def test_setup_min_date_skips_when_index_recent(base_config: Path, monkeypatch: pytest.MonkeyPatch): + base_config.mkdir(parents=True) monkeypatch.setattr(config, "MIN_INDEX_DATE", "01.01.26") notify, validate = _patch_common() with ( notify, validate, patch("src.entrypoint.check_index_age", return_value=False), - patch("src.entrypoint.sequential_update") as seq, + patch("src.entrypoint.run_update") as run, ): - entrypoint.main() - seq.assert_not_called() + entrypoint.run_setup() + run.assert_not_called() -def test_entrypoint_min_date_exits_on_insufficient_space(base_config: Path, monkeypatch: pytest.MonkeyPatch): - base_config.mkdir() +def test_setup_min_date_raises_and_notifies_on_insufficient_space(base_config: Path, monkeypatch: pytest.MonkeyPatch): 
+ base_config.mkdir(parents=True) monkeypatch.setattr(config, "MIN_INDEX_DATE", "01.01.26") notify, validate = _patch_common() with ( - notify, + notify as n, validate, patch("src.entrypoint.check_index_age", return_value=True), - patch("src.entrypoint.sequential_update", side_effect=InsufficientSpaceError("no")), - pytest.raises(SystemExit) as exc, + patch("src.entrypoint.run_update", side_effect=InsufficientSpaceError("no")), + pytest.raises(InsufficientSpaceError), ): - entrypoint.main() - assert exc.value.code == 75 + entrypoint.run_setup() + + messages = [call.args[0] for call in n.call_args_list] + assert any("minimum date update failed" in m for m in messages) -def test_entrypoint_min_date_propagates_unexpected_error(base_config: Path, monkeypatch: pytest.MonkeyPatch): - base_config.mkdir() +def test_setup_min_date_propagates_unexpected_error(base_config: Path, monkeypatch: pytest.MonkeyPatch): + base_config.mkdir(parents=True) monkeypatch.setattr(config, "MIN_INDEX_DATE", "01.01.26") notify, validate = _patch_common() with ( notify, validate, patch("src.entrypoint.check_index_age", return_value=True), - patch("src.entrypoint.sequential_update", side_effect=RuntimeError("boom")), + patch("src.entrypoint.run_update", side_effect=RuntimeError("boom")), pytest.raises(RuntimeError), ): - entrypoint.main() + entrypoint.run_setup() -def test_entrypoint_logs_apprise_redacted_when_set( +def test_setup_logs_apprise_redacted_when_set( base_config: Path, monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCaptureFixture ): import logging as _logging - base_config.mkdir() + base_config.mkdir(parents=True) monkeypatch.setattr(config, "APPRISE_URLS", "tgram://abc") caplog.set_level(_logging.INFO, logger="root") with patch("src.entrypoint.send_notification"), patch("src.entrypoint.validate_config"): - entrypoint.main() + entrypoint.run_setup() assert any("APPRISE_URLS: REDACTED" in r.message for r in caplog.records) -def 
test_entrypoint_runs_jsonl_import_when_no_index(base_config: Path, monkeypatch: pytest.MonkeyPatch): +def test_setup_runs_jsonl_import_when_no_index(base_config: Path, monkeypatch: pytest.MonkeyPatch): monkeypatch.setattr(config, "IMPORT_MODE", "jsonl") notify, validate = _patch_common() with notify, validate, patch("src.entrypoint.run_jsonl_import") as imp: - entrypoint.main() + entrypoint.run_setup() imp.assert_called_once() -def test_entrypoint_skips_jsonl_rebuild_when_index_present(base_config: Path, monkeypatch: pytest.MonkeyPatch): - base_config.mkdir() +def test_setup_skips_jsonl_rebuild_when_index_present(base_config: Path, monkeypatch: pytest.MonkeyPatch): + base_config.mkdir(parents=True) monkeypatch.setattr(config, "IMPORT_MODE", "jsonl") notify, validate = _patch_common() with notify, validate, patch("src.entrypoint.run_jsonl_import") as imp: - entrypoint.main() + entrypoint.run_setup() imp.assert_not_called() -def test_entrypoint_force_update_runs_jsonl_import(base_config: Path, monkeypatch: pytest.MonkeyPatch): +def test_setup_force_update_runs_jsonl_import(base_config: Path, monkeypatch: pytest.MonkeyPatch): monkeypatch.setattr(config, "IMPORT_MODE", "jsonl") monkeypatch.setattr(config, "FORCE_UPDATE", True) notify, validate = _patch_common() - with ( - notify, - validate, - patch("src.entrypoint.run_jsonl_import") as imp, - patch("src.entrypoint.sequential_update") as seq, - patch("src.entrypoint.parallel_update") as par, - ): - entrypoint.main() + with notify, validate, patch("src.entrypoint.run_jsonl_import") as imp, patch("src.entrypoint.run_update") as run: + entrypoint.run_setup() imp.assert_called_once() - seq.assert_not_called() - par.assert_not_called() + run.assert_not_called() diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py deleted file mode 100644 index 67376023..00000000 --- a/tests/test_filesystem.py +++ /dev/null @@ -1,346 +0,0 @@ -import hashlib -import os -import subprocess -from pathlib import Path -from 
unittest.mock import patch - -import pytest - -from src import filesystem -from src.utils import config - - -@pytest.fixture -def fake_dirs(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: - data_dir = tmp_path / "data" - photon_data_dir = data_dir / "photon_data" - temp_dir = data_dir / "temp" - os_node_dir = photon_data_dir / "node_1" - data_dir.mkdir() - - monkeypatch.setattr(config, "DATA_DIR", str(data_dir)) - monkeypatch.setattr(config, "PHOTON_DATA_DIR", str(photon_data_dir)) - monkeypatch.setattr(config, "TEMP_DIR", str(temp_dir)) - monkeypatch.setattr(config, "OS_NODE_DIR", str(os_node_dir)) - return data_dir - - -def test_verify_checksum_returns_true_on_match(tmp_path: Path): - index_file = tmp_path / "index.bin" - index_file.write_bytes(b"hello world") - expected = hashlib.md5(b"hello world").hexdigest() # noqa: S324 - md5_file = tmp_path / "index.bin.md5" - md5_file.write_text(f"{expected} index.bin\n") - - assert filesystem.verify_checksum(str(md5_file), str(index_file)) is True - - -def test_verify_checksum_raises_on_mismatch(tmp_path: Path): - index_file = tmp_path / "index.bin" - index_file.write_bytes(b"hello world") - md5_file = tmp_path / "index.bin.md5" - md5_file.write_text("00000000000000000000000000000000 index.bin\n") - - with pytest.raises(Exception, match="Checksum mismatch"): - filesystem.verify_checksum(str(md5_file), str(index_file)) - - -def test_verify_checksum_raises_when_index_missing(tmp_path: Path): - md5_file = tmp_path / "x.md5" - md5_file.write_text("0" * 32) - with pytest.raises(FileNotFoundError): - filesystem.verify_checksum(str(md5_file), str(tmp_path / "missing")) - - -def test_verify_checksum_raises_when_md5_missing(tmp_path: Path): - index_file = tmp_path / "index.bin" - index_file.write_bytes(b"data") - with pytest.raises(FileNotFoundError): - filesystem.verify_checksum(str(tmp_path / "missing.md5"), str(index_file)) - - -def test_verify_checksum_raises_on_empty_md5_file(tmp_path: Path): - index_file = tmp_path 
/ "index.bin" - index_file.write_bytes(b"data") - md5_file = tmp_path / "empty.md5" - md5_file.write_text("") - with pytest.raises((IndexError, ValueError)): - filesystem.verify_checksum(str(md5_file), str(index_file)) - - -def test_clear_temp_dir_removes_existing_temp(fake_dirs: Path): - temp = Path(config.TEMP_DIR) - temp.mkdir() - (temp / "file.txt").write_text("x") - (temp / "sub").mkdir() - (temp / "sub" / "nested").write_text("y") - - filesystem.clear_temp_dir() - - assert not temp.exists() - - -def test_clear_temp_dir_handles_missing_temp_dir(fake_dirs: Path): - assert not Path(config.TEMP_DIR).exists() - filesystem.clear_temp_dir() - - -def test_update_timestamp_marker_creates_marker(fake_dirs: Path): - filesystem.update_timestamp_marker() - marker = Path(config.DATA_DIR) / ".photon-index-updated" - assert marker.exists() - - -def test_update_timestamp_marker_swallows_errors(fake_dirs: Path): - with patch("src.filesystem.Path.touch", side_effect=OSError("nope")): - filesystem.update_timestamp_marker() - - -def test_import_in_progress_marker_roundtrip(fake_dirs: Path): - assert filesystem.import_was_interrupted() is False - - filesystem.mark_import_in_progress() - assert filesystem.import_was_interrupted() is True - - filesystem.clear_import_in_progress() - assert filesystem.import_was_interrupted() is False - - -def test_clear_import_in_progress_is_idempotent(fake_dirs: Path): - filesystem.clear_import_in_progress() - assert filesystem.import_was_interrupted() is False - - -def test_remove_incomplete_index_removes_photon_data(fake_dirs: Path): - node_dir = Path(config.OS_NODE_DIR) - node_dir.mkdir(parents=True) - (node_dir / "segment.bin").write_text("partial") - - filesystem.remove_incomplete_index() - - assert not Path(config.PHOTON_DATA_DIR).exists() - - -def test_reconcile_interrupted_import_cleans_partial_index(fake_dirs: Path): - node_dir = Path(config.OS_NODE_DIR) - node_dir.mkdir(parents=True) - (node_dir / "segment.bin").write_text("partial") - 
filesystem.mark_import_in_progress() - - filesystem.reconcile_interrupted_import() - - assert not Path(config.PHOTON_DATA_DIR).exists() - assert filesystem.import_was_interrupted() is False - - -def test_reconcile_interrupted_import_noop_without_marker(fake_dirs: Path): - node_dir = Path(config.OS_NODE_DIR) - node_dir.mkdir(parents=True) - (node_dir / "segment.bin").write_text("complete") - - filesystem.reconcile_interrupted_import() - - assert Path(config.OS_NODE_DIR).exists() - - -def test_cleanup_staging_and_temp_backup_removes_both(tmp_path: Path): - staging = tmp_path / "staging" - backup = tmp_path / "backup" - staging.mkdir() - backup.mkdir() - (staging / "f").write_text("x") - (backup / "f").write_text("y") - - filesystem.cleanup_staging_and_temp_backup(str(staging), str(backup)) - - assert not staging.exists() - assert not backup.exists() - - -def test_cleanup_staging_and_temp_backup_no_op_when_missing(tmp_path: Path): - filesystem.cleanup_staging_and_temp_backup(str(tmp_path / "a"), str(tmp_path / "b")) - - -def test_cleanup_staging_and_temp_backup_swallows_rmtree_errors(tmp_path: Path): - staging = tmp_path / "staging" - staging.mkdir() - with patch("src.filesystem.shutil.rmtree", side_effect=OSError("locked")): - filesystem.cleanup_staging_and_temp_backup(str(staging), str(tmp_path / "missing")) - - -def test_cleanup_backup_after_verification_removes_backup(tmp_path: Path): - target = tmp_path / "node_1" - backup = Path(str(target) + ".backup") - backup.mkdir() - (backup / "x").write_text("x") - - assert filesystem.cleanup_backup_after_verification(str(target)) is True - assert not backup.exists() - - -def test_cleanup_backup_after_verification_returns_true_when_no_backup(tmp_path: Path): - target = tmp_path / "node_1" - assert filesystem.cleanup_backup_after_verification(str(target)) is True - - -def test_cleanup_backup_after_verification_returns_false_on_failure(tmp_path: Path): - target = tmp_path / "node_1" - backup = Path(str(target) + ".backup") - 
backup.mkdir() - with patch("src.filesystem.shutil.rmtree", side_effect=OSError("locked")): - assert filesystem.cleanup_backup_after_verification(str(target)) is False - - -def test_move_index_atomic_swaps_into_target(tmp_path: Path): - source = tmp_path / "source" - source.mkdir() - (source / "data.txt").write_text("new") - - target = tmp_path / "target" - - assert filesystem.move_index_atomic(str(source), str(target)) is True - assert (target / "data.txt").read_text() == "new" - assert not source.exists() - assert not (tmp_path / "target.staging").exists() - - -def test_move_index_atomic_replaces_existing_target(tmp_path: Path): - target = tmp_path / "target" - target.mkdir() - (target / "old.txt").write_text("old") - - source = tmp_path / "source" - source.mkdir() - (source / "new.txt").write_text("new") - - assert filesystem.move_index_atomic(str(source), str(target)) is True - assert (target / "new.txt").read_text() == "new" - assert not (target / "old.txt").exists() - backup = Path(str(target) + ".backup") - assert backup.exists() - assert (backup / "old.txt").read_text() == "old" - - -def test_move_index_atomic_cleans_existing_staging_dir(tmp_path: Path): - source = tmp_path / "source" - source.mkdir() - (source / "x.txt").write_text("x") - target = tmp_path / "target" - leftover_staging = Path(str(target) + ".staging") - leftover_staging.mkdir() - (leftover_staging / "stale.txt").write_text("stale") - - assert filesystem.move_index_atomic(str(source), str(target)) is True - assert (target / "x.txt").read_text() == "x" - assert not leftover_staging.exists() - - -def test_move_index_atomic_rolls_back_on_failure(tmp_path: Path): - source = tmp_path / "source" - source.mkdir() - (source / "new.txt").write_text("new") - target = tmp_path / "target" - target.mkdir() - (target / "old.txt").write_text("old") - - real_rename = os.rename - call_count = {"n": 0} - - def fake_rename(src, dst): - call_count["n"] += 1 - if call_count["n"] == 2: - raise OSError("rename 
boom") - real_rename(src, dst) - - with patch("src.filesystem.os.rename", side_effect=fake_rename), pytest.raises(OSError, match="rename boom"): - filesystem.move_index_atomic(str(source), str(target)) - - assert (target / "old.txt").read_text() == "old" - assert not Path(str(target) + ".backup").exists() - - -def test_rollback_atomic_move_keeps_new_index_when_succeeded(tmp_path: Path): - target = tmp_path / "target" - target.mkdir() - (target / "fresh.txt").write_text("fresh") - - filesystem.rollback_atomic_move( - str(tmp_path / "source"), str(target), str(tmp_path / "staging"), str(tmp_path / "backup") - ) - - assert (target / "fresh.txt").read_text() == "fresh" - - -def test_rollback_atomic_move_swallows_inner_exceptions(tmp_path: Path): - target = tmp_path / "target" - target.mkdir() - backup = tmp_path / "backup" - backup.mkdir() - - with patch("src.filesystem.shutil.rmtree", side_effect=OSError("nope")): - filesystem.rollback_atomic_move(str(tmp_path / "source"), str(target), str(tmp_path / "staging"), str(backup)) - - -def test_move_index_calls_atomic_and_writes_marker(fake_dirs: Path): - temp_photon = Path(config.TEMP_DIR) / "photon_data" - temp_photon.mkdir(parents=True) - (temp_photon / "node_1").mkdir() - (temp_photon / "node_1" / "data.bin").write_text("payload") - - assert filesystem.move_index() is True - - marker = Path(config.DATA_DIR) / ".photon-index-updated" - assert marker.exists() - target = Path(config.PHOTON_DATA_DIR) - assert (target / "node_1" / "data.bin").read_text() == "payload" - - -def test_move_index_returns_false_when_atomic_returns_false(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): - monkeypatch.setattr(filesystem, "move_index_atomic", lambda *_: False) - assert filesystem.move_index() is False - marker = Path(config.DATA_DIR) / ".photon-index-updated" - assert not marker.exists() - - -def test_extract_index_runs_lbzip2_command(fake_dirs: Path): - index_file = Path(config.TEMP_DIR).parent / "index.tar.bz2" - 
index_file.parent.mkdir(parents=True, exist_ok=True) - index_file.write_bytes(b"x") - - completed = subprocess.CompletedProcess(args="cmd", returncode=0, stdout="ok", stderr="") - with patch("src.filesystem.subprocess.run", return_value=completed) as run: - filesystem.extract_index(str(index_file)) - - args, kwargs = run.call_args - assert "lbzip2 -d -c" in args[0] - assert str(index_file) in args[0] - assert kwargs["shell"] is True - assert kwargs["check"] is True - assert Path(config.TEMP_DIR).exists() - - -def test_extract_index_creates_temp_dir_when_missing(fake_dirs: Path): - index_file = Path(config.DATA_DIR) / "index.tar.bz2" - index_file.write_bytes(b"x") - - assert not Path(config.TEMP_DIR).exists() - completed = subprocess.CompletedProcess(args="cmd", returncode=0, stdout="", stderr="") - with patch("src.filesystem.subprocess.run", return_value=completed): - filesystem.extract_index(str(index_file)) - - assert Path(config.TEMP_DIR).exists() - - -def test_extract_index_propagates_called_process_error(fake_dirs: Path): - index_file = Path(config.DATA_DIR) / "index.tar.bz2" - index_file.write_bytes(b"x") - err = subprocess.CalledProcessError(returncode=1, cmd="lbzip2 ...", output="", stderr="boom") - with patch("src.filesystem.subprocess.run", side_effect=err), pytest.raises(subprocess.CalledProcessError): - filesystem.extract_index(str(index_file)) - - -def test_extract_index_propagates_unexpected_error(fake_dirs: Path): - index_file = Path(config.DATA_DIR) / "index.tar.bz2" - index_file.write_bytes(b"x") - with patch("src.filesystem.subprocess.run", side_effect=RuntimeError("nope")), pytest.raises(RuntimeError): - filesystem.extract_index(str(index_file)) diff --git a/tests/test_importer.py b/tests/test_importer.py index 16da5aff..96be73b8 100644 --- a/tests/test_importer.py +++ b/tests/test_importer.py @@ -8,9 +8,8 @@ @pytest.fixture(autouse=True) def _stub_index_markers(monkeypatch): - monkeypatch.setattr(importer, "mark_import_in_progress", lambda: 
None) - monkeypatch.setattr(importer, "update_timestamp_marker", lambda: None) - monkeypatch.setattr(importer, "clear_import_in_progress", lambda: None) + monkeypatch.setattr(importer, "begin_import", lambda: None) + monkeypatch.setattr(importer, "complete_import", lambda: None) def _noop_makedirs(path: str, exist_ok: bool = False) -> None: @@ -215,7 +214,7 @@ def test_run_jsonl_import_raises_when_import_process_fails(monkeypatch): assert cleanup_calls == [True] -def test_run_jsonl_import_marks_then_clears_progress_on_success(monkeypatch): +def test_run_jsonl_import_marks_then_completes_on_success(monkeypatch): process = RecordingProcess() events = [] @@ -224,13 +223,12 @@ def test_run_jsonl_import_marks_then_clears_progress_on_success(monkeypatch): monkeypatch.setattr(importer, "stream_decompress", lambda path: [b'{"type":"Place"}\n']) monkeypatch.setattr(importer, "_start_photon_import", lambda input_source, country_codes=None: process) monkeypatch.setattr(importer, "clear_temp_dir", lambda: None) - monkeypatch.setattr(importer, "mark_import_in_progress", lambda: events.append("mark")) - monkeypatch.setattr(importer, "update_timestamp_marker", lambda: events.append("timestamp")) - monkeypatch.setattr(importer, "clear_import_in_progress", lambda: events.append("clear")) + monkeypatch.setattr(importer, "begin_import", lambda: events.append("begin")) + monkeypatch.setattr(importer, "complete_import", lambda: events.append("complete")) importer.run_jsonl_import() - assert events == ["mark", "timestamp", "clear"] + assert events == ["begin", "complete"] def test_run_jsonl_import_leaves_progress_marker_on_failure(monkeypatch): @@ -242,11 +240,10 @@ def test_run_jsonl_import_leaves_progress_marker_on_failure(monkeypatch): monkeypatch.setattr(importer, "stream_decompress", lambda path: [b'{"type":"Place"}\n']) monkeypatch.setattr(importer, "_start_photon_import", lambda input_source, country_codes=None: process) monkeypatch.setattr(importer, "clear_temp_dir", lambda: 
None) - monkeypatch.setattr(importer, "mark_import_in_progress", lambda: events.append("mark")) - monkeypatch.setattr(importer, "update_timestamp_marker", lambda: events.append("timestamp")) - monkeypatch.setattr(importer, "clear_import_in_progress", lambda: events.append("clear")) + monkeypatch.setattr(importer, "begin_import", lambda: events.append("begin")) + monkeypatch.setattr(importer, "complete_import", lambda: events.append("complete")) with pytest.raises(RuntimeError, match="exit code 2"): importer.run_jsonl_import() - assert events == ["mark"] + assert events == ["begin"] diff --git a/tests/test_index.py b/tests/test_index.py new file mode 100644 index 00000000..a37ece66 --- /dev/null +++ b/tests/test_index.py @@ -0,0 +1,266 @@ +import os +from pathlib import Path +from unittest.mock import patch + +import pytest + +from src import index +from src.utils import config + + +@pytest.fixture +def fake_dirs(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + data_dir = tmp_path / "data" + photon_data_dir = data_dir / "photon_data" + temp_dir = data_dir / "temp" + os_node_dir = photon_data_dir / "node_1" + data_dir.mkdir() + + monkeypatch.setattr(config, "DATA_DIR", str(data_dir)) + monkeypatch.setattr(config, "PHOTON_DATA_DIR", str(photon_data_dir)) + monkeypatch.setattr(config, "TEMP_DIR", str(temp_dir)) + monkeypatch.setattr(config, "OS_NODE_DIR", str(os_node_dir)) + return data_dir + + +def test_is_present_false_when_no_index(fake_dirs: Path): + assert index.is_present() is False + + +def test_is_present_true_when_node_dir_exists(fake_dirs: Path): + Path(config.OS_NODE_DIR).mkdir(parents=True) + assert index.is_present() is True + + +def test_last_updated_uses_marker_when_present(fake_dirs: Path): + marker = fake_dirs / ".photon-index-updated" + marker.write_text("") + os.utime(marker, (1_000_000, 1_000_000)) + assert index.last_updated() == 1_000_000 + assert index.has_update_timestamp() is True + + +def 
test_last_updated_falls_back_to_node_dir_mtime(fake_dirs: Path): + node_dir = Path(config.OS_NODE_DIR) + node_dir.mkdir(parents=True) + os.utime(node_dir, (2_000_000, 2_000_000)) + assert index.last_updated() == 2_000_000 + assert index.has_update_timestamp() is False + + +def test_last_updated_returns_zero_when_nothing_exists(fake_dirs: Path): + assert index.last_updated() == 0.0 + + +def test_mark_updated_creates_marker(fake_dirs: Path): + index.mark_updated() + assert (fake_dirs / ".photon-index-updated").exists() + + +def test_mark_updated_swallows_errors(fake_dirs: Path): + with patch("src.index.Path.touch", side_effect=OSError("nope")): + index.mark_updated() + + +def test_import_marker_roundtrip(fake_dirs: Path): + assert index.import_was_interrupted() is False + + index.begin_import() + assert index.import_was_interrupted() is True + + index.complete_import() + assert index.import_was_interrupted() is False + + +def test_complete_import_touches_update_timestamp(fake_dirs: Path): + index.begin_import() + index.complete_import() + assert index.has_update_timestamp() is True + + +def test_complete_import_is_idempotent(fake_dirs: Path): + index.complete_import() + assert index.import_was_interrupted() is False + + +def test_begin_import_raises_when_marker_cannot_be_written(fake_dirs: Path): + with patch("src.index.Path.touch", side_effect=OSError("read-only")), pytest.raises(OSError, match="read-only"): + index.begin_import() + + +def test_complete_import_raises_when_marker_cannot_be_cleared(fake_dirs: Path): + index.begin_import() + with patch("src.index.Path.unlink", side_effect=OSError("read-only")), pytest.raises(OSError, match="read-only"): + index.complete_import() + + +def test_reconcile_cleans_partial_index(fake_dirs: Path): + node_dir = Path(config.OS_NODE_DIR) + node_dir.mkdir(parents=True) + (node_dir / "segment.bin").write_text("partial") + index.begin_import() + + index.reconcile() + + assert not Path(config.PHOTON_DATA_DIR).exists() + assert 
index.import_was_interrupted() is False + + +def test_reconcile_noop_without_marker(fake_dirs: Path): + node_dir = Path(config.OS_NODE_DIR) + node_dir.mkdir(parents=True) + (node_dir / "segment.bin").write_text("complete") + + index.reconcile() + + assert Path(config.OS_NODE_DIR).exists() + + +def test_activate_swaps_into_target(fake_dirs: Path): + source = fake_dirs / "source" + source.mkdir() + (source / "data.txt").write_text("new") + + index.activate(str(source)) + + target = Path(config.PHOTON_DATA_DIR) + assert (target / "data.txt").read_text() == "new" + assert not source.exists() + assert not Path(str(target) + ".staging").exists() + assert index.has_update_timestamp() is True + + +def test_activate_replaces_existing_target_keeping_backup(fake_dirs: Path): + target = Path(config.PHOTON_DATA_DIR) + target.mkdir(parents=True) + (target / "old.txt").write_text("old") + + source = fake_dirs / "source" + source.mkdir() + (source / "new.txt").write_text("new") + + index.activate(str(source)) + + assert (target / "new.txt").read_text() == "new" + assert not (target / "old.txt").exists() + backup = Path(str(target) + ".backup") + assert backup.exists() + assert (backup / "old.txt").read_text() == "old" + + +def test_activate_cleans_leftover_staging_dir(fake_dirs: Path): + source = fake_dirs / "source" + source.mkdir() + (source / "x.txt").write_text("x") + leftover_staging = Path(config.PHOTON_DATA_DIR + ".staging") + leftover_staging.mkdir(parents=True) + (leftover_staging / "stale.txt").write_text("stale") + + index.activate(str(source)) + + assert (Path(config.PHOTON_DATA_DIR) / "x.txt").read_text() == "x" + assert not leftover_staging.exists() + + +def _fail_staging_to_target_rename(): + real_rename = os.rename + + def fake_rename(src, dst): + if str(src).endswith(".staging") and str(dst) == config.PHOTON_DATA_DIR: + raise OSError("rename boom") + real_rename(src, dst) + + return fake_rename + + +def test_activate_rolls_back_on_failure(fake_dirs: Path): + 
source = fake_dirs / "source" + source.mkdir() + (source / "new.txt").write_text("new") + target = Path(config.PHOTON_DATA_DIR) + target.mkdir(parents=True) + (target / "old.txt").write_text("old") + + with ( + patch("src.index.os.rename", side_effect=_fail_staging_to_target_rename()), + pytest.raises(OSError, match="rename boom"), + ): + index.activate(str(source)) + + assert (target / "old.txt").read_text() == "old" + assert (source / "new.txt").read_text() == "new" + assert not Path(str(target) + ".backup").exists() + assert not Path(str(target) + ".staging").exists() + assert index.has_update_timestamp() is False + + +def test_activate_rolls_back_on_failure_without_existing_target(fake_dirs: Path): + source = fake_dirs / "source" + source.mkdir() + (source / "new.txt").write_text("new") + + with ( + patch("src.index.os.rename", side_effect=_fail_staging_to_target_rename()), + pytest.raises(OSError, match="rename boom"), + ): + index.activate(str(source)) + + assert not Path(config.PHOTON_DATA_DIR).exists() + assert (source / "new.txt").read_text() == "new" + assert not Path(config.PHOTON_DATA_DIR + ".staging").exists() + + +def test_activate_raises_rollback_error_when_rollback_fails(fake_dirs: Path): + source = fake_dirs / "source" + source.mkdir() + (source / "new.txt").write_text("new") + target = Path(config.PHOTON_DATA_DIR) + target.mkdir(parents=True) + (target / "old.txt").write_text("old") + + real_rename = os.rename + + def fake_rename(src, dst): + if str(dst) == config.PHOTON_DATA_DIR: + raise OSError("rename boom") + real_rename(src, dst) + + with ( + patch("src.index.os.rename", side_effect=fake_rename), + pytest.raises(index.IndexRollbackError, match="manual intervention"), + ): + index.activate(str(source)) + + +def test_activate_aborts_when_leftover_cleanup_fails(fake_dirs: Path): + source = fake_dirs / "source" + source.mkdir() + (source / "new.txt").write_text("new") + leftover = Path(config.PHOTON_DATA_DIR + ".staging") + 
leftover.mkdir(parents=True) + + with patch("src.index.shutil.rmtree", side_effect=OSError("locked")), pytest.raises(OSError, match="locked"): + index.activate(str(source)) + + assert (source / "new.txt").read_text() == "new" + assert not Path(config.PHOTON_DATA_DIR).exists() + + +def test_drop_backup_removes_backup(fake_dirs: Path): + backup = Path(config.PHOTON_DATA_DIR + ".backup") + backup.mkdir(parents=True) + (backup / "x").write_text("x") + + assert index.drop_backup() is True + assert not backup.exists() + + +def test_drop_backup_returns_true_when_no_backup(fake_dirs: Path): + assert index.drop_backup() is True + + +def test_drop_backup_returns_false_on_failure(fake_dirs: Path): + backup = Path(config.PHOTON_DATA_DIR + ".backup") + backup.mkdir(parents=True) + with patch("src.index.shutil.rmtree", side_effect=OSError("locked")): + assert index.drop_backup() is False diff --git a/tests/test_process_manager.py b/tests/test_process_manager.py index 29286ba9..7a2dd1d4 100644 --- a/tests/test_process_manager.py +++ b/tests/test_process_manager.py @@ -79,19 +79,6 @@ def test_handle_shutdown_sets_exit_and_calls_shutdown(manager: process_manager.P shutdown.assert_called_once() -def test_run_initial_setup_exits_on_failure(manager: process_manager.PhotonManager): - completed = subprocess.CompletedProcess(args=[], returncode=1) - with patch("src.process_manager.subprocess.run", return_value=completed), pytest.raises(SystemExit) as exc: - manager.run_initial_setup() - assert exc.value.code == 1 - - -def test_run_initial_setup_succeeds_on_zero_exit(manager: process_manager.PhotonManager): - completed = subprocess.CompletedProcess(args=[], returncode=0) - with patch("src.process_manager.subprocess.run", return_value=completed): - manager.run_initial_setup() - - def test_start_photon_builds_full_command(manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch): monkeypatch.setattr(config, "ENABLE_METRICS", True) monkeypatch.setattr(config, "JAVA_PARAMS", 
"-Xmx4g") @@ -242,7 +229,7 @@ def test_cleanup_lock_files_swallows_remove_errors( def test_run_update_skips_when_disabled(manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch): monkeypatch.setattr(config, "UPDATE_STRATEGY", "DISABLED") - with patch("src.process_manager.subprocess.run") as run: + with patch("src.process_manager.update.run_update") as run: manager.run_update() run.assert_not_called() @@ -253,7 +240,7 @@ def test_run_update_no_op_when_index_up_to_date( monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") with ( patch("src.process_manager.compare_mtime", return_value=False), - patch("src.process_manager.subprocess.run") as run, + patch("src.process_manager.update.run_update") as run, ): manager.run_update() run.assert_not_called() @@ -262,66 +249,149 @@ def test_run_update_no_op_when_index_up_to_date( def test_run_update_parallel_path(manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch): monkeypatch.setattr(config, "UPDATE_STRATEGY", "PARALLEL") - completed = subprocess.CompletedProcess(args=[], returncode=0) with ( patch("src.process_manager.compare_mtime", return_value=True), - patch("src.process_manager.subprocess.run", return_value=completed), + patch("src.process_manager.update.run_update") as run, + patch("src.process_manager.send_notification"), patch.object(manager, "stop_photon") as stop, patch.object(manager, "start_photon", return_value=True) as start, - patch("src.process_manager.cleanup_backup_after_verification") as cleanup, + patch("src.process_manager.index.drop_backup") as cleanup, ): manager.run_update() + run.assert_called_once_with("PARALLEL") stop.assert_called_once() start.assert_called_once() cleanup.assert_called_once() + assert manager.state == process_manager.AppState.RUNNING -def test_run_update_parallel_logs_failure_when_health_check_fails( +def test_run_update_parallel_keeps_backup_when_health_check_fails( manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch 
): monkeypatch.setattr(config, "UPDATE_STRATEGY", "PARALLEL") - completed = subprocess.CompletedProcess(args=[], returncode=0) with ( patch("src.process_manager.compare_mtime", return_value=True), - patch("src.process_manager.subprocess.run", return_value=completed), + patch("src.process_manager.update.run_update"), + patch("src.process_manager.send_notification"), patch.object(manager, "stop_photon"), patch.object(manager, "start_photon", return_value=False), - patch("src.process_manager.cleanup_backup_after_verification") as cleanup, + patch("src.process_manager.index.drop_backup") as cleanup, ): manager.run_update() cleanup.assert_not_called() + assert manager.state == process_manager.AppState.RUNNING def test_run_update_sequential_path(manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch): monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") - completed = subprocess.CompletedProcess(args=[], returncode=0) with ( patch("src.process_manager.compare_mtime", return_value=True), - patch("src.process_manager.subprocess.run", return_value=completed), + patch("src.process_manager.update.run_update") as run, + patch("src.process_manager.send_notification"), patch.object(manager, "stop_photon") as stop, patch.object(manager, "start_photon", return_value=True) as start, - patch("src.process_manager.cleanup_backup_after_verification") as cleanup, + patch("src.process_manager.index.drop_backup") as cleanup, ): manager.run_update() - stop.assert_called_once() + run.assert_called_once_with("SEQUENTIAL") + assert stop.call_count == 2 start.assert_called_once() cleanup.assert_called_once() -def test_run_update_sequential_restarts_photon_after_failed_update( +def test_run_update_restarts_photon_and_notifies_after_failed_update( manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch ): monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") - completed = subprocess.CompletedProcess(args=[], returncode=1) manager.photon_process = 
None with ( patch("src.process_manager.compare_mtime", return_value=True), - patch("src.process_manager.subprocess.run", return_value=completed), + patch("src.process_manager.update.run_update", side_effect=process_manager.update.UpdateError("boom")), + patch("src.process_manager.send_notification") as notify, patch.object(manager, "stop_photon"), patch.object(manager, "start_photon", return_value=True) as start, + patch("src.process_manager.index.drop_backup") as cleanup, ): manager.run_update() start.assert_called_once() + cleanup.assert_not_called() + assert manager.state == process_manager.AppState.RUNNING + + messages = [call.args[0] for call in notify.call_args_list] + assert any("Photon Update Failed" in m for m in messages) + + +def test_run_update_notifies_success(manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") + with ( + patch("src.process_manager.compare_mtime", return_value=True), + patch("src.process_manager.update.run_update"), + patch("src.process_manager.send_notification") as notify, + patch.object(manager, "stop_photon"), + patch.object(manager, "start_photon", return_value=True), + patch("src.process_manager.index.drop_backup"), + ): + manager.run_update() + + messages = [call.args[0] for call in notify.call_args_list] + assert any("Updated Successfully" in m for m in messages) + + +def test_run_update_restores_state_when_pipeline_raises_unexpectedly( + manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch +): + monkeypatch.setattr(config, "UPDATE_STRATEGY", "PARALLEL") + manager.photon_process = MagicMock() + with ( + patch("src.process_manager.compare_mtime", return_value=True), + patch("src.process_manager.update.run_update", side_effect=RuntimeError("unexpected")), + patch("src.process_manager.send_notification"), + patch.object(manager, "stop_photon") as stop, + patch.object(manager, "start_photon", return_value=True) as start, + ): + 
manager.run_update() + assert manager.state == process_manager.AppState.RUNNING + stop.assert_not_called() + start.assert_not_called() + + +def test_run_update_survives_error_outside_pipeline( + manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch +): + monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") + with ( + patch("src.process_manager.compare_mtime", side_effect=RuntimeError("header parse boom")), + patch("src.process_manager.send_notification") as notify, + ): + manager.run_update() + assert manager.state == process_manager.AppState.RUNNING + messages = [call.args[0] for call in notify.call_args_list] + assert any("Photon Update Failed" in m for m in messages) + + +def test_run_update_notifies_when_health_check_fails_after_swap( + manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch +): + monkeypatch.setattr(config, "UPDATE_STRATEGY", "PARALLEL") + with ( + patch("src.process_manager.compare_mtime", return_value=True), + patch("src.process_manager.update.run_update"), + patch("src.process_manager.send_notification") as notify, + patch.object(manager, "stop_photon"), + patch.object(manager, "start_photon", return_value=False), + patch("src.process_manager.index.drop_backup"), + ): + manager.run_update() + messages = [call.args[0] for call in notify.call_args_list] + assert any("Photon Update Failed" in m for m in messages) + assert not any("Updated Successfully" in m for m in messages) + + +def test_run_pending_jobs_survives_job_exception( + manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch +): + monkeypatch.setattr(process_manager.schedule, "run_pending", MagicMock(side_effect=RuntimeError("job boom"))) + manager._run_pending_jobs() @pytest.mark.parametrize(("interval", "expected_unit"), [("3d", "days"), ("12h", "hours"), ("30m", "minutes")]) @@ -397,45 +467,39 @@ def test_shutdown_calls_stop_and_exits(manager: process_manager.PhotonManager): assert exc.value.code == 0 -def 
test_run_skips_setup_when_index_present( - manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch, tmp_path: Path -): - node = tmp_path / "node_1" - node.mkdir() - monkeypatch.setattr(config, "OS_NODE_DIR", str(node)) - monkeypatch.setattr(config, "FORCE_UPDATE", False) +def test_run_invokes_setup_then_starts_photon(manager: process_manager.PhotonManager): with ( - patch.object(manager, "run_initial_setup") as setup, - patch.object(manager, "start_photon", return_value=True), - patch.object(manager, "schedule_updates"), + patch("src.process_manager.run_setup") as setup, + patch.object(manager, "start_photon", return_value=True) as start, + patch.object(manager, "schedule_updates") as sched, patch.object(manager, "monitor_photon"), ): manager.run() - setup.assert_not_called() + setup.assert_called_once() + start.assert_called_once() + sched.assert_called_once() -def test_run_invokes_initial_setup_when_no_index( - manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch, tmp_path: Path -): - monkeypatch.setattr(config, "OS_NODE_DIR", str(tmp_path / "missing")) - monkeypatch.setattr(config, "FORCE_UPDATE", False) +def test_run_exits_75_when_setup_hits_insufficient_space(manager: process_manager.PhotonManager): with ( - patch.object(manager, "run_initial_setup") as setup, - patch.object(manager, "start_photon", return_value=True), - patch.object(manager, "schedule_updates"), - patch.object(manager, "monitor_photon"), + patch("src.process_manager.run_setup", side_effect=process_manager.update.InsufficientSpaceError("no space")), + pytest.raises(SystemExit) as exc, ): manager.run() - setup.assert_called_once() + assert exc.value.code == 75 -def test_run_exits_when_photon_fails_to_start( - manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch, tmp_path: Path -): - node = tmp_path / "node_1" - node.mkdir() - monkeypatch.setattr(config, "OS_NODE_DIR", str(node)) - monkeypatch.setattr(config, "FORCE_UPDATE", False) - with 
patch.object(manager, "start_photon", return_value=False), pytest.raises(SystemExit) as exc: +def test_run_exits_1_when_setup_fails(manager: process_manager.PhotonManager): + with patch("src.process_manager.run_setup", side_effect=ValueError("bad config")), pytest.raises(SystemExit) as exc: + manager.run() + assert exc.value.code == 1 + + +def test_run_exits_when_photon_fails_to_start(manager: process_manager.PhotonManager): + with ( + patch("src.process_manager.run_setup"), + patch.object(manager, "start_photon", return_value=False), + pytest.raises(SystemExit) as exc, + ): manager.run() assert exc.value.code == 1 diff --git a/tests/test_update.py b/tests/test_update.py new file mode 100644 index 00000000..bc883349 --- /dev/null +++ b/tests/test_update.py @@ -0,0 +1,330 @@ +import hashlib +import subprocess +from pathlib import Path +from unittest.mock import patch + +import pytest + +from src import update +from src.check_remote import RemoteFileSizeError +from src.utils import config + + +@pytest.fixture +def fake_dirs(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + data_dir = tmp_path / "data" + photon_data_dir = data_dir / "photon_data" + temp_dir = data_dir / "temp" + os_node_dir = photon_data_dir / "node_1" + data_dir.mkdir() + + monkeypatch.setattr(config, "DATA_DIR", str(data_dir)) + monkeypatch.setattr(config, "PHOTON_DATA_DIR", str(photon_data_dir)) + monkeypatch.setattr(config, "TEMP_DIR", str(temp_dir)) + monkeypatch.setattr(config, "OS_NODE_DIR", str(os_node_dir)) + return data_dir + + +def test_get_download_url_uses_file_url_when_set(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "FILE_URL", "https://override.example/file.tar.bz2") + assert update.get_download_url() == "https://override.example/file.tar.bz2" + + +def test_get_download_url_constructs_from_region_and_base(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "FILE_URL", None) + monkeypatch.setattr(config, "BASE_URL", "https://example.com/public") 
+ monkeypatch.setattr(config, "REGION", "europe") + monkeypatch.setattr(config, "INDEX_DB_VERSION", "1.0") + monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") + url = update.get_download_url() + assert url == "https://example.com/public/europe/photon-db-europe-1.0-latest.tar.bz2" + + +def test_download_index_returns_path(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") + monkeypatch.setattr(update, "get_download_url", lambda: "https://example.com/x") + Path(config.TEMP_DIR).mkdir(parents=True, exist_ok=True) + + def fake_download(_url, output): + Path(output).write_bytes(b"x") + return True + + with patch("src.update.download_file", side_effect=fake_download): + out = update.download_index() + + assert out == str(Path(config.TEMP_DIR) / "photon-db-latest.tar.bz2") + assert Path(out).exists() + + +def test_download_index_raises_on_failure(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") + monkeypatch.setattr(update, "get_download_url", lambda: "https://example.com/x") + Path(config.TEMP_DIR).mkdir(parents=True, exist_ok=True) + with ( + patch("src.update.download_file", return_value=False), + pytest.raises(update.DownloadError, match="Failed to download index"), + ): + update.download_index() + + +def test_download_md5_uses_explicit_url(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "MD5_URL", "https://example.com/custom.md5") + monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") + Path(config.TEMP_DIR).mkdir(parents=True, exist_ok=True) + + captured = {} + + def fake_download(url, output): + captured["url"] = url + Path(output).write_text("md5") + return True + + with patch("src.update.download_file", side_effect=fake_download): + out = update.download_md5() + + assert captured["url"] == "https://example.com/custom.md5" + assert out.endswith("photon-db-latest.tar.bz2.md5") + + 
+def test_download_md5_constructs_url_when_unset(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "MD5_URL", None) + monkeypatch.setattr(config, "FILE_URL", None) + monkeypatch.setattr(config, "BASE_URL", "https://example.com/public") + monkeypatch.setattr(config, "REGION", None) + monkeypatch.setattr(config, "INDEX_DB_VERSION", "1.0") + monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") + Path(config.TEMP_DIR).mkdir(parents=True, exist_ok=True) + + captured = {} + + def fake_download(url, output): + captured["url"] = url + Path(output).write_text("md5") + return True + + with patch("src.update.download_file", side_effect=fake_download): + update.download_md5() + + assert captured["url"] == "https://example.com/public/photon-db-planet-1.0-latest.tar.bz2.md5" + + +def test_download_md5_raises_on_failure(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "MD5_URL", "https://example.com/x.md5") + monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") + Path(config.TEMP_DIR).mkdir(parents=True, exist_ok=True) + with ( + patch("src.update.download_file", return_value=False), + pytest.raises(update.DownloadError, match="Failed to download MD5"), + ): + update.download_md5() + + +def test_extract_index_runs_lbzip2_command(fake_dirs: Path): + index_file = fake_dirs / "index.tar.bz2" + index_file.write_bytes(b"x") + + completed = subprocess.CompletedProcess(args="cmd", returncode=0, stdout="ok", stderr="") + with patch("src.update.subprocess.run", return_value=completed) as run: + update.extract_index(str(index_file)) + + args, kwargs = run.call_args + cmd = args[0] + assert cmd[:4] == ["/bin/bash", "-o", "pipefail", "-c"] + assert "lbzip2 -d -c" in cmd[4] + assert str(index_file) in cmd[4] + assert kwargs["check"] is True + assert Path(config.TEMP_DIR).exists() + + +def test_extract_index_raises_extraction_error_on_failure(fake_dirs: Path): + index_file = fake_dirs / "index.tar.bz2" + 
index_file.write_bytes(b"x") + err = subprocess.CalledProcessError(returncode=1, cmd="lbzip2 ...", output="", stderr="boom") + with ( + patch("src.update.subprocess.run", side_effect=err), + pytest.raises(update.ExtractionError, match="return code 1"), + ): + update.extract_index(str(index_file)) + + +def test_verify_checksum_returns_true_on_match(tmp_path: Path): + index_file = tmp_path / "index.bin" + index_file.write_bytes(b"hello world") + expected = hashlib.md5(b"hello world").hexdigest() # noqa: S324 + md5_file = tmp_path / "index.bin.md5" + md5_file.write_text(f"{expected} index.bin\n") + + assert update.verify_checksum(str(md5_file), str(index_file)) is True + + +def test_verify_checksum_raises_on_mismatch(tmp_path: Path): + index_file = tmp_path / "index.bin" + index_file.write_bytes(b"hello world") + md5_file = tmp_path / "index.bin.md5" + md5_file.write_text("00000000000000000000000000000000 index.bin\n") + + with pytest.raises(update.ChecksumMismatchError, match="Checksum mismatch"): + update.verify_checksum(str(md5_file), str(index_file)) + + +def test_verify_checksum_raises_when_index_missing(tmp_path: Path): + md5_file = tmp_path / "x.md5" + md5_file.write_text("0" * 32) + with pytest.raises(FileNotFoundError): + update.verify_checksum(str(md5_file), str(tmp_path / "missing")) + + +def test_verify_checksum_raises_when_md5_missing(tmp_path: Path): + index_file = tmp_path / "index.bin" + index_file.write_bytes(b"data") + with pytest.raises(FileNotFoundError): + update.verify_checksum(str(tmp_path / "missing.md5"), str(index_file)) + + +def test_verify_checksum_raises_on_empty_md5_file(tmp_path: Path): + index_file = tmp_path / "index.bin" + index_file.write_bytes(b"data") + md5_file = tmp_path / "empty.md5" + md5_file.write_text("") + with pytest.raises((IndexError, ValueError)): + update.verify_checksum(str(md5_file), str(index_file)) + + +def _make_pipeline_patches(monkeypatch: pytest.MonkeyPatch): + fake_index = str(Path(config.TEMP_DIR) / 
"index.tar.bz2") + fake_md5 = fake_index + ".md5" + monkeypatch.setattr(update, "get_download_url", lambda: "https://example.com/x") + monkeypatch.setattr(update, "get_remote_file_size", lambda _: 1024) + monkeypatch.setattr(update, "check_disk_space_requirements", lambda *_, **__: True) + monkeypatch.setattr(update, "download_index", lambda: fake_index) + monkeypatch.setattr(update, "download_md5", lambda: fake_md5) + monkeypatch.setattr(update, "extract_index", lambda _: None) + monkeypatch.setattr(update, "verify_checksum", lambda *_: True) + monkeypatch.setattr(update.index, "activate", lambda _: None) + monkeypatch.setattr(update, "clear_temp_dir", lambda: None) + + +def test_run_update_happy_path(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "SKIP_MD5_CHECK", False) + _make_pipeline_patches(monkeypatch) + update.run_update("PARALLEL") + assert Path(config.TEMP_DIR).exists() + + +def test_run_update_passes_strategy_to_space_check(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "SKIP_MD5_CHECK", True) + _make_pipeline_patches(monkeypatch) + + captured = {} + + def fake_check(size, is_parallel): + captured["is_parallel"] = is_parallel + return True + + monkeypatch.setattr(update, "check_disk_space_requirements", fake_check) + + update.run_update("PARALLEL") + assert captured["is_parallel"] is True + + update.run_update("SEQUENTIAL") + assert captured["is_parallel"] is False + + +def test_run_update_skips_md5_when_configured(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "SKIP_MD5_CHECK", True) + _make_pipeline_patches(monkeypatch) + + md5_called = {"n": 0} + + def fake_md5(): + md5_called["n"] += 1 + return str(Path(config.TEMP_DIR) / "x.md5") + + monkeypatch.setattr(update, "download_md5", fake_md5) + update.run_update("PARALLEL") + assert md5_called["n"] == 0 + + +def test_run_update_raises_insufficient_space(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): 
+ _make_pipeline_patches(monkeypatch) + monkeypatch.setattr(update, "check_disk_space_requirements", lambda *_, **__: False) + with pytest.raises(update.InsufficientSpaceError): + update.run_update("PARALLEL") + + +def test_run_update_skip_space_check_proceeds_on_size_error(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "SKIP_SPACE_CHECK", True) + monkeypatch.setattr(config, "SKIP_MD5_CHECK", True) + _make_pipeline_patches(monkeypatch) + + def boom(_url): + raise RemoteFileSizeError("no size") + + monkeypatch.setattr(update, "get_remote_file_size", boom) + update.run_update("PARALLEL") + + +def test_run_update_raises_on_size_error_without_skip(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "SKIP_SPACE_CHECK", False) + _make_pipeline_patches(monkeypatch) + + def boom(_url): + raise RemoteFileSizeError("no size") + + monkeypatch.setattr(update, "get_remote_file_size", boom) + with pytest.raises(update.UpdateError, match="no size"): + update.run_update("SEQUENTIAL") + + +def test_run_update_propagates_download_error(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "SKIP_MD5_CHECK", True) + _make_pipeline_patches(monkeypatch) + + def boom(): + raise update.DownloadError("download died") + + monkeypatch.setattr(update, "download_index", boom) + with pytest.raises(update.DownloadError, match="download died"): + update.run_update("SEQUENTIAL") + + +def test_run_update_checksum_mismatch_prevents_activation(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "SKIP_MD5_CHECK", False) + _make_pipeline_patches(monkeypatch) + + activated = {"n": 0} + + def fake_activate(_): + activated["n"] += 1 + + monkeypatch.setattr(update.index, "activate", fake_activate) + + def boom(*_): + raise update.ChecksumMismatchError("checksum mismatch") + + monkeypatch.setattr(update, "verify_checksum", boom) + + with pytest.raises(update.ChecksumMismatchError, 
match="checksum mismatch"): + update.run_update("PARALLEL") + assert activated["n"] == 0 + + +def test_run_update_extraction_failure_prevents_activation(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "SKIP_MD5_CHECK", True) + _make_pipeline_patches(monkeypatch) + + activated = {"n": 0} + + def fake_activate(_): + activated["n"] += 1 + + monkeypatch.setattr(update.index, "activate", fake_activate) + + def boom(_): + raise update.ExtractionError("truncated archive") + + monkeypatch.setattr(update, "extract_index", boom) + + with pytest.raises(update.ExtractionError, match="truncated archive"): + update.run_update("SEQUENTIAL") + assert activated["n"] == 0 diff --git a/tests/test_updater.py b/tests/test_updater.py deleted file mode 100644 index 2e6794db..00000000 --- a/tests/test_updater.py +++ /dev/null @@ -1,57 +0,0 @@ -from unittest.mock import patch - -import pytest - -from src import updater -from src.utils import config - - -def test_updater_main_runs_parallel(monkeypatch: pytest.MonkeyPatch): - monkeypatch.setattr(config, "UPDATE_STRATEGY", "PARALLEL") - with ( - patch("src.updater.parallel_update") as parallel, - patch("src.updater.sequential_update") as sequential, - patch("src.updater.send_notification") as notify, - ): - updater.main() - parallel.assert_called_once_with() - sequential.assert_not_called() - notify.assert_called_once_with("Photon Index Updated Successfully") - - -def test_updater_main_runs_sequential(monkeypatch: pytest.MonkeyPatch): - monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") - with ( - patch("src.updater.parallel_update") as parallel, - patch("src.updater.sequential_update") as sequential, - patch("src.updater.send_notification"), - ): - updater.main() - parallel.assert_not_called() - sequential.assert_called_once_with() - - -def test_updater_main_exits_on_unknown_strategy(monkeypatch: pytest.MonkeyPatch): - monkeypatch.setattr(config, "UPDATE_STRATEGY", "BOGUS") - with ( - 
patch("src.updater.parallel_update"), - patch("src.updater.sequential_update"), - patch("src.updater.send_notification"), - pytest.raises(SystemExit) as exc, - ): - updater.main() - assert exc.value.code == 1 - - -def test_updater_main_notifies_on_failure(monkeypatch: pytest.MonkeyPatch): - monkeypatch.setattr(config, "UPDATE_STRATEGY", "PARALLEL") - with ( - patch("src.updater.parallel_update", side_effect=RuntimeError("boom")), - patch("src.updater.send_notification") as notify, - pytest.raises(SystemExit) as exc, - ): - updater.main() - assert exc.value.code == 1 - - args = [call.args[0] for call in notify.call_args_list] - assert any("Photon Update Failed" in a for a in args) From a5da3c8aa7622cebc209b6b4d9b2504756cb0e50 Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Sat, 13 Jun 2026 18:09:36 +0200 Subject: [PATCH 12/27] fix: index marker scope --- src/importer.py | 2 +- src/index.py | 2 +- src/update.py | 4 ++-- tests/test_downloader.py | 6 ++++-- tests/test_importer.py | 26 ++++++++++++++++++++++++-- tests/test_index.py | 7 +++++++ tests/test_update.py | 33 +++++++++++++++++++++++++++++++++ 7 files changed, 72 insertions(+), 8 deletions(-) diff --git a/src/importer.py b/src/importer.py index ba072abf..741b31f9 100644 --- a/src/importer.py +++ b/src/importer.py @@ -21,9 +21,9 @@ def run_jsonl_import() -> None: parent_region = get_jsonl_parent_region(regions) country_codes = get_country_codes_for_regions(regions) if len(regions) > 1 else None - begin_import() try: jsonl_path = download_jsonl(parent_region) + begin_import() import_proc = _start_photon_import("-", country_codes=country_codes) try: if import_proc.stdin is None: diff --git a/src/index.py b/src/index.py index 7eb844f0..92d7a8a6 100644 --- a/src/index.py +++ b/src/index.py @@ -54,8 +54,8 @@ def begin_import(): def complete_import(): - mark_updated() _clear_import_marker() + mark_updated() def import_was_interrupted() -> bool: diff --git a/src/update.py b/src/update.py index 627e015e..0b8d6de6 
100644 --- a/src/update.py +++ b/src/update.py @@ -40,7 +40,7 @@ def get_download_url() -> str: index_path = get_index_url_path(config.REGION, config.INDEX_DB_VERSION, config.INDEX_FILE_EXTENSION) download_url = config.BASE_URL + index_path - logging.info("Using constructed location for download: %s", download_url) + logging.info("Using constructed location for download: %s", sanitize_url(download_url)) return download_url @@ -63,7 +63,7 @@ def download_md5() -> str: else: md5_path = get_index_url_path(config.REGION, config.INDEX_DB_VERSION, config.INDEX_FILE_EXTENSION) + ".md5" download_url = config.BASE_URL + md5_path - logging.info("Using constructed URL for checksum: %s", download_url) + logging.info("Using constructed URL for checksum: %s", sanitize_url(download_url)) output_file = f"photon-db-latest.{config.INDEX_FILE_EXTENSION}.md5" output = os.path.join(config.TEMP_DIR, output_file) diff --git a/tests/test_downloader.py b/tests/test_downloader.py index 22863e70..1685fd0b 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -365,11 +365,13 @@ def test_download_file_returns_false_on_unexpected_exception(tmp_path: Path, mon assert downloader.download_file("https://example.com/x", str(dest)) is False -def test_download_file_propagates_oserror(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): +def test_download_file_propagates_disk_write_oserror(tmp_path: Path, monkeypatch: pytest.MonkeyPatch): dest = tmp_path / "out.bin" monkeypatch.setattr(config, "DOWNLOAD_MAX_RETRIES", "1") + resp = _mock_response(status_code=200, headers={"content-length": "3"}, chunks=[b"abc"]) with ( - patch("src.downloader.requests.get", side_effect=OSError("disk full")), + patch("src.downloader.requests.get", return_value=resp), + patch("src.downloader.open", side_effect=OSError("disk full")), pytest.raises(OSError, match="disk full"), ): downloader.download_file("https://example.com/x", str(dest)) diff --git a/tests/test_importer.py b/tests/test_importer.py index 
96be73b8..ad257067 100644 --- a/tests/test_importer.py +++ b/tests/test_importer.py @@ -218,8 +218,12 @@ def test_run_jsonl_import_marks_then_completes_on_success(monkeypatch): process = RecordingProcess() events = [] + def fake_download(region): + events.append("download") + return "/photon/data/temp/andorra.jsonl.zst" + monkeypatch.setattr(config, "REGION", "andorra") - monkeypatch.setattr(importer, "download_jsonl", lambda region: "/photon/data/temp/andorra.jsonl.zst") + monkeypatch.setattr(importer, "download_jsonl", fake_download) monkeypatch.setattr(importer, "stream_decompress", lambda path: [b'{"type":"Place"}\n']) monkeypatch.setattr(importer, "_start_photon_import", lambda input_source, country_codes=None: process) monkeypatch.setattr(importer, "clear_temp_dir", lambda: None) @@ -228,7 +232,7 @@ def test_run_jsonl_import_marks_then_completes_on_success(monkeypatch): importer.run_jsonl_import() - assert events == ["begin", "complete"] + assert events == ["download", "begin", "complete"] def test_run_jsonl_import_leaves_progress_marker_on_failure(monkeypatch): @@ -247,3 +251,21 @@ def test_run_jsonl_import_leaves_progress_marker_on_failure(monkeypatch): importer.run_jsonl_import() assert events == ["begin"] + + +def test_run_jsonl_import_sets_no_marker_when_download_fails(monkeypatch): + events = [] + + def failing_download(region): + raise RuntimeError("download failed") + + monkeypatch.setattr(config, "REGION", "andorra") + monkeypatch.setattr(importer, "download_jsonl", failing_download) + monkeypatch.setattr(importer, "clear_temp_dir", lambda: None) + monkeypatch.setattr(importer, "begin_import", lambda: events.append("begin")) + monkeypatch.setattr(importer, "complete_import", lambda: events.append("complete")) + + with pytest.raises(RuntimeError, match="download failed"): + importer.run_jsonl_import() + + assert events == [] diff --git a/tests/test_index.py b/tests/test_index.py index a37ece66..2b19a74d 100644 --- a/tests/test_index.py +++ 
b/tests/test_index.py @@ -94,6 +94,13 @@ def test_complete_import_raises_when_marker_cannot_be_cleared(fake_dirs: Path): index.complete_import() +def test_complete_import_does_not_mark_updated_when_clear_fails(fake_dirs: Path): + index.begin_import() + with patch("src.index.Path.unlink", side_effect=OSError("read-only")), pytest.raises(OSError, match="read-only"): + index.complete_import() + assert index.has_update_timestamp() is False + + def test_reconcile_cleans_partial_index(fake_dirs: Path): node_dir = Path(config.OS_NODE_DIR) node_dir.mkdir(parents=True) diff --git a/tests/test_update.py b/tests/test_update.py index bc883349..a449bf1e 100644 --- a/tests/test_update.py +++ b/tests/test_update.py @@ -40,6 +40,20 @@ def test_get_download_url_constructs_from_region_and_base(monkeypatch: pytest.Mo assert url == "https://example.com/public/europe/photon-db-europe-1.0-latest.tar.bz2" +def test_get_download_url_sanitizes_credentials_in_log(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "FILE_URL", None) + monkeypatch.setattr(config, "BASE_URL", "https://user:secret@example.com/public") + monkeypatch.setattr(config, "REGION", "europe") + monkeypatch.setattr(config, "INDEX_DB_VERSION", "1.0") + monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") + + with patch.object(update.logging, "info") as log_info: + url = update.get_download_url() + + assert "secret" in url + assert all("secret" not in str(call) for call in log_info.call_args_list) + + def test_download_index_returns_path(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") monkeypatch.setattr(update, "get_download_url", lambda: "https://example.com/x") @@ -108,6 +122,25 @@ def fake_download(url, output): assert captured["url"] == "https://example.com/public/photon-db-planet-1.0-latest.tar.bz2.md5" +def test_download_md5_sanitizes_credentials_in_log(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + 
monkeypatch.setattr(config, "MD5_URL", None) + monkeypatch.setattr(config, "FILE_URL", None) + monkeypatch.setattr(config, "BASE_URL", "https://user:secret@example.com/public") + monkeypatch.setattr(config, "REGION", None) + monkeypatch.setattr(config, "INDEX_DB_VERSION", "1.0") + monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") + Path(config.TEMP_DIR).mkdir(parents=True, exist_ok=True) + + def fake_download(url, output): + Path(output).write_text("md5") + return True + + with patch("src.update.download_file", side_effect=fake_download), patch.object(update.logging, "info") as log_info: + update.download_md5() + + assert all("secret" not in str(call) for call in log_info.call_args_list) + + def test_download_md5_raises_on_failure(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): monkeypatch.setattr(config, "MD5_URL", "https://example.com/x.md5") monkeypatch.setattr(config, "INDEX_FILE_EXTENSION", "tar.bz2") From 04a11060fe2efb9bcf1219d344ee019f782eb796 Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Sat, 13 Jun 2026 19:25:02 +0200 Subject: [PATCH 13/27] refactor(import): rename import marker functions for clarity --- src/importer.py | 6 +++--- src/index.py | 4 ++-- tests/test_importer.py | 35 +++++++++++++++++++++++++++-------- tests/test_index.py | 32 ++++++++++++++++---------------- 4 files changed, 48 insertions(+), 29 deletions(-) diff --git a/src/importer.py b/src/importer.py index 741b31f9..ef36bf82 100644 --- a/src/importer.py +++ b/src/importer.py @@ -3,7 +3,7 @@ import subprocess from src.downloader import clear_temp_dir -from src.index import begin_import, complete_import +from src.index import mark_import_complete, mark_import_started from src.jsonl.decompressor import stream_decompress from src.jsonl.downloader import download_jsonl from src.utils import config @@ -23,8 +23,8 @@ def run_jsonl_import() -> None: try: jsonl_path = download_jsonl(parent_region) - begin_import() import_proc = _start_photon_import("-", 
country_codes=country_codes) + mark_import_started() try: if import_proc.stdin is None: raise RuntimeError("Photon import process stdin is unavailable") @@ -39,7 +39,7 @@ def run_jsonl_import() -> None: import_proc.kill() import_proc.wait() raise - complete_import() + mark_import_complete() finally: clear_temp_dir() diff --git a/src/index.py b/src/index.py index 92d7a8a6..54e9351a 100644 --- a/src/index.py +++ b/src/index.py @@ -47,13 +47,13 @@ def mark_updated(): logging.warning(f"Failed to update timestamp marker: {e}") -def begin_import(): +def mark_import_started(): marker_file = _import_in_progress_marker() Path(marker_file).touch() logging.debug(f"Marked import in progress: {marker_file}") -def complete_import(): +def mark_import_complete(): _clear_import_marker() mark_updated() diff --git a/tests/test_importer.py b/tests/test_importer.py index ad257067..a70aa4a4 100644 --- a/tests/test_importer.py +++ b/tests/test_importer.py @@ -8,8 +8,8 @@ @pytest.fixture(autouse=True) def _stub_index_markers(monkeypatch): - monkeypatch.setattr(importer, "begin_import", lambda: None) - monkeypatch.setattr(importer, "complete_import", lambda: None) + monkeypatch.setattr(importer, "mark_import_started", lambda: None) + monkeypatch.setattr(importer, "mark_import_complete", lambda: None) def _noop_makedirs(path: str, exist_ok: bool = False) -> None: @@ -227,8 +227,8 @@ def fake_download(region): monkeypatch.setattr(importer, "stream_decompress", lambda path: [b'{"type":"Place"}\n']) monkeypatch.setattr(importer, "_start_photon_import", lambda input_source, country_codes=None: process) monkeypatch.setattr(importer, "clear_temp_dir", lambda: None) - monkeypatch.setattr(importer, "begin_import", lambda: events.append("begin")) - monkeypatch.setattr(importer, "complete_import", lambda: events.append("complete")) + monkeypatch.setattr(importer, "mark_import_started", lambda: events.append("begin")) + monkeypatch.setattr(importer, "mark_import_complete", lambda: 
events.append("complete")) importer.run_jsonl_import() @@ -244,8 +244,8 @@ def test_run_jsonl_import_leaves_progress_marker_on_failure(monkeypatch): monkeypatch.setattr(importer, "stream_decompress", lambda path: [b'{"type":"Place"}\n']) monkeypatch.setattr(importer, "_start_photon_import", lambda input_source, country_codes=None: process) monkeypatch.setattr(importer, "clear_temp_dir", lambda: None) - monkeypatch.setattr(importer, "begin_import", lambda: events.append("begin")) - monkeypatch.setattr(importer, "complete_import", lambda: events.append("complete")) + monkeypatch.setattr(importer, "mark_import_started", lambda: events.append("begin")) + monkeypatch.setattr(importer, "mark_import_complete", lambda: events.append("complete")) with pytest.raises(RuntimeError, match="exit code 2"): importer.run_jsonl_import() @@ -262,10 +262,29 @@ def failing_download(region): monkeypatch.setattr(config, "REGION", "andorra") monkeypatch.setattr(importer, "download_jsonl", failing_download) monkeypatch.setattr(importer, "clear_temp_dir", lambda: None) - monkeypatch.setattr(importer, "begin_import", lambda: events.append("begin")) - monkeypatch.setattr(importer, "complete_import", lambda: events.append("complete")) + monkeypatch.setattr(importer, "mark_import_started", lambda: events.append("begin")) + monkeypatch.setattr(importer, "mark_import_complete", lambda: events.append("complete")) with pytest.raises(RuntimeError, match="download failed"): importer.run_jsonl_import() assert events == [] + + +def test_run_jsonl_import_sets_no_marker_when_process_start_fails(monkeypatch): + events = [] + + def failing_start(input_source, country_codes=None): + raise OSError("java not found") + + monkeypatch.setattr(config, "REGION", "andorra") + monkeypatch.setattr(importer, "download_jsonl", lambda region: "/photon/data/temp/andorra.jsonl.zst") + monkeypatch.setattr(importer, "_start_photon_import", failing_start) + monkeypatch.setattr(importer, "clear_temp_dir", lambda: None) + 
monkeypatch.setattr(importer, "mark_import_started", lambda: events.append("begin")) + monkeypatch.setattr(importer, "mark_import_complete", lambda: events.append("complete")) + + with pytest.raises(OSError, match="java not found"): + importer.run_jsonl_import() + + assert events == [] diff --git a/tests/test_index.py b/tests/test_index.py index 2b19a74d..e653aceb 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -65,39 +65,39 @@ def test_mark_updated_swallows_errors(fake_dirs: Path): def test_import_marker_roundtrip(fake_dirs: Path): assert index.import_was_interrupted() is False - index.begin_import() + index.mark_import_started() assert index.import_was_interrupted() is True - index.complete_import() + index.mark_import_complete() assert index.import_was_interrupted() is False -def test_complete_import_touches_update_timestamp(fake_dirs: Path): - index.begin_import() - index.complete_import() +def test_mark_import_complete_touches_update_timestamp(fake_dirs: Path): + index.mark_import_started() + index.mark_import_complete() assert index.has_update_timestamp() is True -def test_complete_import_is_idempotent(fake_dirs: Path): - index.complete_import() +def test_mark_import_complete_is_idempotent(fake_dirs: Path): + index.mark_import_complete() assert index.import_was_interrupted() is False -def test_begin_import_raises_when_marker_cannot_be_written(fake_dirs: Path): +def test_mark_import_started_raises_when_marker_cannot_be_written(fake_dirs: Path): with patch("src.index.Path.touch", side_effect=OSError("read-only")), pytest.raises(OSError, match="read-only"): - index.begin_import() + index.mark_import_started() -def test_complete_import_raises_when_marker_cannot_be_cleared(fake_dirs: Path): - index.begin_import() +def test_mark_import_complete_raises_when_marker_cannot_be_cleared(fake_dirs: Path): + index.mark_import_started() with patch("src.index.Path.unlink", side_effect=OSError("read-only")), pytest.raises(OSError, match="read-only"): - 
index.complete_import() + index.mark_import_complete() -def test_complete_import_does_not_mark_updated_when_clear_fails(fake_dirs: Path): - index.begin_import() +def test_mark_import_complete_does_not_mark_updated_when_clear_fails(fake_dirs: Path): + index.mark_import_started() with patch("src.index.Path.unlink", side_effect=OSError("read-only")), pytest.raises(OSError, match="read-only"): - index.complete_import() + index.mark_import_complete() assert index.has_update_timestamp() is False @@ -105,7 +105,7 @@ def test_reconcile_cleans_partial_index(fake_dirs: Path): node_dir = Path(config.OS_NODE_DIR) node_dir.mkdir(parents=True) (node_dir / "segment.bin").write_text("partial") - index.begin_import() + index.mark_import_started() index.reconcile() From a83c2e5f48cc00e9a5937e0aae4ebcca2159e356 Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Sat, 13 Jun 2026 19:54:27 +0200 Subject: [PATCH 14/27] refactor: standardize ruff config and make args keyword-only --- pyproject.toml | 45 ++++++++++++++++++++++++----- src/check_remote.py | 8 ++--- src/downloader.py | 2 +- src/entrypoint.py | 2 +- src/update.py | 2 +- src/utils/sanitize.py | 2 +- tests/test_process_manager.py | 12 +++++--- tests/utils/test_validate_config.py | 2 +- 8 files changed, 55 insertions(+), 20 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d06ff83a..836fd0fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,6 @@ indent-width = 4 line-length = 120 [tool.ruff.lint] -# select = ["ALL"] ignore = [ "ANN", # flake8-annotations "COM", # flake8-commas @@ -54,10 +53,6 @@ ignore = [ "D105", "D106", "D107", - "D101", - "D107", # missing docstring in public module - "D102", # missing docstring in public class - "D104", # missing docstring in public package "D213", "D203", "D400", @@ -67,8 +62,39 @@ ignore = [ "E501", # line too long "TRY", "SIM105", # faster without contextlib +] # select = ["ALL"] +extend-select = [ + "A", + "B", + "S", + "SIM", + "T20", + "C901", + "RET", + "ICN", 
+ "ISC", + "FA", + "C4", + "UP", + "I", + "E", + "W", + "F", + "RUF", + "ASYNC", + "FAST", + "FBT", + "DTZ", + "LOG", + "PIE", + "PYI", + "PT", + "ARG", + "N", + "PERF", + "FURB", ] -extend-select = ["B", "S", "SIM", "T20", "C901", "RUF"] + fixable = ["ALL"] unfixable = [] # Allow unused variables when underscore-prefixed. @@ -77,8 +103,13 @@ dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" [tool.ruff.lint.mccabe] max-complexity = 15 +[tool.ruff.lint.isort] +split-on-trailing-comma = false + [tool.ruff.lint.per-file-ignores] -"tests/**/*.py" = ["S101"] +"tests/**/*.py" = ["S101", "FBT", "ARG", "PLR"] +# Static region lookup table: positional booleans are the db_available column. +"src/utils/regions.py" = ["FBT001", "FBT003"] [tool.ruff.format] quote-style = "double" diff --git a/src/check_remote.py b/src/check_remote.py index f35b6ead..483e637d 100644 --- a/src/check_remote.py +++ b/src/check_remote.py @@ -79,10 +79,10 @@ def compare_mtime() -> bool: if using_marker_file: logging.debug("Using marker file timestamp - comparing directly without grace period") return remote_dt > local_dt - else: - logging.debug("Using directory timestamp - applying 144-hour grace period") - grace_period = datetime.timedelta(hours=144) - return remote_dt > (local_dt + grace_period) + + logging.debug("Using directory timestamp - applying 144-hour grace period") + grace_period = datetime.timedelta(hours=144) + return remote_dt > (local_dt + grace_period) def check_index_age() -> bool: diff --git a/src/downloader.py b/src/downloader.py index 12517e5c..d7d5b146 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -22,7 +22,7 @@ def get_available_space(path: str) -> int: return 0 -def check_disk_space_requirements(download_size: int, is_parallel: bool = True) -> bool: +def check_disk_space_requirements(download_size: int, *, is_parallel: bool = True) -> bool: temp_available = get_available_space(config.TEMP_DIR if os.path.exists(config.TEMP_DIR) else config.DATA_DIR) 
data_available = get_available_space( config.PHOTON_DATA_DIR if os.path.exists(config.PHOTON_DATA_DIR) else config.DATA_DIR diff --git a/src/entrypoint.py b/src/entrypoint.py index f6e56888..f26612b9 100644 --- a/src/entrypoint.py +++ b/src/entrypoint.py @@ -40,7 +40,7 @@ def log_config() -> None: logger.info("=== END CONFIG VARIABLES ===") -def run_update_or_import(force_update: bool = False) -> None: +def run_update_or_import(*, force_update: bool = False) -> None: if config.IMPORT_MODE == "jsonl": action = "forced JSONL import" if force_update else "initial JSONL import" logger.info(f"Starting {action}") diff --git a/src/update.py b/src/update.py index 0b8d6de6..1f94db06 100644 --- a/src/update.py +++ b/src/update.py @@ -124,7 +124,7 @@ def verify_checksum(md5_file: str, index_file: str) -> bool: raise ChecksumMismatchError(f"Checksum mismatch for {index_file}. Expected: {md5_sum}, Got: {dl_sum}") -def _ensure_disk_space(download_url: str, parallel: bool): +def _ensure_disk_space(download_url: str, *, parallel: bool): try: file_size = get_remote_file_size(download_url) except RemoteFileSizeError as e: diff --git a/src/utils/sanitize.py b/src/utils/sanitize.py index f2d9d99f..9e4276de 100644 --- a/src/utils/sanitize.py +++ b/src/utils/sanitize.py @@ -6,5 +6,5 @@ def sanitize_url(url: str | None) -> str | None: return url parsed = urlparse(url) if parsed.username or parsed.password: - return parsed._replace(netloc=f"***@{parsed.hostname}{':%d' % parsed.port if parsed.port else ''}").geturl() + return parsed._replace(netloc=f"***@{parsed.hostname}{f':{parsed.port}' if parsed.port else ''}").geturl() return url diff --git a/tests/test_process_manager.py b/tests/test_process_manager.py index 7a2dd1d4..afdc923d 100644 --- a/tests/test_process_manager.py +++ b/tests/test_process_manager.py @@ -99,9 +99,12 @@ def test_start_photon_builds_full_command(manager: process_manager.PhotonManager assert "-Xmx4g" in cmd assert "-cors-any" in cmd assert "/photon/photon.jar" in 
cmd - assert "-listen-ip" in cmd and "127.0.0.1" in cmd - assert "-data-dir" in cmd and "/data" in cmd - assert "-metrics-enable" in cmd and "prometheus" in cmd + assert "-listen-ip" in cmd + assert "127.0.0.1" in cmd + assert "-data-dir" in cmd + assert "/data" in cmd + assert "-metrics-enable" in cmd + assert "prometheus" in cmd def test_start_photon_retries_until_failure(manager: process_manager.PhotonManager): @@ -151,7 +154,8 @@ def test_stop_photon_force_kills_on_timeout(manager: process_manager.PhotonManag ): manager.stop_photon() signals = [c.args[1] for c in killpg.call_args_list] - assert signal.SIGTERM in signals and signal.SIGKILL in signals + assert signal.SIGTERM in signals + assert signal.SIGKILL in signals def test_stop_photon_handles_lookup_error(manager: process_manager.PhotonManager): diff --git a/tests/utils/test_validate_config.py b/tests/utils/test_validate_config.py index 6ca29812..d53cb6d9 100644 --- a/tests/utils/test_validate_config.py +++ b/tests/utils/test_validate_config.py @@ -91,7 +91,7 @@ def test_validate_config_reports_multiple_errors(monkeypatch: pytest.MonkeyPatch monkeypatch.setattr(config, "UPDATE_INTERVAL", "hourly") monkeypatch.setattr(config, "REGION", "atlantis") - with pytest.raises(ValueError) as exc_info: + with pytest.raises(ValueError, match="Configuration validation failed:") as exc_info: validate_config() message = str(exc_info.value) From 7a8a9d609150e213da7637e954005cf777dbb703 Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Sat, 13 Jun 2026 20:55:33 +0200 Subject: [PATCH 15/27] Update Photon version to 1.2.0 --- .last_release | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.last_release b/.last_release index 9084fa2f..26aaba0e 100644 --- a/.last_release +++ b/.last_release @@ -1 +1 @@ -1.1.0 +1.2.0 From b062d85bce419bb811d815b2cb6aa9573f4c9e31 Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Sat, 13 Jun 2026 23:58:10 +0200 Subject: [PATCH 16/27] fix: correct continent country-code slices and 
harden update recovery --- .github/workflows/full-test-jsonl.yml | 8 +++++--- src/importer.py | 2 +- src/process_manager.py | 17 ++++++++++++----- src/utils/regions.py | 19 +++++++++++++------ src/utils/validate_config.py | 15 ++++++++++----- tests/test_process_manager.py | 26 ++++++++++++++++++++++++-- tests/utils/test_validate_config.py | 15 +++++++++++++++ 7 files changed, 80 insertions(+), 22 deletions(-) diff --git a/.github/workflows/full-test-jsonl.yml b/.github/workflows/full-test-jsonl.yml index 9b79cd19..e89d255c 100644 --- a/.github/workflows/full-test-jsonl.yml +++ b/.github/workflows/full-test-jsonl.yml @@ -18,10 +18,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Repository - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4 - name: Read Photon version from .last_release id: photon_version @@ -35,7 +37,7 @@ jobs: echo "Photon Version: $PHOTON_VERSION" - name: Build test image - uses: docker/build-push-action@v6 + uses: docker/build-push-action@bcafcacb16a39f128d818304e6c9c0c18556b85f # v7 with: context: . 
file: ./Dockerfile diff --git a/src/importer.py b/src/importer.py index ef36bf82..2f75150d 100644 --- a/src/importer.py +++ b/src/importer.py @@ -24,8 +24,8 @@ def run_jsonl_import() -> None: try: jsonl_path = download_jsonl(parent_region) import_proc = _start_photon_import("-", country_codes=country_codes) - mark_import_started() try: + mark_import_started() if import_proc.stdin is None: raise RuntimeError("Photon import process stdin is unavailable") for chunk in stream_decompress(jsonl_path): diff --git a/src/process_manager.py b/src/process_manager.py index 5674436a..5d8505ee 100644 --- a/src/process_manager.py +++ b/src/process_manager.py @@ -289,12 +289,19 @@ def _run_pending_jobs(self): def monitor_photon(self): while not self.should_exit: - if self.photon_process and self.state == AppState.RUNNING: - ret = self.photon_process.poll() - if ret is not None: - logger.warning(f"Photon exited with code {ret}, restarting...") + if self.state == AppState.RUNNING: + if self.photon_process is None: + logger.warning("Photon is not running while in RUNNING state, restarting...") if not self.start_photon(): - logger.error("Failed to restart Photon after unexpected exit") + logger.error("Failed to restart Photon, exiting for supervisor to recover") + sys.exit(1) + else: + ret = self.photon_process.poll() + if ret is not None: + logger.warning(f"Photon exited with code {ret}, restarting...") + if not self.start_photon(): + logger.error("Failed to restart Photon, exiting for supervisor to recover") + sys.exit(1) time.sleep(5) def shutdown(self): diff --git a/src/utils/regions.py b/src/utils/regions.py index 8b13298d..2b8e0fac 100644 --- a/src/utils/regions.py +++ b/src/utils/regions.py @@ -219,12 +219,19 @@ "AR", ] -AFRICA_COUNTRY_CODES = PLANET_COUNTRY_CODES[:56] -ASIA_COUNTRY_CODES = PLANET_COUNTRY_CODES[56:104] -AUSTRALIA_OCEANIA_COUNTRY_CODES = PLANET_COUNTRY_CODES[104:122] -EUROPE_COUNTRY_CODES = PLANET_COUNTRY_CODES[122:176] -NORTH_AMERICA_COUNTRY_CODES = 
PLANET_COUNTRY_CODES[176:207] -SOUTH_AMERICA_COUNTRY_CODES = PLANET_COUNTRY_CODES[207:220] + +def _slice_codes(start_code: str, end_code: str) -> list[str]: + start_idx = PLANET_COUNTRY_CODES.index(start_code) + end_idx = PLANET_COUNTRY_CODES.index(end_code) + 1 + return PLANET_COUNTRY_CODES[start_idx:end_idx] + + +AFRICA_COUNTRY_CODES = _slice_codes("DZ", "SH") +ASIA_COUNTRY_CODES = _slice_codes("KZ", "JP") +AUSTRALIA_OCEANIA_COUNTRY_CODES = _slice_codes("AU", "VU") +EUROPE_COUNTRY_CODES = _slice_codes("AL", "SK") +NORTH_AMERICA_COUNTRY_CODES = _slice_codes("BZ", "MX") +SOUTH_AMERICA_COUNTRY_CODES = _slice_codes("CL", "AR") def _region(region_type: str, continent: str | None, db_available: bool, country_codes: list[str]) -> dict: diff --git a/src/utils/validate_config.py b/src/utils/validate_config.py index 0e4fa23c..588452f4 100644 --- a/src/utils/validate_config.py +++ b/src/utils/validate_config.py @@ -2,7 +2,7 @@ from src.utils import config from src.utils.logger import get_logger -from src.utils.regions import get_regions_for_jsonl, is_valid_region +from src.utils.regions import get_region_info, get_regions_for_jsonl, is_valid_region logging = get_logger() @@ -27,12 +27,17 @@ def validate_config(): ) if config.IMPORT_MODE == "db": - if config.REGION and not is_valid_region(config.REGION): - error_messages.append( - f"Invalid REGION: '{config.REGION}'. Must be a valid continent, sub-region, or 'planet'." - ) if config.REGION and len(config.get_jsonl_regions()) > 1: error_messages.append("DB mode supports exactly one region in REGION.") + elif config.REGION: + if not is_valid_region(config.REGION): + error_messages.append( + f"Invalid REGION: '{config.REGION}'. Must be a valid continent, sub-region, or 'planet'." 
+ ) + else: + region_info = get_region_info(config.REGION) + if region_info and not region_info.get("db_available", False): + error_messages.append(f"DB index is not available for REGION: '{config.REGION}'.") if config.IMPORT_MODE == "jsonl": if config.FILE_URL: diff --git a/tests/test_process_manager.py b/tests/test_process_manager.py index afdc923d..9695faab 100644 --- a/tests/test_process_manager.py +++ b/tests/test_process_manager.py @@ -450,18 +450,40 @@ def restart(): assert call_count["n"] == 1 -def test_monitor_photon_logs_failed_restart(manager: process_manager.PhotonManager): +def test_monitor_photon_exits_on_failed_restart(manager: process_manager.PhotonManager): fake_proc = MagicMock() fake_proc.poll.return_value = 1 manager.photon_process = fake_proc manager.state = process_manager.AppState.RUNNING + with patch.object(manager, "start_photon", return_value=False), pytest.raises(SystemExit) as exc: + manager.monitor_photon() + assert exc.value.code == 1 + + +def test_monitor_photon_restarts_when_process_handle_missing(manager: process_manager.PhotonManager): + manager.photon_process = None + manager.state = process_manager.AppState.RUNNING + + call_count = {"n": 0} + def restart(): + call_count["n"] += 1 manager.should_exit = True - return False + return True with patch.object(manager, "start_photon", side_effect=restart): manager.monitor_photon() + assert call_count["n"] == 1 + + +def test_monitor_photon_exits_when_handle_missing_and_restart_fails(manager: process_manager.PhotonManager): + manager.photon_process = None + manager.state = process_manager.AppState.RUNNING + + with patch.object(manager, "start_photon", return_value=False), pytest.raises(SystemExit) as exc: + manager.monitor_photon() + assert exc.value.code == 1 def test_shutdown_calls_stop_and_exits(manager: process_manager.PhotonManager): diff --git a/tests/utils/test_validate_config.py b/tests/utils/test_validate_config.py index d53cb6d9..b1c18d6f 100644 --- 
a/tests/utils/test_validate_config.py +++ b/tests/utils/test_validate_config.py @@ -85,6 +85,21 @@ def test_validate_config_rejects_multiple_db_regions(monkeypatch: pytest.MonkeyP validate_config() +def test_validate_config_accepts_db_available_region(monkeypatch: pytest.MonkeyPatch): + _set_base_config(monkeypatch) + monkeypatch.setattr(config, "REGION", "germany") + + validate_config() + + +def test_validate_config_rejects_db_unavailable_region(monkeypatch: pytest.MonkeyPatch): + _set_base_config(monkeypatch) + monkeypatch.setattr(config, "REGION", "albania") + + with pytest.raises(ValueError, match="DB index is not available for REGION: 'albania'"): + validate_config() + + def test_validate_config_reports_multiple_errors(monkeypatch: pytest.MonkeyPatch): _set_base_config(monkeypatch) monkeypatch.setattr(config, "UPDATE_STRATEGY", "WRONG") From 56032fc28417a0f6c79ce3cd165396b02e4c44e7 Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Wed, 17 Jun 2026 18:02:36 +0200 Subject: [PATCH 17/27] feat: checksum retry, move before extraction, add notification --- src/entrypoint.py | 1 + src/process_manager.py | 3 +- src/update.py | 37 ++++++++++++++---- src/utils/config.py | 1 + tests/test_update.py | 86 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 120 insertions(+), 8 deletions(-) diff --git a/src/entrypoint.py b/src/entrypoint.py index f26612b9..c5640f14 100644 --- a/src/entrypoint.py +++ b/src/entrypoint.py @@ -22,6 +22,7 @@ def log_config() -> None: logger.info(f"IMPORT_GEOMETRIES: {config.IMPORT_GEOMETRIES}") logger.info(f"FORCE_UPDATE: {config.FORCE_UPDATE}") logger.info(f"DOWNLOAD_MAX_RETRIES: {config.DOWNLOAD_MAX_RETRIES}") + logger.info(f"CHECKSUM_MAX_RETRIES: {config.CHECKSUM_MAX_RETRIES}") logger.info(f"FILE_URL (sanitized): {sanitize_url(config.FILE_URL)}") logger.info(f"MD5_URL (sanitized): {sanitize_url(config.MD5_URL)}") logger.info(f"PHOTON_PARAMS: {config.PHOTON_PARAMS}") diff --git a/src/process_manager.py b/src/process_manager.py index 
5d8505ee..4756299b 100644 --- a/src/process_manager.py +++ b/src/process_manager.py @@ -318,8 +318,9 @@ def run(self): except update.InsufficientSpaceError: logger.error("Setup failed: insufficient disk space") sys.exit(75) - except Exception: + except Exception as e: logger.exception("Setup failed!") + send_notification(f"Photon-Docker setup failed - {e}") sys.exit(1) if not self.start_photon(): diff --git a/src/update.py b/src/update.py index 1f94db06..a131a001 100644 --- a/src/update.py +++ b/src/update.py @@ -7,6 +7,7 @@ from src.downloader import check_disk_space_requirements, clear_temp_dir, download_file, prepare_temp_dir from src.utils import config from src.utils.logger import get_logger +from src.utils.notify import send_notification from src.utils.regions import get_index_url_path from src.utils.sanitize import sanitize_url @@ -143,6 +144,34 @@ def _ensure_disk_space(download_url: str, *, parallel: bool): raise InsufficientSpaceError("Insufficient disk space for update") +def _download_verified_index() -> str: + max_attempts = max(1, int(config.CHECKSUM_MAX_RETRIES)) + + for attempt in range(1, max_attempts + 1): + logging.info("Downloading index") + index_file = download_index() + + if config.SKIP_MD5_CHECK: + return index_file + + md5_file = download_md5() + logging.info("Verifying checksum...") + try: + verify_checksum(md5_file, index_file) + return index_file + except ChecksumMismatchError as e: + if attempt >= max_attempts: + logging.error(f"Checksum verification failed after {max_attempts} attempt(s): {e}") + raise + + logging.warning(f"Checksum verification failed (attempt {attempt}/{max_attempts}), re-downloading: {e}") + send_notification( + f"Photon index download corrupted (checksum mismatch), re-downloading (attempt {attempt}/{max_attempts})" + ) + + raise UpdateError("Index download failed unexpectedly") + + def run_update(strategy: str): logging.info(f"Starting {strategy.lower()} update pipeline...") @@ -151,16 +180,10 @@ def 
run_update(strategy: str): download_url = get_download_url() _ensure_disk_space(download_url, parallel=strategy == "PARALLEL") - logging.info("Downloading index") - index_file = download_index() + index_file = _download_verified_index() extract_index(index_file) - if not config.SKIP_MD5_CHECK: - md5_file = download_md5() - logging.info("Verifying checksum...") - verify_checksum(md5_file, index_file) - logging.info("Activating new index") index.activate(os.path.join(config.TEMP_DIR, "photon_data")) clear_temp_dir() diff --git a/src/utils/config.py b/src/utils/config.py index 73e580aa..1ea40cd3 100644 --- a/src/utils/config.py +++ b/src/utils/config.py @@ -10,6 +10,7 @@ IMPORT_GEOMETRIES = os.getenv("IMPORT_GEOMETRIES", "False").lower() in ("true", "1", "t") FORCE_UPDATE = os.getenv("FORCE_UPDATE", "False").lower() in ("true", "1", "t") DOWNLOAD_MAX_RETRIES = os.getenv("DOWNLOAD_MAX_RETRIES", "3") +CHECKSUM_MAX_RETRIES = os.getenv("CHECKSUM_MAX_RETRIES", "3") FILE_URL = os.getenv("FILE_URL") MD5_URL = os.getenv("MD5_URL") PHOTON_PARAMS = os.getenv("PHOTON_PARAMS") diff --git a/tests/test_update.py b/tests/test_update.py index a449bf1e..5e426cc5 100644 --- a/tests/test_update.py +++ b/tests/test_update.py @@ -342,6 +342,92 @@ def boom(*_): assert activated["n"] == 0 +def test_run_update_redownloads_on_checksum_mismatch(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "SKIP_MD5_CHECK", False) + monkeypatch.setattr(config, "CHECKSUM_MAX_RETRIES", "3") + _make_pipeline_patches(monkeypatch) + + downloads = {"n": 0} + + def fake_download_index(): + downloads["n"] += 1 + return str(Path(config.TEMP_DIR) / "index.tar.bz2") + + monkeypatch.setattr(update, "download_index", fake_download_index) + + verifications = {"n": 0} + + def fake_verify(*_): + verifications["n"] += 1 + if verifications["n"] < 2: + raise update.ChecksumMismatchError("mismatch") + return True + + monkeypatch.setattr(update, "verify_checksum", fake_verify) + + extracted = 
{"n": 0} + monkeypatch.setattr(update, "extract_index", lambda _: extracted.__setitem__("n", extracted["n"] + 1)) + activated = {"n": 0} + monkeypatch.setattr(update.index, "activate", lambda _: activated.__setitem__("n", activated["n"] + 1)) + + with patch("src.update.send_notification") as notify: + update.run_update("PARALLEL") + + assert downloads["n"] == 2 + assert extracted["n"] == 1 + assert activated["n"] == 1 + assert notify.call_count == 1 + + +def test_run_update_checksum_mismatch_exhausts_retries(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "SKIP_MD5_CHECK", False) + monkeypatch.setattr(config, "CHECKSUM_MAX_RETRIES", "2") + _make_pipeline_patches(monkeypatch) + + downloads = {"n": 0} + + def fake_download_index(): + downloads["n"] += 1 + return str(Path(config.TEMP_DIR) / "index.tar.bz2") + + monkeypatch.setattr(update, "download_index", fake_download_index) + + def always_mismatch(*_): + raise update.ChecksumMismatchError("persistent mismatch") + + monkeypatch.setattr(update, "verify_checksum", always_mismatch) + + extracted = {"n": 0} + monkeypatch.setattr(update, "extract_index", lambda _: extracted.__setitem__("n", extracted["n"] + 1)) + activated = {"n": 0} + monkeypatch.setattr(update.index, "activate", lambda _: activated.__setitem__("n", activated["n"] + 1)) + + with ( + patch("src.update.send_notification") as notify, + pytest.raises(update.ChecksumMismatchError, match="persistent mismatch"), + ): + update.run_update("SEQUENTIAL") + + assert downloads["n"] == 2 + assert extracted["n"] == 0 + assert activated["n"] == 0 + assert notify.call_count == 1 + + +def test_run_update_verifies_before_extracting(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "SKIP_MD5_CHECK", False) + monkeypatch.setattr(config, "CHECKSUM_MAX_RETRIES", "1") + _make_pipeline_patches(monkeypatch) + + order: list[str] = [] + monkeypatch.setattr(update, "verify_checksum", lambda *_: order.append("verify") 
or True) + monkeypatch.setattr(update, "extract_index", lambda _: order.append("extract")) + + update.run_update("PARALLEL") + + assert order == ["verify", "extract"] + + def test_run_update_extraction_failure_prevents_activation(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): monkeypatch.setattr(config, "SKIP_MD5_CHECK", True) _make_pipeline_patches(monkeypatch) From fd75833cfcbfb33e416e9e341413ac48891579c6 Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Thu, 18 Jun 2026 14:49:09 +0200 Subject: [PATCH 18/27] docs(README): update index info, add import mode and env vars --- README.md | 112 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 84 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index f6bd666e..5d155c94 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -![Docker Pulls](https://img.shields.io/docker/pulls/rtuszik/photon-docker) ![Docker Image Size](https://img.shields.io/docker/image-size/rtuszik/photon-docker) ![Docker Image Version](https://img.shields.io/docker/v/rtuszik/photon-docker) ![GitHub Release](https://img.shields.io/github/v/release/komoot/photon?label=Photon) ![Lint Status](https://github.com/rtuszik/photon-docker/actions/workflows/lint.yml/badge.svg) +![Docker Pulls](https://img.shields.io/docker/pulls/rtuszik/photon-docker) ![Docker Image Size](https://img.shields.io/docker/image-size/rtuszik/photon-docker) ![Docker Image Version](https://img.shields.io/docker/v/rtuszik/photon-docker) ![GitHub Release](https://img.shields.io/github/v/release/komoot/photon?label=Photon) ![Lint Status](https://github.com/rtuszik/photon-docker/actions/workflows/lint.yml/badge.svg) # Photon Docker Image @@ -15,12 +15,15 @@ enhancing data privacy and integration capabilities with services like [Dawarich ⚠️ **Warning: Large File Sizes** ⚠️ -- The Photon index file is fairly large and growing steadily. - As of the beginning of 2025, around 90GB are needed for the full index. Note that this will grow over time. 
+- The Photon index is quite large and growing steadily. + As of mid-2026, the compressed planet index is around 60GB in `db` mode, the planet JSONL dump is around 26GB in `jsonl` mode. These will grow over time. - Ensure you have sufficient disk space available before running the container. - The initial download and extraction process may take a considerable amount of time. Depending on your hardware, checksum verification and decompression may take multiple hours. +- The JSONL import _will_ take a signficant amount of time. + As a point of reference, a full planet import, tested on a fresh VPS (4C/16GB) took 10hours and 25minutes. + - To reduce the load on the official Photon servers, the default `BASE_URL` for downloading the index files points to a mirror hosted by my. Please see the **Community Mirrors** section for more details. @@ -55,39 +58,86 @@ docker compose up -d The container can be configured using the following environment variables: -| Variable | Parameters | Default | Description | -| ---------------------- | -------------------------------------- | -------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `UPDATE_STRATEGY` | `PARALLEL`, `SEQUENTIAL`, `DISABLED` | `SEQUENTIAL` | Controls how index updates are handled. `PARALLEL` downloads the new index in the background then swaps with minimal downtime (requires 2x space). `SEQUENTIAL` stops Photon, deletes the existing index, downloads the new one, then restarts. `DISABLED` prevents automatic updates. | -| `UPDATE_INTERVAL` | Time string (e.g., "720h", "30d") | `30d` | How often to check for updates. 
To reduce server load, it is recommended to set this to a long interval (e.g., `720h` for 30 days) or disable updates altogether if you do not need the latest data. | -| `REGION` | Region name, country code, or `planet` | `planet` | Optional region for a specific dataset. Can be a continent (`europe`, `asia`), individual country/region (`germany`, `usa`, `japan`), country code (`de`, `us`, `jp`), or `planet` for worldwide data. See [Available Regions](#available-regions) section for details. | -| `LOG_LEVEL` | `DEBUG`, `INFO`, `ERROR` | `INFO` | Controls logging verbosity. | -| `PHOTON_LISTEN_IP` | IP Address | 0.0.0.0 | Populates `-listen-ip` parameter for photon | -| `FORCE_UPDATE` | `TRUE`, `FALSE` | `FALSE` | Forces an index update on container startup, regardless of `UPDATE_STRATEGY`. | -| `DOWNLOAD_MAX_RETRIES` | Number | `3` | Maximum number of retries for failed downloads. | -| `INITIAL_DOWNLOAD` | `TRUE`, `FALSE` | `TRUE` | Controls whether the container performs the initial index download when the Photon data directory is empty. Useful for manual imports. | -| `BASE_URL` | Valid URL | `https://r2.koalasec.org/public` | Custom base URL for index data downloads. Should point to the parent directory of index files. The default has been changed to a community mirror to reduce load on the GraphHopper servers. | -| `SKIP_MD5_CHECK` | `TRUE`, `FALSE` | `FALSE` | Optionally skip MD5 verification of downloaded index files. | -| `SKIP_SPACE_CHECK` | `TRUE`, `FALSE` | `FALSE` | Skip disk space verification before downloading. | -| `FILE_URL` | URL to a .tar.bz2 file | - | Set a custom URL for the index file to be downloaded (e.g., "https://download1.graphhopper.com/public/experimental/photon-db-latest.tar.bz2"). This must be a tar.bz2 format. Setting this overrides `UPDATE_STRATEGY` to `DISABLED`, and `SKIP_MD5_CHECK` to true if `MD5_URL` is not set. 
| -| `MD5_URL` | URL to the MD5 file to use | - | Set a custom URL for the md5 file to be downloaded (e.g., "https://download1.graphhopper.com/public/experimental/photon-db-latest.tar.bz2.md5"). | -| `PHOTON_PARAMS` | Photon executable parameters | - | See `https://github.com/komoot/photon#running-photon.` | -| `APPRISE_URLS` | Comma-separated Apprise URLs | - | Optional notification URLs for [Apprise](https://github.com/caronc/apprise) to send status updates (e.g., download completion, errors). Supports multiple services like Pushover, Slack, email, etc. Example: `pover://user@token,mailto://user:pass@gmail.com` | -| `PUID` | User ID | 9011 | The User ID for the photon process. Set this to your host user's ID (`id -u`) to prevent permission errors when using bind mounts. | -| `PGID` | Group ID | 9011 | The Group ID for the photon process. Set this to your host group's ID (`id -g`) to prevent permission errors when using bind mounts. | -| `ENABLE_METRICS` | `TRUE`, `FALSE` | `FALSE` | Enables Prometheus Metrics endpoint at /metrics | +| Variable | Parameters | Default | Description | +| ---------------------- | -------------------------------------- | -------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `IMPORT_MODE` | `db`, `jsonl` | `db` | Selects how the index is built. `db` downloads a prebuilt index for a single region. `jsonl` (**experimental**) builds the index in-container from OpenStreetMap JSONL dumps and supports combining multiple regions. See [Import Modes](#import-modes). 
| +| `UPDATE_STRATEGY` | `PARALLEL`, `SEQUENTIAL`, `DISABLED` | `SEQUENTIAL` | Controls how index updates are handled. `PARALLEL` downloads the new index in the background then swaps with minimal downtime (requires 2x space). `SEQUENTIAL` stops Photon, deletes the existing index, downloads the new one, then restarts. `DISABLED` prevents automatic updates. Only applies in `db` mode. | +| `UPDATE_INTERVAL` | Time string (e.g., "720h", "30d") | `30d` | How often to check for updates. To reduce server load, it is recommended to set this to a long interval (e.g., `720h` for 30 days) or disable updates altogether if you do not need the latest data. | +| `REGION` | Region name, country code, or `planet` | `planet` | Region for a specific dataset. Can be a continent (`europe`, `asia`), individual country/region (`germany`, `usa`, `japan`), country code (`de`, `us`, `jp`), or `planet` for worldwide data. In `db` mode exactly one region may be set, and it must be one with a prebuilt index. In `jsonl` mode you may pass multiple regions as a comma-separated list. See [Available Regions](#available-regions) section for details. | +| `LANGUAGES` | Comma-separated language codes | - | Only used in `jsonl` mode. Languages to import, passed to Photon's `-languages` (e.g. `en,de,fr`). | +| `EXTRA_TAGS` | Comma-separated OSM tags | - | Only used in `jsonl` mode. Additional OSM tags to import, passed to Photon's `-extra-tags`. | +| `IMPORT_GEOMETRIES` | `TRUE`, `FALSE` | `FALSE` | Only used in `jsonl` mode. When `TRUE`, imports full geometries (`-full-geometries`) instead of centroids. | +| `LOG_LEVEL` | `DEBUG`, `INFO`, `ERROR` | `INFO` | Controls logging verbosity. | +| `PHOTON_LISTEN_IP` | IP Address | 0.0.0.0 | Populates `-listen-ip` parameter for photon | +| `FORCE_UPDATE` | `TRUE`, `FALSE` | `FALSE` | Forces an index update on container startup, regardless of `UPDATE_STRATEGY`. | +| `DOWNLOAD_MAX_RETRIES` | Number | `3` | Maximum number of retries for failed downloads. 
| +| `CHECKSUM_MAX_RETRIES` | Number | `3` | Maximum number of download attempts when the MD5 checksum of the downloaded index does not match. A corrupted download is re-fetched up to this many times before the update fails. Ignored when `SKIP_MD5_CHECK` is enabled. | +| `INITIAL_DOWNLOAD` | `TRUE`, `FALSE` | `TRUE` | Controls whether the container performs the initial index download when the Photon data directory is empty. Useful for manual imports. | +| `BASE_URL` | Valid URL | `https://r2.koalasec.org/public` | Custom base URL for index data downloads. Should point to the parent directory of index files. The default has been changed to a community mirror to reduce load on the GraphHopper servers. | +| `SKIP_MD5_CHECK` | `TRUE`, `FALSE` | `FALSE` | Optionally skip MD5 verification of downloaded index files. | +| `SKIP_SPACE_CHECK` | `TRUE`, `FALSE` | `FALSE` | Skip disk space verification before downloading. | +| `FILE_URL` | URL to a .tar.bz2 file | - | Set a custom URL for the index file to be downloaded (e.g., "https://download1.graphhopper.com/public/experimental/photon-db-latest.tar.bz2"). This must be a tar.bz2 format. Setting this overrides `UPDATE_STRATEGY` to `DISABLED`, and `SKIP_MD5_CHECK` to true if `MD5_URL` is not set. | +| `MD5_URL` | URL to the MD5 file to use | - | Set a custom URL for the md5 file to be downloaded (e.g., "https://download1.graphhopper.com/public/experimental/photon-db-latest.tar.bz2.md5"). | +| `PHOTON_PARAMS` | Photon executable parameters | - | See `https://github.com/komoot/photon#running-photon.` | +| `JAVA_PARAMS` | Java parameters | - | Extra parameters passed to the `java` command that runs Photon (e.g. heap settings like `-Xmx4g`). | +| `APPRISE_URLS` | Comma-separated Apprise URLs | - | Optional notification URLs for [Apprise](https://github.com/caronc/apprise) to send status updates (e.g., download completion, errors). Supports multiple services like Pushover, Slack, email, etc. 
Example: `pover://user@token,mailto://user:pass@gmail.com` | +| `PUID` | User ID | 9011 | The User ID for the photon process. Set this to your host user's ID (`id -u`) to prevent permission errors when using bind mounts. | +| `PGID` | Group ID | 9011 | The Group ID for the photon process. Set this to your host group's ID (`id -g`) to prevent permission errors when using bind mounts. | +| `ENABLE_METRICS` | `TRUE`, `FALSE` | `FALSE` | Enables Prometheus Metrics endpoint at /metrics | + +## Import Modes + +The image can build its search index in two ways, selected with `IMPORT_MODE`: + +- **`db` (default):** Downloads a prebuilt index (`.tar.bz2`) for a single region. This is the original behaviour and supports scheduled updates via `UPDATE_STRATEGY` / `UPDATE_INTERVAL`. +- **`jsonl` (experimental):** Builds the index inside the container from compressed OpenStreetMap JSONL dumps. Supports combining multiple regions and tuning the import with `LANGUAGES`, `EXTRA_TAGS`, and `IMPORT_GEOMETRIES`. + +### JSONL Import Mode (Experimental) + +> ⚠️ **Experimental:** `jsonl` mode is new and still being stabilised. Its behaviour and configuration may change in future releases, and it is not yet recommended for production use. For stable deployments, use the default `db` mode. + +Set `IMPORT_MODE=jsonl` and list one or more regions in `REGION` (comma-separated). The dump is streamed and decompressed directly into Photon's importer, so no decompressed copy is written to disk. 
+ +```yaml +services: + photon: + image: rtuszik/photon-docker:latest + environment: + - IMPORT_MODE=jsonl + - REGION=germany,austria,switzerland-liechtenstein + - LANGUAGES=en,de + # - EXTRA_TAGS=surface,smoothness # Optional + # - IMPORT_GEOMETRIES=true # Optional: full geometries + volumes: + - photon_data:/photon/data + restart: unless-stopped + ports: + - "2322:2322" +volumes: + photon_data: +``` + +When multiple regions are requested, the smallest dump that covers all of them (a continent, or `planet`) is downloaded, then filtered down to the requested countries. + +> ⚠️ **Note:** `jsonl` mode does not currently support scheduled or automatic updates. `UPDATE_STRATEGY` and `UPDATE_INTERVAL` are ignored, and the index is built once at startup. To rebuild, set `FORCE_UPDATE=TRUE` or recreate the data volume. `FILE_URL` and `MD5_URL` are not supported in this mode. ## Available Regions +Region availability depends on `IMPORT_MODE`. **All** regions listed below are available as JSONL dumps (`jsonl` mode). Only a subset have a prebuilt **DB** index, these are the only valid `REGION` values when `IMPORT_MODE=db`. + ### 1. Planet-wide Data (This is the default if no region is specified) - **Region**: `planet` -- **Size**: ~116GB +- **Availability**: DB and JSONL +- **Size**: ~61GB compressed (DB), ~26GB compressed (JSONL) - **Coverage**: Worldwide ### 2. Continental Data +Available in both DB and JSONL modes. The sizes below are approximate older estimates and may not reflect current dump sizes. + - **africa** (~2.8GB) - **asia** (~13.5GB) - **australia-oceania** (~2.9GB) @@ -95,9 +145,9 @@ The container can be configured using the following environment variables: - **north-america** (~29.5GB) - **south-america** (~13.8GB) -### 3. Individual Countries/Regions +### 3. Individual Countries/Regions (DB and JSONL) -Only **16 regions** have individual database downloads available: +Only **16 regions** have a prebuilt **DB** index. 
They are also available as JSONL dumps: #### Asia (2 regions) @@ -127,6 +177,12 @@ Only **16 regions** have individual database downloads available: - **argentina** (also: `ar`) +### 4. JSONL-only Individual Regions (Experimental) + +These regions are available **only** as JSONL dumps (experimental `jsonl` mode); they have no prebuilt DB index. All are sub-regions of Europe. Two-letter country-code aliases work where defined (e.g. `pl`, `se`, `ch`). + +- **albania** (`al`), **baltics** (`ee`/`lt`/`lv`), **belarus** (`by`), **belgium** (`be`), **bosnia-herzegovina** (`ba`), **british-islands** (also: `uk`, `great britain`), **bulgaria** (`bg`), **croatia** (`hr`), **cyprus** (`cy`), **czech-republic** (`cz`, `czechia`), **finland** (`fi`), **georgia** (`ge`), **greece** (`gr`), **hungary** (`hu`), **iceland-faroe** (`is`), **ireland** (`ie`), **italy** (`it`), **kosovo** (`xk`), **macedonia** (`mk`), **malta** (`mt`), **moldova** (`md`), **montenegro** (`me`), **norway** (`no`), **poland** (`pl`), **portugal** (`pt`), **romania** (`ro`), **serbia** (`rs`), **slovenia** (`si`), **sweden** (`se`), **switzerland-liechtenstein** (`ch`), **turkey** (`tr`), **ukraine** (`ua`) + ### Usage Examples ```yaml @@ -157,7 +213,7 @@ If you are hosting a public mirror, please open an issue or pull request to have ## Metrics -When `ENABLE_METRICS` is set to `TRUE`, Prometheus metrics are exposed through the at the `/metrics` endpoint. +When `ENABLE_METRICS` is set to `TRUE`, Prometheus metrics are exposed at the `/metrics` endpoint. An example Grafana Dashboard is available here at [Grafana Labs](https://grafana.com/grafana/dashboards/24901-photon/). 
@@ -187,7 +243,7 @@ PHOTON_API_USE_HTTPS=false ### Build and Run Locally ```bash -docker compose -f docker-compose.build.yml build --build-arg PHOTON_VERSION=0.6.2 +docker compose -f docker-compose.build.yml build --build-arg PHOTON_VERSION=1.2.0 ``` ### Accessing the API From 50fbd4163f9e2f9fc7c15eb4cbd931fe08ef17d1 Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Thu, 18 Jun 2026 14:52:25 +0200 Subject: [PATCH 19/27] chore: uv lock update --- uv.lock | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/uv.lock b/uv.lock index 90d70b3c..db63d721 100644 --- a/uv.lock +++ b/uv.lock @@ -226,6 +226,7 @@ dependencies = [ { name = "schedule" }, { name = "tqdm" }, { name = "urllib3" }, + { name = "zstandard" }, ] [package.dev-dependencies] @@ -247,6 +248,7 @@ requires-dist = [ { name = "schedule", specifier = ">=1.2.2" }, { name = "tqdm", specifier = "==4.67.3" }, { name = "urllib3", specifier = "==2.7.0" }, + { name = "zstandard", specifier = ">=0.23.0" }, ] [package.metadata.requires-dev] @@ -531,3 +533,60 @@ sdist = { url = "https://files.pythonhosted.org/packages/8e/25/925f35db758a0f919 wheels = [ { url = "https://files.pythonhosted.org/packages/a0/56/0cc15b8ff2613c1d5c3dc1f3f576ede1c43868c1bc2e5ccaa2d4bcd7974d/vulture-2.14-py2.py3-none-any.whl", hash = "sha256:d9a90dba89607489548a49d557f8bac8112bd25d3cbc8aeef23e860811bd5ed9", size = 28915, upload-time = "2024-12-08T17:39:40.573Z" }, ] + +[[package]] +name = "zstandard" +version = "0.25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fd/aa/3e0508d5a5dd96529cdc5a97011299056e14c6505b678fd58938792794b1/zstandard-0.25.0.tar.gz", hash = "sha256:7713e1179d162cf5c7906da876ec2ccb9c3a9dcbdffef0cc7f70c3667a205f0b", size = 711513, upload-time = "2025-09-14T22:15:54.002Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/82/fc/f26eb6ef91ae723a03e16eddb198abcfce2bc5a42e224d44cc8b6765e57e/zstandard-0.25.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7b3c3a3ab9daa3eed242d6ecceead93aebbb8f5f84318d82cee643e019c4b73b", size = 795738, upload-time = "2025-09-14T22:16:56.237Z" }, + { url = "https://files.pythonhosted.org/packages/aa/1c/d920d64b22f8dd028a8b90e2d756e431a5d86194caa78e3819c7bf53b4b3/zstandard-0.25.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:913cbd31a400febff93b564a23e17c3ed2d56c064006f54efec210d586171c00", size = 640436, upload-time = "2025-09-14T22:16:57.774Z" }, + { url = "https://files.pythonhosted.org/packages/53/6c/288c3f0bd9fcfe9ca41e2c2fbfd17b2097f6af57b62a81161941f09afa76/zstandard-0.25.0-cp312-cp312-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:011d388c76b11a0c165374ce660ce2c8efa8e5d87f34996aa80f9c0816698b64", size = 5343019, upload-time = "2025-09-14T22:16:59.302Z" }, + { url = "https://files.pythonhosted.org/packages/1e/15/efef5a2f204a64bdb5571e6161d49f7ef0fffdbca953a615efbec045f60f/zstandard-0.25.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6dffecc361d079bb48d7caef5d673c88c8988d3d33fb74ab95b7ee6da42652ea", size = 5063012, upload-time = "2025-09-14T22:17:01.156Z" }, + { url = "https://files.pythonhosted.org/packages/b7/37/a6ce629ffdb43959e92e87ebdaeebb5ac81c944b6a75c9c47e300f85abdf/zstandard-0.25.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:7149623bba7fdf7e7f24312953bcf73cae103db8cae49f8154dd1eadc8a29ecb", size = 5394148, upload-time = "2025-09-14T22:17:03.091Z" }, + { url = "https://files.pythonhosted.org/packages/e3/79/2bf870b3abeb5c070fe2d670a5a8d1057a8270f125ef7676d29ea900f496/zstandard-0.25.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:6a573a35693e03cf1d67799fd01b50ff578515a8aeadd4595d2a7fa9f3ec002a", size = 5451652, upload-time = 
"2025-09-14T22:17:04.979Z" }, + { url = "https://files.pythonhosted.org/packages/53/60/7be26e610767316c028a2cbedb9a3beabdbe33e2182c373f71a1c0b88f36/zstandard-0.25.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5a56ba0db2d244117ed744dfa8f6f5b366e14148e00de44723413b2f3938a902", size = 5546993, upload-time = "2025-09-14T22:17:06.781Z" }, + { url = "https://files.pythonhosted.org/packages/85/c7/3483ad9ff0662623f3648479b0380d2de5510abf00990468c286c6b04017/zstandard-0.25.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:10ef2a79ab8e2974e2075fb984e5b9806c64134810fac21576f0668e7ea19f8f", size = 5046806, upload-time = "2025-09-14T22:17:08.415Z" }, + { url = "https://files.pythonhosted.org/packages/08/b3/206883dd25b8d1591a1caa44b54c2aad84badccf2f1de9e2d60a446f9a25/zstandard-0.25.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:aaf21ba8fb76d102b696781bddaa0954b782536446083ae3fdaa6f16b25a1c4b", size = 5576659, upload-time = "2025-09-14T22:17:10.164Z" }, + { url = "https://files.pythonhosted.org/packages/9d/31/76c0779101453e6c117b0ff22565865c54f48f8bd807df2b00c2c404b8e0/zstandard-0.25.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1869da9571d5e94a85a5e8d57e4e8807b175c9e4a6294e3b66fa4efb074d90f6", size = 4953933, upload-time = "2025-09-14T22:17:11.857Z" }, + { url = "https://files.pythonhosted.org/packages/18/e1/97680c664a1bf9a247a280a053d98e251424af51f1b196c6d52f117c9720/zstandard-0.25.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:809c5bcb2c67cd0ed81e9229d227d4ca28f82d0f778fc5fea624a9def3963f91", size = 5268008, upload-time = "2025-09-14T22:17:13.627Z" }, + { url = "https://files.pythonhosted.org/packages/1e/73/316e4010de585ac798e154e88fd81bb16afc5c5cb1a72eeb16dd37e8024a/zstandard-0.25.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f27662e4f7dbf9f9c12391cb37b4c4c3cb90ffbd3b1fb9284dadbbb8935fa708", size = 5433517, upload-time = "2025-09-14T22:17:16.103Z" }, + { url = 
"https://files.pythonhosted.org/packages/5b/60/dd0f8cfa8129c5a0ce3ea6b7f70be5b33d2618013a161e1ff26c2b39787c/zstandard-0.25.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:99c0c846e6e61718715a3c9437ccc625de26593fea60189567f0118dc9db7512", size = 5814292, upload-time = "2025-09-14T22:17:17.827Z" }, + { url = "https://files.pythonhosted.org/packages/fc/5f/75aafd4b9d11b5407b641b8e41a57864097663699f23e9ad4dbb91dc6bfe/zstandard-0.25.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:474d2596a2dbc241a556e965fb76002c1ce655445e4e3bf38e5477d413165ffa", size = 5360237, upload-time = "2025-09-14T22:17:19.954Z" }, + { url = "https://files.pythonhosted.org/packages/ff/8d/0309daffea4fcac7981021dbf21cdb2e3427a9e76bafbcdbdf5392ff99a4/zstandard-0.25.0-cp312-cp312-win32.whl", hash = "sha256:23ebc8f17a03133b4426bcc04aabd68f8236eb78c3760f12783385171b0fd8bd", size = 436922, upload-time = "2025-09-14T22:17:24.398Z" }, + { url = "https://files.pythonhosted.org/packages/79/3b/fa54d9015f945330510cb5d0b0501e8253c127cca7ebe8ba46a965df18c5/zstandard-0.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffef5a74088f1e09947aecf91011136665152e0b4b359c42be3373897fb39b01", size = 506276, upload-time = "2025-09-14T22:17:21.429Z" }, + { url = "https://files.pythonhosted.org/packages/ea/6b/8b51697e5319b1f9ac71087b0af9a40d8a6288ff8025c36486e0c12abcc4/zstandard-0.25.0-cp312-cp312-win_arm64.whl", hash = "sha256:181eb40e0b6a29b3cd2849f825e0fa34397f649170673d385f3598ae17cca2e9", size = 462679, upload-time = "2025-09-14T22:17:23.147Z" }, + { url = "https://files.pythonhosted.org/packages/35/0b/8df9c4ad06af91d39e94fa96cc010a24ac4ef1378d3efab9223cc8593d40/zstandard-0.25.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec996f12524f88e151c339688c3897194821d7f03081ab35d31d1e12ec975e94", size = 795735, upload-time = "2025-09-14T22:17:26.042Z" }, + { url = 
"https://files.pythonhosted.org/packages/3f/06/9ae96a3e5dcfd119377ba33d4c42a7d89da1efabd5cb3e366b156c45ff4d/zstandard-0.25.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a1a4ae2dec3993a32247995bdfe367fc3266da832d82f8438c8570f989753de1", size = 640440, upload-time = "2025-09-14T22:17:27.366Z" }, + { url = "https://files.pythonhosted.org/packages/d9/14/933d27204c2bd404229c69f445862454dcc101cd69ef8c6068f15aaec12c/zstandard-0.25.0-cp313-cp313-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:e96594a5537722fdfb79951672a2a63aec5ebfb823e7560586f7484819f2a08f", size = 5343070, upload-time = "2025-09-14T22:17:28.896Z" }, + { url = "https://files.pythonhosted.org/packages/6d/db/ddb11011826ed7db9d0e485d13df79b58586bfdec56e5c84a928a9a78c1c/zstandard-0.25.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bfc4e20784722098822e3eee42b8e576b379ed72cca4a7cb856ae733e62192ea", size = 5063001, upload-time = "2025-09-14T22:17:31.044Z" }, + { url = "https://files.pythonhosted.org/packages/db/00/87466ea3f99599d02a5238498b87bf84a6348290c19571051839ca943777/zstandard-0.25.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:457ed498fc58cdc12fc48f7950e02740d4f7ae9493dd4ab2168a47c93c31298e", size = 5394120, upload-time = "2025-09-14T22:17:32.711Z" }, + { url = "https://files.pythonhosted.org/packages/2b/95/fc5531d9c618a679a20ff6c29e2b3ef1d1f4ad66c5e161ae6ff847d102a9/zstandard-0.25.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:fd7a5004eb1980d3cefe26b2685bcb0b17989901a70a1040d1ac86f1d898c551", size = 5451230, upload-time = "2025-09-14T22:17:34.41Z" }, + { url = "https://files.pythonhosted.org/packages/63/4b/e3678b4e776db00f9f7b2fe58e547e8928ef32727d7a1ff01dea010f3f13/zstandard-0.25.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8e735494da3db08694d26480f1493ad2cf86e99bdd53e8e9771b2752a5c0246a", size = 5547173, upload-time = 
"2025-09-14T22:17:36.084Z" }, + { url = "https://files.pythonhosted.org/packages/4e/d5/ba05ed95c6b8ec30bd468dfeab20589f2cf709b5c940483e31d991f2ca58/zstandard-0.25.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3a39c94ad7866160a4a46d772e43311a743c316942037671beb264e395bdd611", size = 5046736, upload-time = "2025-09-14T22:17:37.891Z" }, + { url = "https://files.pythonhosted.org/packages/50/d5/870aa06b3a76c73eced65c044b92286a3c4e00554005ff51962deef28e28/zstandard-0.25.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:172de1f06947577d3a3005416977cce6168f2261284c02080e7ad0185faeced3", size = 5576368, upload-time = "2025-09-14T22:17:40.206Z" }, + { url = "https://files.pythonhosted.org/packages/5d/35/398dc2ffc89d304d59bc12f0fdd931b4ce455bddf7038a0a67733a25f550/zstandard-0.25.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3c83b0188c852a47cd13ef3bf9209fb0a77fa5374958b8c53aaa699398c6bd7b", size = 4954022, upload-time = "2025-09-14T22:17:41.879Z" }, + { url = "https://files.pythonhosted.org/packages/9a/5c/36ba1e5507d56d2213202ec2b05e8541734af5f2ce378c5d1ceaf4d88dc4/zstandard-0.25.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1673b7199bbe763365b81a4f3252b8e80f44c9e323fc42940dc8843bfeaf9851", size = 5267889, upload-time = "2025-09-14T22:17:43.577Z" }, + { url = "https://files.pythonhosted.org/packages/70/e8/2ec6b6fb7358b2ec0113ae202647ca7c0e9d15b61c005ae5225ad0995df5/zstandard-0.25.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0be7622c37c183406f3dbf0cba104118eb16a4ea7359eeb5752f0794882fc250", size = 5433952, upload-time = "2025-09-14T22:17:45.271Z" }, + { url = "https://files.pythonhosted.org/packages/7b/01/b5f4d4dbc59ef193e870495c6f1275f5b2928e01ff5a81fecb22a06e22fb/zstandard-0.25.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5f5e4c2a23ca271c218ac025bd7d635597048b366d6f31f420aaeb715239fc98", size = 5814054, upload-time = "2025-09-14T22:17:47.08Z" }, + { url = 
"https://files.pythonhosted.org/packages/b2/e5/fbd822d5c6f427cf158316d012c5a12f233473c2f9c5fe5ab1ae5d21f3d8/zstandard-0.25.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f187a0bb61b35119d1926aee039524d1f93aaf38a9916b8c4b78ac8514a0aaf", size = 5360113, upload-time = "2025-09-14T22:17:48.893Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e0/69a553d2047f9a2c7347caa225bb3a63b6d7704ad74610cb7823baa08ed7/zstandard-0.25.0-cp313-cp313-win32.whl", hash = "sha256:7030defa83eef3e51ff26f0b7bfb229f0204b66fe18e04359ce3474ac33cbc09", size = 436936, upload-time = "2025-09-14T22:17:52.658Z" }, + { url = "https://files.pythonhosted.org/packages/d9/82/b9c06c870f3bd8767c201f1edbdf9e8dc34be5b0fbc5682c4f80fe948475/zstandard-0.25.0-cp313-cp313-win_amd64.whl", hash = "sha256:1f830a0dac88719af0ae43b8b2d6aef487d437036468ef3c2ea59c51f9d55fd5", size = 506232, upload-time = "2025-09-14T22:17:50.402Z" }, + { url = "https://files.pythonhosted.org/packages/d4/57/60c3c01243bb81d381c9916e2a6d9e149ab8627c0c7d7abb2d73384b3c0c/zstandard-0.25.0-cp313-cp313-win_arm64.whl", hash = "sha256:85304a43f4d513f5464ceb938aa02c1e78c2943b29f44a750b48b25ac999a049", size = 462671, upload-time = "2025-09-14T22:17:51.533Z" }, + { url = "https://files.pythonhosted.org/packages/3d/5c/f8923b595b55fe49e30612987ad8bf053aef555c14f05bb659dd5dbe3e8a/zstandard-0.25.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e29f0cf06974c899b2c188ef7f783607dbef36da4c242eb6c82dcd8b512855e3", size = 795887, upload-time = "2025-09-14T22:17:54.198Z" }, + { url = "https://files.pythonhosted.org/packages/8d/09/d0a2a14fc3439c5f874042dca72a79c70a532090b7ba0003be73fee37ae2/zstandard-0.25.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:05df5136bc5a011f33cd25bc9f506e7426c0c9b3f9954f056831ce68f3b6689f", size = 640658, upload-time = "2025-09-14T22:17:55.423Z" }, + { url = 
"https://files.pythonhosted.org/packages/5d/7c/8b6b71b1ddd517f68ffb55e10834388d4f793c49c6b83effaaa05785b0b4/zstandard-0.25.0-cp314-cp314-manylinux2010_i686.manylinux_2_12_i686.manylinux_2_28_i686.whl", hash = "sha256:f604efd28f239cc21b3adb53eb061e2a205dc164be408e553b41ba2ffe0ca15c", size = 5379849, upload-time = "2025-09-14T22:17:57.372Z" }, + { url = "https://files.pythonhosted.org/packages/a4/86/a48e56320d0a17189ab7a42645387334fba2200e904ee47fc5a26c1fd8ca/zstandard-0.25.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223415140608d0f0da010499eaa8ccdb9af210a543fac54bce15babbcfc78439", size = 5058095, upload-time = "2025-09-14T22:17:59.498Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ad/eb659984ee2c0a779f9d06dbfe45e2dc39d99ff40a319895df2d3d9a48e5/zstandard-0.25.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2e54296a283f3ab5a26fc9b8b5d4978ea0532f37b231644f367aa588930aa043", size = 5551751, upload-time = "2025-09-14T22:18:01.618Z" }, + { url = "https://files.pythonhosted.org/packages/61/b3/b637faea43677eb7bd42ab204dfb7053bd5c4582bfe6b1baefa80ac0c47b/zstandard-0.25.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ca54090275939dc8ec5dea2d2afb400e0f83444b2fc24e07df7fdef677110859", size = 6364818, upload-time = "2025-09-14T22:18:03.769Z" }, + { url = "https://files.pythonhosted.org/packages/31/dc/cc50210e11e465c975462439a492516a73300ab8caa8f5e0902544fd748b/zstandard-0.25.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e09bb6252b6476d8d56100e8147b803befa9a12cea144bbe629dd508800d1ad0", size = 5560402, upload-time = "2025-09-14T22:18:05.954Z" }, + { url = "https://files.pythonhosted.org/packages/c9/ae/56523ae9c142f0c08efd5e868a6da613ae76614eca1305259c3bf6a0ed43/zstandard-0.25.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:a9ec8c642d1ec73287ae3e726792dd86c96f5681eb8df274a757bf62b750eae7", size = 4955108, upload-time = "2025-09-14T22:18:07.68Z" }, + { url = "https://files.pythonhosted.org/packages/98/cf/c899f2d6df0840d5e384cf4c4121458c72802e8bda19691f3b16619f51e9/zstandard-0.25.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a4089a10e598eae6393756b036e0f419e8c1d60f44a831520f9af41c14216cf2", size = 5269248, upload-time = "2025-09-14T22:18:09.753Z" }, + { url = "https://files.pythonhosted.org/packages/1b/c0/59e912a531d91e1c192d3085fc0f6fb2852753c301a812d856d857ea03c6/zstandard-0.25.0-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:f67e8f1a324a900e75b5e28ffb152bcac9fbed1cc7b43f99cd90f395c4375344", size = 5430330, upload-time = "2025-09-14T22:18:11.966Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/7e31db1240de2df22a58e2ea9a93fc6e38cc29353e660c0272b6735d6669/zstandard-0.25.0-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:9654dbc012d8b06fc3d19cc825af3f7bf8ae242226df5f83936cb39f5fdc846c", size = 5811123, upload-time = "2025-09-14T22:18:13.907Z" }, + { url = "https://files.pythonhosted.org/packages/f6/49/fac46df5ad353d50535e118d6983069df68ca5908d4d65b8c466150a4ff1/zstandard-0.25.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:4203ce3b31aec23012d3a4cf4a2ed64d12fea5269c49aed5e4c3611b938e4088", size = 5359591, upload-time = "2025-09-14T22:18:16.465Z" }, + { url = "https://files.pythonhosted.org/packages/c2/38/f249a2050ad1eea0bb364046153942e34abba95dd5520af199aed86fbb49/zstandard-0.25.0-cp314-cp314-win32.whl", hash = "sha256:da469dc041701583e34de852d8634703550348d5822e66a0c827d39b05365b12", size = 444513, upload-time = "2025-09-14T22:18:20.61Z" }, + { url = "https://files.pythonhosted.org/packages/3a/43/241f9615bcf8ba8903b3f0432da069e857fc4fd1783bd26183db53c4804b/zstandard-0.25.0-cp314-cp314-win_amd64.whl", hash = "sha256:c19bcdd826e95671065f8692b5a4aa95c52dc7a02a4c5a0cac46deb879a017a2", size = 516118, upload-time = "2025-09-14T22:18:17.849Z" }, 
+ { url = "https://files.pythonhosted.org/packages/f0/ef/da163ce2450ed4febf6467d77ccb4cd52c4c30ab45624bad26ca0a27260c/zstandard-0.25.0-cp314-cp314-win_arm64.whl", hash = "sha256:d7541afd73985c630bafcd6338d2518ae96060075f9463d7dc14cfb33514383d", size = 476940, upload-time = "2025-09-14T22:18:19.088Z" }, +] From af6f7fb88767ea7bcb072c0ff988ac14efc8a5c9 Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Thu, 18 Jun 2026 22:17:52 +0200 Subject: [PATCH 20/27] Update .github/workflows/full-test-jsonl.yml Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- .github/workflows/full-test-jsonl.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/full-test-jsonl.yml b/.github/workflows/full-test-jsonl.yml index e89d255c..d26bd984 100644 --- a/.github/workflows/full-test-jsonl.yml +++ b/.github/workflows/full-test-jsonl.yml @@ -67,21 +67,21 @@ jobs: docker logs -f $CONTAINER_NAME & LOGS_PID=$! - SECONDS=0 + ELAPSED=0 TIMEOUT=360 - while [ $SECONDS -lt $TIMEOUT ]; do + while [ "$ELAPSED" -lt "$TIMEOUT" ]; do HEALTH_STATUS=$(docker inspect --format='{{.State.Health.Status}}' $CONTAINER_NAME 2>/dev/null || echo "unknown") if [ "$HEALTH_STATUS" = "healthy" ]; then - echo "Container is healthy after $SECONDS seconds" + echo "Container is healthy after ${ELAPSED} seconds" kill $LOGS_PID 2>/dev/null || true exit 0 fi - echo "Health status: $HEALTH_STATUS (elapsed: ${SECONDS}s)" + echo "Health status: $HEALTH_STATUS (elapsed: ${ELAPSED}s)" sleep 10 - SECONDS=$((SECONDS + 10)) + ELAPSED=$((ELAPSED + 10)) done kill $LOGS_PID 2>/dev/null || true From 85e611eba0165e242e7fd2ab4f043041d5ea2c83 Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Fri, 19 Jun 2026 01:46:45 +0200 Subject: [PATCH 21/27] ci: update lint workflow and fix README badges and typos --- .github/workflows/lint.yml | 4 ++++ README.md | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/lint.yml 
b/.github/workflows/lint.yml index 27e436ca..fad01a82 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -7,8 +7,12 @@ on: - "tests/**" - "pyproject.toml" - "uv.lock" + - ".python-version" workflow_dispatch: +permissions: + contents: read + jobs: lint: runs-on: ubuntu-latest diff --git a/README.md b/README.md index 5d155c94..1588cc8b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -![Docker Pulls](https://img.shields.io/docker/pulls/rtuszik/photon-docker) ![Docker Image Size](https://img.shields.io/docker/imje-size/rtuszik/photon-docker) ![Docker Image Version](https://img.shields.io/docker/v/rtuszik/photon-docker) ![GitHub Release](https://img.shields.io/github/v/release/komoot/photon?label=Photon) ![Lint Status](https://github.com/rtuszik/photon-docker/actions/workflows/lint.yml/badge.svg) +![Docker Pulls](https://img.shields.io/docker/pulls/rtuszik/photon-docker) ![Docker Image Size](https://img.shields.io/docker/image-size/rtuszik/photon-docker) ![Docker Image Version](https://img.shields.io/docker/v/rtuszik/photon-docker) ![GitHub Release](https://img.shields.io/github/v/release/komoot/photon?label=Photon) ![Lint Status](https://github.com/rtuszik/photon-docker/actions/workflows/lint.yml/badge.svg) # Photon Docker Image @@ -21,8 +21,8 @@ enhancing data privacy and integration capabilities with services like [Dawarich - The initial download and extraction process may take a considerable amount of time. Depending on your hardware, checksum verification and decompression may take multiple hours. -- The JSONL import _will_ take a signficant amount of time. - As a point of reference, a full planet import, tested on a fresh VPS (4C/16GB) took 10hours and 25minutes. +- The JSONL import _will_ take a significant amount of time. + As a point of reference, a full planet import, tested on a fresh VPS (4C/16GB) took 10 hours and 25 minutes. 
- To reduce the load on the official Photon servers, the default `BASE_URL` for downloading the index files points to a mirror hosted by my. From 70264b8438528ed4e19d1eedbdea9b2723c17094 Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Fri, 19 Jun 2026 01:50:03 +0200 Subject: [PATCH 22/27] ci(github-actions): add read permissions and use env vars in summary --- .github/workflows/build-and-push.yml | 4 ++++ .github/workflows/full-test-jsonl.yml | 9 +++++++-- .github/workflows/full-test.yml | 3 +++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-and-push.yml b/.github/workflows/build-and-push.yml index 9074db3a..8a3f7394 100644 --- a/.github/workflows/build-and-push.yml +++ b/.github/workflows/build-and-push.yml @@ -18,6 +18,10 @@ on: branches: - dev +permissions: + contents: read + packages: write + jobs: build-and-push: runs-on: ubuntu-latest diff --git a/.github/workflows/full-test-jsonl.yml b/.github/workflows/full-test-jsonl.yml index d26bd984..73792a42 100644 --- a/.github/workflows/full-test-jsonl.yml +++ b/.github/workflows/full-test-jsonl.yml @@ -13,6 +13,9 @@ on: - "pyproject.toml" - "uv.lock" +permissions: + contents: read + jobs: test-container-jsonl: runs-on: ubuntu-latest @@ -101,5 +104,7 @@ jobs: run: | echo "## Container Test Summary" >> $GITHUB_STEP_SUMMARY echo "- **PR Number:** ${{ github.event.pull_request.number }}" >> $GITHUB_STEP_SUMMARY - echo "- **Photon Version:** ${{ env.PHOTON_VERSION }}" >> $GITHUB_STEP_SUMMARY - echo "- **Status:** ${{ job.status }}" >> $GITHUB_STEP_SUMMARY + echo "- **Photon Version:** ${PHOTON_VERSION}" >> $GITHUB_STEP_SUMMARY + echo "- **Status:** ${JOB_STATUS}" >> $GITHUB_STEP_SUMMARY + env: + JOB_STATUS: ${{ job.status }} diff --git a/.github/workflows/full-test.yml b/.github/workflows/full-test.yml index 2b72424b..c20f448d 100644 --- a/.github/workflows/full-test.yml +++ b/.github/workflows/full-test.yml @@ -14,6 +14,9 @@ on: - "pyproject.toml" - "uv.lock" +permissions: + 
contents: read + jobs: test-container: runs-on: ubuntu-latest From e8581cc1cdb68d6b8c5e37e03358148acad12c79 Mon Sep 17 00:00:00 2001 From: Robin Tuszik Date: Fri, 19 Jun 2026 11:49:38 +0200 Subject: [PATCH 23/27] refactor(update): ensure temp directory cleared on failure --- src/update.py | 18 ++++++++++-------- tests/test_update.py | 22 ++++++++++++++++++++++ 2 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/update.py b/src/update.py index a131a001..1276c3d7 100644 --- a/src/update.py +++ b/src/update.py @@ -177,15 +177,17 @@ def run_update(strategy: str): prepare_temp_dir() - download_url = get_download_url() - _ensure_disk_space(download_url, parallel=strategy == "PARALLEL") + try: + download_url = get_download_url() + _ensure_disk_space(download_url, parallel=strategy == "PARALLEL") - index_file = _download_verified_index() + index_file = _download_verified_index() - extract_index(index_file) + extract_index(index_file) - logging.info("Activating new index") - index.activate(os.path.join(config.TEMP_DIR, "photon_data")) - clear_temp_dir() + logging.info("Activating new index") + index.activate(os.path.join(config.TEMP_DIR, "photon_data")) - logging.info("Update pipeline completed successfully.") + logging.info("Update pipeline completed successfully.") + finally: + clear_temp_dir() diff --git a/tests/test_update.py b/tests/test_update.py index 5e426cc5..95d85052 100644 --- a/tests/test_update.py +++ b/tests/test_update.py @@ -321,6 +321,28 @@ def boom(): update.run_update("SEQUENTIAL") +def test_run_update_clears_temp_dir_on_failure(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(config, "SKIP_MD5_CHECK", True) + _make_pipeline_patches(monkeypatch) + + cleared = {"n": 0} + + def fake_clear(): + cleared["n"] += 1 + + monkeypatch.setattr(update, "clear_temp_dir", fake_clear) + + def boom(): + raise update.DownloadError("download died") + + monkeypatch.setattr(update, "download_index", boom) + + with 
pytest.raises(update.DownloadError, match="download died"): + update.run_update("SEQUENTIAL") + + assert cleared["n"] == 1 + + def test_run_update_checksum_mismatch_prevents_activation(fake_dirs: Path, monkeypatch: pytest.MonkeyPatch): monkeypatch.setattr(config, "SKIP_MD5_CHECK", False) _make_pipeline_patches(monkeypatch) From cb4af16a73a3c37b4ca0f823326054b4e4677e31 Mon Sep 17 00:00:00 2001 From: binnichtaktiv Date: Sat, 20 Jun 2026 11:24:46 +0200 Subject: [PATCH 24/27] feat: persist update schedule across restarts --- src/process_manager.py | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/src/process_manager.py b/src/process_manager.py index 4756299b..29870afd 100644 --- a/src/process_manager.py +++ b/src/process_manager.py @@ -6,6 +6,7 @@ import sys import threading import time +import json from enum import Enum import psutil @@ -22,6 +23,7 @@ logger = get_logger() +LAST_UPDATE_RUN = os.path.join(config.DATA_DIR, '.last_update_run') def check_photon_health(timeout=30, max_retries=10) -> bool: url = "http://localhost:2322/status" @@ -236,6 +238,8 @@ def run_update(self): logger.info(f"Update completed successfully - Photon healthy ({update_duration:.1f}s)") send_notification("Photon Index Updated Successfully") index.drop_backup() + with open(LAST_UPDATE_RUN, 'w') as f: + f.write(json.dumps({"ts": time.time()})) else: update_duration = time.time() - update_start logger.error(f"Update failed - Photon health check failed after restart ({update_duration:.1f}s)") @@ -254,25 +258,39 @@ def schedule_updates(self): if config.IMPORT_MODE == "jsonl": logger.info("Skipping scheduled updates in JSONL mode until rebuild support is implemented") return + + if not os.path.exists(LAST_UPDATE_RUN): + with open(LAST_UPDATE_RUN, 'w') as f: + f.write(json.dumps({"ts": time.time()})) + last_run = time.time() + logger.info("No last update timestamp found, treating as first run") + else: + try: + last_run = 
json.loads(LAST_UPDATE_RUN.read_text())["ts"] + except Exception: + logger.error("Unable to read last update timestamp") + return interval = config.UPDATE_INTERVAL.lower() if interval.endswith("d"): - days = int(interval[:-1]) - schedule.every(days).days.do(self.run_update) - logger.info(f"Scheduling updates every {days} days") + interval_seconds = int(interval[:-1]) * 86400 + schedule.every(int(interval[:-1])).days.do(self.run_update) elif interval.endswith("h"): - hours = int(interval[:-1]) - schedule.every(hours).hours.do(self.run_update) - logger.info(f"Scheduling updates every {hours} hours") + interval_seconds = int(interval[:-1]) * 3600 + schedule.every(int(interval[:-1])).hours.do(self.run_update) elif interval.endswith("m"): - minutes = int(interval[:-1]) - schedule.every(minutes).minutes.do(self.run_update) - logger.info(f"Scheduling updates every {minutes} minutes") + interval_seconds = int(interval[:-1]) * 60 + schedule.every(int(interval[:-1])).minutes.do(self.run_update) else: logger.warning(f"Invalid UPDATE_INTERVAL format: {interval}, defaulting to daily") + interval_seconds = 86400 schedule.every().day.do(self.run_update) + if (time.time() - last_run) >= interval_seconds: + logger.info("Update interval elapsed since last run, running update now...") + threading.Thread(target=self.run_update, daemon=True).start() + def scheduler_loop(): while not self.should_exit: self._run_pending_jobs() From e0de292afe45e5fb34466a567c533264ffa97e6d Mon Sep 17 00:00:00 2001 From: binnichtaktiv Date: Sat, 20 Jun 2026 12:21:03 +0200 Subject: [PATCH 25/27] test: fix schedule_updates tests to use dynamic data dir path --- src/process_manager.py | 12 +++++++----- tests/test_process_manager.py | 2 ++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/process_manager.py b/src/process_manager.py index 29870afd..e34f29f2 100644 --- a/src/process_manager.py +++ b/src/process_manager.py @@ -23,7 +23,8 @@ logger = get_logger() -LAST_UPDATE_RUN = 
os.path.join(config.DATA_DIR, '.last_update_run') +def _get_last_update_run_path() -> str: + return os.path.join(config.DATA_DIR, '.last_update_run') def check_photon_health(timeout=30, max_retries=10) -> bool: url = "http://localhost:2322/status" @@ -238,7 +239,7 @@ def run_update(self): logger.info(f"Update completed successfully - Photon healthy ({update_duration:.1f}s)") send_notification("Photon Index Updated Successfully") index.drop_backup() - with open(LAST_UPDATE_RUN, 'w') as f: + with open(_get_last_update_run_path(), 'w') as f: f.write(json.dumps({"ts": time.time()})) else: update_duration = time.time() - update_start @@ -259,14 +260,15 @@ def schedule_updates(self): logger.info("Skipping scheduled updates in JSONL mode until rebuild support is implemented") return - if not os.path.exists(LAST_UPDATE_RUN): - with open(LAST_UPDATE_RUN, 'w') as f: + if not os.path.exists(_get_last_update_run_path()): + with open(_get_last_update_run_path(), 'w') as f: f.write(json.dumps({"ts": time.time()})) last_run = time.time() logger.info("No last update timestamp found, treating as first run") else: try: - last_run = json.loads(LAST_UPDATE_RUN.read_text())["ts"] + with open(_get_last_update_run_path(), 'r') as f: + last_run = json.loads(f.read())["ts"] except Exception: logger.error("Unable to read last update timestamp") return diff --git a/tests/test_process_manager.py b/tests/test_process_manager.py index 9695faab..754e20dc 100644 --- a/tests/test_process_manager.py +++ b/tests/test_process_manager.py @@ -404,6 +404,7 @@ def test_schedule_updates_parses_intervals( ): monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") monkeypatch.setattr(config, "UPDATE_INTERVAL", interval) + monkeypatch.setattr(config, "DATA_DIR", "/tmp") monkeypatch.setattr(process_manager.threading, "Thread", lambda **_: MagicMock(start=lambda: None)) manager.schedule_updates() jobs = schedule.get_jobs() @@ -416,6 +417,7 @@ def 
test_schedule_updates_falls_back_to_daily_on_invalid_interval( ): monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") monkeypatch.setattr(config, "UPDATE_INTERVAL", "garbage") + monkeypatch.setattr(config, "DATA_DIR", "/tmp") monkeypatch.setattr(process_manager.threading, "Thread", lambda **_: MagicMock(start=lambda: None)) manager.schedule_updates() jobs = schedule.get_jobs() From 5bcee342b11666a27ff3f7693118aea3a08ca6b6 Mon Sep 17 00:00:00 2001 From: binnichtaktiv Date: Sat, 20 Jun 2026 12:30:30 +0200 Subject: [PATCH 26/27] test: fix schedule_updates tests to use tmp_path for data dir --- tests/test_process_manager.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_process_manager.py b/tests/test_process_manager.py index 754e20dc..0b7548c0 100644 --- a/tests/test_process_manager.py +++ b/tests/test_process_manager.py @@ -400,11 +400,11 @@ def test_run_pending_jobs_survives_job_exception( @pytest.mark.parametrize(("interval", "expected_unit"), [("3d", "days"), ("12h", "hours"), ("30m", "minutes")]) def test_schedule_updates_parses_intervals( - manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch, interval: str, expected_unit: str + manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, interval: str, expected_unit: str ): monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") monkeypatch.setattr(config, "UPDATE_INTERVAL", interval) - monkeypatch.setattr(config, "DATA_DIR", "/tmp") + monkeypatch.setattr(config, "DATA_DIR", str(tmp_path)) monkeypatch.setattr(process_manager.threading, "Thread", lambda **_: MagicMock(start=lambda: None)) manager.schedule_updates() jobs = schedule.get_jobs() @@ -413,11 +413,11 @@ def test_schedule_updates_parses_intervals( def test_schedule_updates_falls_back_to_daily_on_invalid_interval( - manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch + manager: process_manager.PhotonManager, monkeypatch: 
pytest.MonkeyPatch, tmp_path: Path ): monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") monkeypatch.setattr(config, "UPDATE_INTERVAL", "garbage") - monkeypatch.setattr(config, "DATA_DIR", "/tmp") + monkeypatch.setattr(config, "DATA_DIR", str(tmp_path)) monkeypatch.setattr(process_manager.threading, "Thread", lambda **_: MagicMock(start=lambda: None)) manager.schedule_updates() jobs = schedule.get_jobs() From b06c560ee1aa94ae761cf90a87a2a38b66652af8 Mon Sep 17 00:00:00 2001 From: binnichtaktiv Date: Sat, 20 Jun 2026 14:05:57 +0200 Subject: [PATCH 27/27] feat: log next update time and apply ruff formatting --- src/process_manager.py | 17 +++++++++++------ tests/test_process_manager.py | 6 +++++- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/process_manager.py b/src/process_manager.py index e34f29f2..5f1f535f 100644 --- a/src/process_manager.py +++ b/src/process_manager.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +import json import os import shlex import signal @@ -6,8 +7,8 @@ import sys import threading import time -import json from enum import Enum +from datetime import datetime import psutil import requests @@ -23,8 +24,10 @@ logger = get_logger() + def _get_last_update_run_path() -> str: - return os.path.join(config.DATA_DIR, '.last_update_run') + return os.path.join(config.DATA_DIR, ".last_update_run") + def check_photon_health(timeout=30, max_retries=10) -> bool: url = "http://localhost:2322/status" @@ -239,7 +242,7 @@ def run_update(self): logger.info(f"Update completed successfully - Photon healthy ({update_duration:.1f}s)") send_notification("Photon Index Updated Successfully") index.drop_backup() - with open(_get_last_update_run_path(), 'w') as f: + with open(_get_last_update_run_path(), "w") as f: f.write(json.dumps({"ts": time.time()})) else: update_duration = time.time() - update_start @@ -259,15 +262,15 @@ def schedule_updates(self): if config.IMPORT_MODE == "jsonl": logger.info("Skipping scheduled updates in JSONL mode 
until rebuild support is implemented") return - + if not os.path.exists(_get_last_update_run_path()): - with open(_get_last_update_run_path(), 'w') as f: + with open(_get_last_update_run_path(), "w") as f: f.write(json.dumps({"ts": time.time()})) last_run = time.time() logger.info("No last update timestamp found, treating as first run") else: try: - with open(_get_last_update_run_path(), 'r') as f: + with open(_get_last_update_run_path()) as f: last_run = json.loads(f.read())["ts"] except Exception: logger.error("Unable to read last update timestamp") @@ -292,6 +295,8 @@ def schedule_updates(self): if (time.time() - last_run) >= interval_seconds: logger.info("Update interval elapsed since last run, running update now...") threading.Thread(target=self.run_update, daemon=True).start() + else: + logger.info(f"Next update scheduled for {datetime.fromtimestamp(last_run + interval_seconds).strftime('%m-%d-%Y %H:%M')}") def scheduler_loop(): while not self.should_exit: diff --git a/tests/test_process_manager.py b/tests/test_process_manager.py index 0b7548c0..5615e268 100644 --- a/tests/test_process_manager.py +++ b/tests/test_process_manager.py @@ -400,7 +400,11 @@ def test_run_pending_jobs_survives_job_exception( @pytest.mark.parametrize(("interval", "expected_unit"), [("3d", "days"), ("12h", "hours"), ("30m", "minutes")]) def test_schedule_updates_parses_intervals( - manager: process_manager.PhotonManager, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, interval: str, expected_unit: str + manager: process_manager.PhotonManager, + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + interval: str, + expected_unit: str, ): monkeypatch.setattr(config, "UPDATE_STRATEGY", "SEQUENTIAL") monkeypatch.setattr(config, "UPDATE_INTERVAL", interval)