Skip to content

Commit 823111e

Browse files
committed
changes from code reviews
1 parent 651654e commit 823111e

3 files changed

Lines changed: 67 additions & 10 deletions

File tree

CHANGELOG.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
88
## [Unreleased (Targeting 0.2.5)]
99

1010
### Added
11-
- Optional parameter to `fetch_file()` with a modified time of the remote file pulled from the TIND API
12-
- `fetch_file()` uses this to avoid unnecessary downloads if a file already exists at the target
13-
location and has a modified time that is newer than the requested file
11+
- Optional parameter to `fetch_file()` with a modified time of the remote file pulled from the TIND API.
12+
`fetch_file()` uses this to avoid unnecessary downloads if a file already exists at the target
13+
location and has a modified time that is newer than the requested file.
14+
- Added tests for this new behavior.
1415

1516
### Changed
16-
- slight change to raise a file not downloaded error if `tind_download()` fails to return a written file path
17+
- slight change to raise a file not downloaded error if `tind_download()` fails to return a written file path.
1718

1819

1920
## [0.2.4]

tests/test_fetch.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
import json
66
import xml.etree.ElementTree as E
77

8+
from datetime import datetime, timezone
89
from pathlib import Path
10+
from unittest.mock import MagicMock, patch
911

1012
import pytest
1113
import requests_mock as req_mock # noqa: F401 — activates the requests_mock fixture
@@ -87,6 +89,59 @@ def test_fetch_file_not_found(
8789
client.fetch_file(url, output_dir=str(tmp_path))
8890

8991

92+
def test_fetch_file_skips_download_when_local_file_is_newer(
93+
tmp_path: Path,
94+
client: TINDClient,
95+
) -> None:
96+
"""fetch_file returns the cached path without downloading when local mtime >= meta_mtime."""
97+
98+
url = f"{BASE_URL}/api/v1/record/12345/files/some-image.jpg/download/?version=1"
99+
cached = tmp_path / "some-image.jpg"
100+
cached.write_bytes(b"cached content")
101+
102+
meta_mtime = datetime(2026, 1, 1, 0, 0, 0, tzinfo=timezone.utc)
103+
local_mtime = datetime(2026, 1, 3, 0, 0, 0, tzinfo=timezone.utc) # newer
104+
105+
mock_stat = MagicMock()
106+
mock_stat.st_mtime = local_mtime.timestamp()
107+
108+
with patch.object(Path, "stat", return_value=mock_stat):
109+
result = client.fetch_file(url, output_dir=str(tmp_path), meta_mtime=meta_mtime)
110+
111+
assert result == str(cached)
112+
113+
114+
def test_fetch_file_redownloads_when_local_file_is_older(
115+
requests_mock: req_mock.Mocker,
116+
tmp_path: Path,
117+
client: TINDClient,
118+
) -> None:
119+
"""fetch_file re-downloads when local mtime < meta_mtime."""
120+
121+
url = f"{BASE_URL}/api/v1/record/12345/files/some-other-image.jpg/download/?version=1"
122+
cached = tmp_path / "some-other-image.jpg"
123+
cached.write_bytes(b"stale content")
124+
125+
meta_mtime = datetime(2026, 1, 3, 0, 0, 0, tzinfo=timezone.utc)
126+
local_mtime = datetime(2026, 1, 1, 0, 0, 0, tzinfo=timezone.utc) # older
127+
128+
mock_stat = MagicMock()
129+
mock_stat.st_mtime = local_mtime.timestamp()
130+
131+
requests_mock.get(
132+
url,
133+
content=b"fresh content",
134+
status_code=200,
135+
headers={"Content-Disposition": 'attachment; filename="some-other-image.jpg"'},
136+
)
137+
138+
with patch.object(Path, "stat", return_value=mock_stat):
139+
result = client.fetch_file(url, output_dir=str(tmp_path), meta_mtime=meta_mtime)
140+
141+
assert result.endswith("some-other-image.jpg")
142+
assert Path(result).read_bytes() == b"fresh content"
143+
144+
90145
# ---------------------------------------------------------------------------
91146
# fetch_file_metadata
92147
# ---------------------------------------------------------------------------

tind_client/client.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,17 +72,19 @@ def fetch_metadata(self, record: str) -> Record:
7272

7373
return records[0]
7474

75-
def fetch_file(self, file_url: str, output_dir: str = "", modified: str = "") -> str:
75+
def fetch_file(
76+
self, file_url: str, output_dir: str = "", meta_mtime: datetime | None = None
77+
) -> str:
7678
"""Download a file from TIND and save it locally.
7779
7880
If the file already exists in the output directory and was modified at or after a supplied
79-
``modified`` timestamp, the file will not be re-downloaded.
81+
``meta_mtime`` timestamp, the file will not be re-downloaded.
8082
8183
:param str file_url: The TIND file download URL.
8284
:param str output_dir: Directory in which to save the file.
8385
Falls back to ``default_storage_dir`` when empty.
84-
:param str modified: Optional modified timestamp from the file metadata returned by TIND.
85-
If not specified, the file will always be downloaded.
86+
:param datetime meta_mtime: Optional modified timestamp from the file metadata returned by
87+
TIND. If not specified, the file will always be downloaded.
8688
:raises AuthorizationError: When the TIND API key is invalid or the file is restricted.
8789
:raises ValueError: When ``file_url`` is not a valid TIND file download URL.
8890
:raises RecordNotFoundError: When the file is invalid or not found.
@@ -96,8 +98,7 @@ def fetch_file(self, file_url: str, output_dir: str = "", modified: str = "") ->
9698
expected_filename = file_url.rstrip("/").split("/")[-2]
9799
expected_path = Path(output_target) / expected_filename
98100

99-
if modified and expected_path.exists():
100-
meta_mtime = datetime.fromisoformat(modified).replace(tzinfo=timezone.utc)
101+
if meta_mtime and expected_path.exists():
101102
local_mtime = datetime.fromtimestamp(expected_path.stat().st_mtime, tz=timezone.utc)
102103
if local_mtime >= meta_mtime:
103104
logger.debug("Cached file at (%s) is newer; skipping download.", expected_path)

0 commit comments

Comments
 (0)