Skip to content

Commit 95d591a

Browse files
committed
Eliminate assumption that asset keys are usable as filename #820
1 parent f316872 commit 95d591a

4 files changed

Lines changed: 96 additions & 45 deletions

File tree

docs/batch_jobs.rst

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -378,11 +378,11 @@ If you know that there is just a single result file, you can also download it di
378378
This will fail however if there are multiple assets in the job result
379379
(like in the metadata example above).
380380
In that case you can still download a single by specifying which one you
381-
want to download with the ``name`` argument:
381+
want to download with the ``key`` argument:
382382
383383
.. code-block:: python
384384
385-
results.download_file("data/out/result.tiff", name="res002.tiff")
385+
results.download_file("data/out/result.tiff", key="res002.tiff")
386386
387387
388388
Fine-grained asset downloads
@@ -396,9 +396,14 @@ with :py:meth:`~openeo.rest.job.ResultAsset.download`, like this:
396396
.. code-block:: python
397397
398398
for asset in results.get_assets():
399-
if asset.metadata["type"].startswith("image/tiff"):
400-
asset.download("data/out/result-v2-" + asset.name)
399+
if asset.media_type.startswith("image/tiff"):
400+
asset.download("data/out/result-v2-" + asset.key)
401401
402+
.. note::
403+
The ``key`` of an asset is not guaranteed to be directly usable as filename safe,
404+
so while the snippet above glosses over that aspect,
405+
make sure to properly sanitize the filename or
406+
at least check the behavior of the backend you are working with.
402407
403408
Directly load batch job results
404409
===============================

openeo/internal/warnings.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,13 @@ class UserDeprecationWarning(Warning):
2727
pass
2828

2929

30+
def user_deprecation_warning(message: str, stacklevel: int = 3):
31+
"""
32+
Helper to compactly raise a `UserDeprecationWarning` with proper stacklevel.
33+
"""
34+
warnings.warn(message, category=UserDeprecationWarning, stacklevel=stacklevel
35+
)
36+
3037
def test_warnings(stacklevel=1):
3138
"""Trigger some warnings (for test contexts)."""
3239
for warning in [UserWarning, DeprecationWarning, UserDeprecationWarning]:

openeo/rest/job.py

Lines changed: 72 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
from __future__ import annotations
22

3+
import warnings
4+
35
import datetime
46
import json
57
import logging
8+
import re
69
import time
710
import typing
811
from pathlib import Path
@@ -12,7 +15,7 @@
1215

1316
from openeo.internal.documentation import openeo_endpoint
1417
from openeo.internal.jupyter import VisualDict, render_component, render_error
15-
from openeo.internal.warnings import deprecated, legacy_alias
18+
from openeo.internal.warnings import deprecated, legacy_alias, user_deprecation_warning
1619
from openeo.rest import (
1720
DEFAULT_DOWNLOAD_CHUNK_SIZE,
1821
DEFAULT_DOWNLOAD_RANGE_SIZE,
@@ -400,22 +403,41 @@ class ResultAsset:
400403
.. versionadded:: 0.4.10
401404
"""
402405

403-
def __init__(self, job: BatchJob, name: str, href: str, metadata: dict):
404-
self.job = job
406+
__slots__ = ("job", "key", "href", "media_type", "metadata")
405407

406-
self.name = name
407-
"""Asset name as advertised by the backend."""
408+
def __init__(self, job: BatchJob, key: str, href: str, metadata: dict):
409+
# TODO: the owning job is actually only used to obtain a Connection for downloading. Eliminate BatchJob as requirement
410+
self.job = job
411+
self.key = key
408412

413+
# URL to the downloadable asset
409414
self.href = href
410-
"""Download URL of the asset."""
411415

416+
self.media_type = metadata.get("type")
417+
418+
# Asset metadata provided by the backend, possibly containing keys "type" (for media type), "roles", "title", "description".
412419
self.metadata = metadata
413-
"""Asset metadata provided by the backend, possibly containing keys "type" (for media type), "roles", "title", "description"."""
414420

415421
def __repr__(self):
416-
return "<ResultAsset {n!r} (type {t}) at {h!r}>".format(
417-
n=self.name, t=self.metadata.get("type", "unknown"), h=self.href
418-
)
422+
return f"<ResultAsset {self.key!r} (type {self.media_type}) at {self.href!r}>"
423+
424+
@property
425+
def name(self):
426+
# TODO: remove this once users got enough time to migrate
427+
user_deprecation_warning("`ResultAsset.name` is deprecated and will be removed, use `ResultAsset.key` instead")
428+
return self.key
429+
430+
def _make_filename(self) -> str:
431+
"""
432+
Produce a filename for downloading the asset to
433+
as fallback when user did not provide something,
434+
based on: asset key (which is not guaranteed to consist of filename-safe characters)
435+
and filename in href (if any)
436+
"""
437+
safe_filename_regex = re.compile(r"^[a-zA-Z0-9_.-]+\.[a-zA-Z0-9]+$")
438+
if safe_filename_regex.fullmatch(self.key):
439+
# Legacy mode: use asset key as filename
440+
return self.key
419441

420442
def download(
421443
self,
@@ -427,16 +449,18 @@ def download(
427449
"""
428450
Download asset to given location
429451
430-
:param target: download target path. Can be an existing folder
431-
(in which case the filename advertised by backend will be used)
432-
or full file name. By default, the working directory will be used.
452+
:param target: target path to download to.
453+
Can be a path to file, or to an existing folder
454+
(in which case the filename will be constructed
455+
in best-effort fashion, based on available metadata)
456+
By default, the working directory will be used.
433457
:param chunk_size: chunk size for streaming response.
434458
"""
435459
target = Path(target or Path.cwd())
436460
if target.is_dir():
437-
target = target / self.name
461+
target = target / self._make_filename()
438462
ensure_dir(target.parent)
439-
logger.info("Downloading Job result asset {n!r} from {h!s} to {t!s}".format(n=self.name, h=self.href, t=target))
463+
logger.info(f"Downloading job result asset {self.key!r} from {self.href!s} to {target!s}")
440464
self._download_to_file(url=self.href, target=target, chunk_size=chunk_size, range_size=range_size)
441465
return target
442466

@@ -445,7 +469,7 @@ def _get_response(self, stream=True) -> requests.Response:
445469

446470
def load_json(self) -> dict:
447471
"""Load asset in memory and parse as JSON."""
448-
if not (self.name.lower().endswith(".json") or self.metadata.get("type") == "application/json"):
472+
if self.metadata.get("type") != "application/json":
449473
logger.warning("Asset might not be JSON")
450474
return self._get_response().json()
451475

@@ -559,50 +583,65 @@ def get_assets(self) -> List[ResultAsset]:
559583
if not assets:
560584
logger.warning("No assets found in job result metadata.")
561585
return [
562-
ResultAsset(job=self._job, name=name, href=asset["href"], metadata=asset) for name, asset in assets.items()
586+
ResultAsset(job=self._job, key=key, href=asset["href"], metadata=asset) for key, asset in assets.items()
563587
]
564588

565-
def get_asset(self, name: str = None) -> ResultAsset:
589+
def get_asset(self, key: Optional[str] = None, *, name: Optional[str] = None) -> ResultAsset:
566590
"""
567-
Get single asset by name or without name if there is only one.
591+
Get single asset by asset key or without key if there is only one.
568592
"""
569593
# TODO: also support getting a single asset by type or role?
594+
if name:
595+
# TODO: remove this legacy `name` support when users got enough time to migrate
596+
user_deprecation_warning("Argument `name` is deprecated and will be removed, use `key` instead.")
597+
key = name
598+
del name
599+
570600
assets = self.get_assets()
571601
if len(assets) == 0:
572602
raise OpenEoClientException("No assets in result.")
573-
if name is None:
603+
if key is None:
574604
if len(assets) == 1:
575605
return assets[0]
576606
else:
577607
raise MultipleAssetException(
578-
"Multiple result assets for job {j}: {a}".format(j=self._job.job_id, a=[a.name for a in assets])
608+
"Multiple result assets for job {j}: {a}".format(j=self._job.job_id, a=[a.key for a in assets])
579609
)
580610
else:
581611
try:
582-
return next(a for a in assets if a.name == name)
612+
return next(a for a in assets if a.key == key)
583613
except StopIteration:
584-
raise OpenEoClientException("No asset {n!r} in: {a}".format(n=name, a=[a.name for a in assets]))
614+
raise OpenEoClientException("No asset {k!r} in: {a}".format(k=key, a=[a.key for a in assets]))
585615

586616
def download_file(
587617
self,
588-
target: Union[Path, str] = None,
589-
name: str = None,
618+
target: Union[Path, str, None] = None,
619+
key: Optional[str] = None,
590620
*,
591-
chunk_size=DEFAULT_DOWNLOAD_CHUNK_SIZE,
621+
chunk_size: int = DEFAULT_DOWNLOAD_CHUNK_SIZE,
592622
range_size: int = DEFAULT_DOWNLOAD_RANGE_SIZE,
623+
name: Optional[str] = None,
593624
) -> Path:
594625
"""
595626
Download single asset. Can be used when there is only one asset in the
596-
:py:class:`JobResults`, or when the desired asset name is given explicitly.
597-
598-
:param target: path to download to. Can be an existing directory
599-
(in which case the filename advertised by backend will be used)
600-
or full file name. By default, the working directory will be used.
601-
:param name: asset name to download (not required when there is only one asset)
627+
:py:class:`JobResults`, or when the desired asset key is given explicitly.
628+
629+
:param target: target path to download to.
630+
Can be a path to file, or to an existing folder
631+
(in which case the filename will be constructed
632+
in best-effort fashion, based on available metadata)
633+
By default, the working directory will be used.
634+
:param key: asset key to download (not required when there is only one asset)
602635
:return: path of downloaded asset
603636
"""
637+
if name:
638+
# TODO: remove this legacy `name` support when users got enough time to migrate
639+
user_deprecation_warning("Argument `name` is deprecated and will be removed, use `key` instead.")
640+
key = name
641+
del name
642+
604643
try:
605-
return self.get_asset(name=name).download(target=target, chunk_size=chunk_size, range_size=range_size)
644+
return self.get_asset(key=key).download(target=target, chunk_size=chunk_size, range_size=range_size)
606645
except MultipleAssetException:
607646
raise OpenEoClientException(
608647
"Can not use `download_file` with multiple assets. Use `download_files` instead."

tests/rest/test_job.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -827,7 +827,7 @@ def test_get_results_multiple_download_single(job_with_2_assets: BatchJob, tmp_p
827827
def test_get_results_multiple_download_single_by_name(job_with_2_assets: BatchJob, tmp_path):
828828
job = job_with_2_assets
829829
target = tmp_path / "res.tiff"
830-
path = job.get_results().download_file(target, name="1.tiff")
830+
path = job.get_results().download_file(target, key="1.tiff")
831831
assert path == target
832832
assert list(p.name for p in tmp_path.iterdir()) == ["res.tiff"]
833833
with path.open("rb") as f:
@@ -839,7 +839,7 @@ def test_get_results_multiple_download_single_by_wrong_name(job_with_2_assets: B
839839
target = tmp_path / "res.tiff"
840840
expected = r"No asset 'foobar\.tiff' in: \['.\.tiff', '.\.tiff'\]"
841841
with pytest.raises(OpenEoClientException, match=expected):
842-
job.get_results().download_file(target, name="foobar.tiff")
842+
job.get_results().download_file(target, key="foobar.tiff")
843843

844844

845845
def test_download_results(job_with_2_assets: BatchJob, tmp_path):
@@ -875,7 +875,7 @@ def test_get_results_download_files(job_with_2_assets: BatchJob, tmp_path):
875875
results = job.get_results()
876876

877877
assets = job.get_results().get_assets()
878-
assert {a.name: a.metadata for a in assets} == {
878+
assert {a.key: a.metadata for a in assets} == {
879879
"1.tiff": {"href": "https://oeo.test/dl/jjr1.tiff", "type": "image/tiff; application=geotiff"},
880880
"2.tiff": {"href": "https://oeo.test/dl/jjr2.tiff", "type": "image/tiff; application=geotiff"},
881881
}
@@ -932,7 +932,7 @@ def test_result_asset_download_file(con100, requests_mock, tmp_path):
932932
requests_mock.get(href, content=TIFF_CONTENT)
933933

934934
job = BatchJob("jj", connection=con100)
935-
asset = ResultAsset(job, name="1.tiff", href=href, metadata={"type": "image/tiff; application=geotiff"})
935+
asset = ResultAsset(job, key="1.tiff", href=href, metadata={"type": "image/tiff; application=geotiff"})
936936
target = tmp_path / "res.tiff"
937937
path = asset.download(target)
938938

@@ -948,7 +948,7 @@ def test_result_asset_download_file_error(con100, requests_mock, tmp_path):
948948
requests_mock.get(href, status_code=500, text="Nope!")
949949

950950
job = BatchJob("jj", connection=con100)
951-
asset = ResultAsset(job, name="1.tiff", href=href, metadata={"type": "image/tiff; application=geotiff"})
951+
asset = ResultAsset(job, key="1.tiff", href=href, metadata={"type": "image/tiff; application=geotiff"})
952952

953953
with pytest.raises(OpenEoApiPlainError, match="Nope!"):
954954
_ = asset.download(tmp_path / "res.tiff")
@@ -963,7 +963,7 @@ def test_result_asset_download_folder(con100, requests_mock, tmp_path):
963963
requests_mock.get(href, content=TIFF_CONTENT)
964964

965965
job = BatchJob("jj", connection=con100)
966-
asset = ResultAsset(job, name="1.tiff", href=href, metadata={"type": "image/tiff; application=geotiff"})
966+
asset = ResultAsset(job, key="1.tiff", href=href, metadata={"type": "image/tiff; application=geotiff"})
967967
target = tmp_path / "folder"
968968
target.mkdir()
969969
path = asset.download(target)
@@ -979,7 +979,7 @@ def test_result_asset_load_json(con100, requests_mock):
979979
requests_mock.get(href, json={"bands": [1, 2, 3]})
980980

981981
job = BatchJob("jj", connection=con100)
982-
asset = ResultAsset(job, name="out.json", href=href, metadata={"type": "application/json"})
982+
asset = ResultAsset(job, key="out.json", href=href, metadata={"type": "application/json"})
983983
res = asset.load_json()
984984

985985
assert res == {"bands": [1, 2, 3]}
@@ -991,7 +991,7 @@ def test_result_asset_load_bytes(con100, requests_mock):
991991
requests_mock.get(href, content=TIFF_CONTENT)
992992

993993
job = BatchJob("jj", connection=con100)
994-
asset = ResultAsset(job, name="out.tiff", href=href, metadata={"type": "image/tiff; application=geotiff"})
994+
asset = ResultAsset(job, key="out.tiff", href=href, metadata={"type": "image/tiff; application=geotiff"})
995995
res = asset.load_bytes()
996996

997997
assert res == TIFF_CONTENT

0 commit comments

Comments
 (0)