Skip to content

Commit 16e9cb7

Browse files
committed
Merge branch 'main' into feature/improveTypedEntities-CMEM-6243
2 parents cbdd889 + 2366c25 commit 16e9cb7

6 files changed

Lines changed: 405 additions & 270 deletions

File tree

CHANGELOG.md

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,33 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this p
1111

1212
- RDF Quad entity type (CMEM-6243).
1313

14-
## [4.10.1] 2025-05-08 - shipped with DI v25.1.1
14+
15+
## [4.11.0] 2025-06-19
16+
17+
### Added
18+
19+
- `File` entities: add `entry_path` attribute and `read_stream` method
20+
21+
### Changed
22+
23+
- dependency cmem-cmempy >=25.2.0
24+
- dependency python-ulid ^3.0.0
25+
26+
27+
## [4.10.2] 2025-05-15 - shipped with DI v25.1.1
28+
29+
### Fixed
30+
31+
- Resolved an issue in `FileEntitySchema` for empty MIME types (CMEM-6623).
32+
33+
34+
## [4.10.1] 2025-05-08
1535

1636
### Fixed
1737

1838
- Adapted FileEntitySchema so it can be used with datasets (CMEM-6615).
1939

40+
2041
## [4.10.0] 2025-03-31 - shipped with DI v25.1.0
2142

2243
### Added
@@ -28,6 +49,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this p
2849

2950
- Check if passwords can be decrypted, i.e., if the key is valid (CMEM-5932)
3051

52+
3153
## [4.9.0] 2025-02-20
3254

3355
### Added

TaskfileCustom.yaml

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
---
2+
version: '3'
3+
4+
tasks:
5+
6+
container:install:
7+
desc: "Install package in container (then restart)"
8+
summary: >
9+
Note that `cmemc admin workspace python uninstall cmem-plugin-base` will
10+
uninstall the dev-version and reveal the shipped package version again.
11+
cmds:
12+
- task clean build
13+
- docker cp dist/{{.PACKAGE_FILE}} {{.DI_CONTAINER}}:/tmp/
14+
- docker exec -t {{.DI_CONTAINER}} bash -c "{{.PIP_COMMAND}} /tmp/{{.PACKAGE_FILE}}"
15+
- docker exec -t {{.DI_CONTAINER}} bash -c "rm -f /tmp/{{.PACKAGE_FILE}}"
16+
- docker restart {{.DI_CONTAINER}}
17+
preconditions:
18+
- sh: '[ "{{.DI_RUNNING}}" == "true" ]'
19+
msg: "Container {{.DI_CONTAINER}} not running ({{.DI_RUNNING}})"
20+
vars:
21+
DI_CONTAINER: dockerlocalhost-dataintegration-1
22+
DI_RUNNING:
23+
sh: docker inspect -f '{{ "{{" }}.State.Running{{ "}}" }}' {{.DI_CONTAINER}}
24+
PACKAGE: cmem_plugin_base
25+
PACKAGE_VERSION:
26+
sh: poetry version -s
27+
PACKAGE_FILE: "{{.PACKAGE}}-{{.PACKAGE_VERSION}}.tar.gz"
28+
PIP_COMMAND: pip install --target /data/python-packages -q --upgrade
29+

cmem_plugin_base/dataintegration/typed_entities/file.py

Lines changed: 82 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
"""File entities"""
22

3+
import zipfile
4+
from abc import abstractmethod
5+
from io import BytesIO
6+
from pathlib import Path
7+
from typing import IO
8+
9+
from cmem.cmempy.workspace.projects.resources.resource import get_resource_response
10+
311
from cmem_plugin_base.dataintegration.entity import Entity, EntityPath
412
from cmem_plugin_base.dataintegration.typed_entities import instance_uri, path_uri, type_uri
513
from cmem_plugin_base.dataintegration.typed_entities.typed_entities import (
@@ -8,26 +16,83 @@
816

917

1018
class File:
11-
"""A file entity that can be held in a FileEntitySchema."""
19+
"""A file entity that can be held in a FileEntitySchema.
20+
21+
:param path: The file path.
22+
:param file_type: The type of the file (one of: "Local", "Project").
23+
:param mime: The MIME type of the file, if known.
24+
:param entry_path: If the file path points to a archive, the entry within the archive.
25+
"""
1226

13-
def __init__(self, path: str, file_type: str, mime: str | None) -> None:
27+
def __init__(self, path: str, file_type: str, mime: str | None, entry_path: str | None) -> None:
1428
self.path = path
1529
self.file_type = file_type
1630
self.mime = mime
31+
self.entry_path = entry_path
32+
33+
@abstractmethod
34+
def read_stream(self, project_id: str) -> IO[bytes]:
35+
"""Open the referenced file as a stream.
36+
37+
Returns a file-like object (stream) in binary mode.
38+
Caller is responsible for closing the stream.
39+
"""
1740

1841

1942
class LocalFile(File):
    """A file that's located on the local file system."""

    def __init__(self, path: str, mime: str | None = None, entry_path: str | None = None) -> None:
        super().__init__(path, "Local", mime, entry_path)

    def read_stream(self, project_id: str) -> IO[bytes]:
        """Open the referenced file as a stream.

        Returns a file-like object (stream) in binary mode.
        Caller is responsible for closing the stream.

        If `entry_path` is set, `path` is opened as a ZIP archive and the
        stream of that entry is returned; otherwise the file itself is opened.

        :param project_id: Not used for local files.
        :raises FileNotFoundError: If the file does not exist, or if the entry
            is not contained in the archive.
        """
        if self.entry_path:
            # The archive object is closed when the `with` block exits. The entry
            # stream keeps its own reference to the underlying file handle, so it
            # stays readable and the handle is released once the caller closes
            # the stream, instead of lingering until garbage collection.
            with zipfile.ZipFile(self.path, "r") as archive:
                try:
                    return archive.open(self.entry_path, "r")
                except KeyError as err:
                    raise FileNotFoundError(
                        f"Entry '{self.entry_path}' not found in archive '{self.path}'."
                    ) from err

        if not Path(self.path).is_file():
            raise FileNotFoundError(f"File '{self.path}' does not exist.")
        return Path(self.path).open("rb")
2467

2568

2669
class ProjectFile(File):
    """A project file"""

    def __init__(self, path: str, mime: str | None = None, entry_path: str | None = None) -> None:
        super().__init__(path, "Project", mime, entry_path)

    def read_stream(self, project_id: str) -> IO[bytes]:
        """Open the referenced file as a stream.

        Returns a file-like object (stream) in binary mode.
        Caller is responsible for closing the stream.

        The resource is fetched completely and buffered in memory. If
        `entry_path` is set, the buffered bytes are opened as a ZIP archive and
        the stream of that entry is returned.

        :param project_id: The ID of the project that holds the resource.
        :raises FileNotFoundError: If the resource request does not answer with
            status 200, or if the entry is not contained in the archive.
        """
        response = get_resource_response(project_id, self.path)
        try:
            if response.status_code != 200:  # noqa: PLR2004
                raise FileNotFoundError(f"Project file '{self.path}' not found.")
            # NOTE(review): `raw.read()` returns the body without content
            # decoding - confirm the server never compresses resource responses.
            response_bytes = BytesIO(response.raw.read())
        finally:
            # Release the connection in every case, in particular on the error
            # path where the body was not consumed.
            # Assumes a requests-style response object that offers close().
            response.close()

        if not self.entry_path:
            return response_bytes

        # Closing the archive object does not close the in-memory buffer, so the
        # returned entry stream stays readable after the `with` block.
        with zipfile.ZipFile(response_bytes, "r") as archive:
            try:
                return archive.open(self.entry_path, "r")
            except KeyError as err:
                raise FileNotFoundError(
                    f"Entry '{self.entry_path}' not found in project file '{self.path}'."
                ) from err
3196

3297

3398
class FileEntitySchema(TypedEntitySchema[File]):
@@ -49,18 +114,25 @@ def to_entity(self, value: File) -> Entity:
49114
"""Create a generic entity from a file"""
50115
return Entity(
51116
uri=instance_uri(value.path),
52-
values=[[value.path], [value.file_type], [value.mime or ""]],
117+
values=[
118+
[value.path],
119+
[value.file_type],
120+
[value.mime] if value.mime else [],
121+
[value.entry_path] if value.entry_path else [],
122+
],
53123
)
54124

55125
def from_entity(self, entity: Entity) -> File:
56126
"""Create a file entity from a generic entity."""
57127
path = entity.values[0][0]
58128
file_type = entity.values[1][0]
59-
mime = entity.values[2][0] if entity.values[2][0] else None
129+
mime = entity.values[2][0] if entity.values[2] and entity.values[2][0] else None
130+
entry_path = entity.values[3][0] if entity.values[3] and entity.values[3][0] else None
131+
60132
match file_type:
61133
case "Local":
62-
return LocalFile(path, mime)
134+
return LocalFile(path, mime, entry_path)
63135
case "Project":
64-
return ProjectFile(path, mime)
136+
return ProjectFile(path, mime, entry_path)
65137
case _:
66138
raise ValueError(f"File '{path}' has unexpected type '{file_type}'.")

0 commit comments

Comments
 (0)