11"""File entities"""
22
3+ import zipfile
4+ from abc import abstractmethod
5+ from io import BytesIO
6+ from pathlib import Path
7+ from typing import IO
8+
9+ from cmem .cmempy .workspace .projects .resources .resource import get_resource_response
10+
311from cmem_plugin_base .dataintegration .entity import Entity , EntityPath
412from cmem_plugin_base .dataintegration .typed_entities import instance_uri , path_uri , type_uri
513from cmem_plugin_base .dataintegration .typed_entities .typed_entities import (
816
917
1018class File :
11- """A file entity that can be held in a FileEntitySchema."""
19+ """A file entity that can be held in a FileEntitySchema.
20+
21+ :param path: The file path.
22+ :param file_type: The type of the file (one of: "Local", "Project").
23+ :param mime: The MIME type of the file, if known.
24+ :param entry_path: If the file path points to a archive, the entry within the archive.
25+ """
1226
13- def __init__ (self , path : str , file_type : str , mime : str | None ) -> None :
27+ def __init__ (self , path : str , file_type : str , mime : str | None , entry_path : str | None ) -> None :
1428 self .path = path
1529 self .file_type = file_type
1630 self .mime = mime
31+ self .entry_path = entry_path
32+
33+ @abstractmethod
34+ def read_stream (self , project_id : str ) -> IO [bytes ]:
35+ """Open the referenced file as a stream.
36+
37+ Returns a file-like object (stream) in binary mode.
38+ Caller is responsible for closing the stream.
39+ """
1740
1841
class LocalFile(File):
    """A file that's located on the local file system."""

    def __init__(self, path: str, mime: str | None = None, entry_path: str | None = None) -> None:
        super().__init__(path, "Local", mime, entry_path)

    def read_stream(self, project_id: str) -> IO[bytes]:
        """Open the referenced file as a stream.

        Returns a file-like object (stream) in binary mode.
        Caller is responsible for closing the stream.

        If ``entry_path`` is set, ``path`` is opened as a ZIP archive and the
        stream of that entry is returned; otherwise the file itself is opened.

        :param project_id: Not used for local files.
        :raises FileNotFoundError: If the file does not exist, or the entry is
            not contained in the archive.
        """
        if not self.entry_path:
            if not Path(self.path).is_file():
                raise FileNotFoundError(f"File '{self.path}' does not exist.")
            return Path(self.path).open("rb")

        # Release the archive's own handle right away. The entry stream is
        # expected to keep the underlying file open until the caller closes it
        # (see the zipfile documentation for ZipFile.open / ZipFile.close).
        with zipfile.ZipFile(self.path, "r") as archive:
            try:
                return archive.open(self.entry_path, "r")
            except KeyError as err:
                raise FileNotFoundError(
                    f"Entry '{self.entry_path}' not found in archive '{self.path}'."
                ) from err
2467
2568
class ProjectFile(File):
    """A file that is fetched as a resource of a project."""

    def __init__(self, path: str, mime: str | None = None, entry_path: str | None = None) -> None:
        super().__init__(path, "Project", mime, entry_path)

    def read_stream(self, project_id: str) -> IO[bytes]:
        """Open the referenced project file as a binary stream.

        The resource is requested via ``get_resource_response`` and its raw
        body is read completely into an in-memory buffer. If ``entry_path`` is
        set, the buffer is opened as a ZIP archive and the stream of that entry
        is returned; otherwise the buffer itself is returned.

        The caller is responsible for closing the returned stream.

        :param project_id: The project that holds the resource.
        :raises FileNotFoundError: If the response status is not 200, or the
            entry is not contained in the archive.
        """
        resource = get_resource_response(project_id, self.path)
        if resource.status_code != 200:  # noqa: PLR2004
            raise FileNotFoundError(f"Project file '{self.path}' not found.")

        buffer = BytesIO(resource.raw.read())
        if not self.entry_path:
            return buffer

        container = zipfile.ZipFile(buffer, "r")
        try:
            entry_stream = container.open(self.entry_path, "r")
        except KeyError as err:
            container.close()
            raise FileNotFoundError(
                f"Entry '{self.entry_path}' not found in project file '{self.path}'."
            ) from err
        return entry_stream
3196
3297
3398class FileEntitySchema (TypedEntitySchema [File ]):
@@ -49,18 +114,25 @@ def to_entity(self, value: File) -> Entity:
49114 """Create a generic entity from a file"""
50115 return Entity (
51116 uri = instance_uri (value .path ),
52- values = [[value .path ], [value .file_type ], [value .mime or "" ]],
117+ values = [
118+ [value .path ],
119+ [value .file_type ],
120+ [value .mime ] if value .mime else [],
121+ [value .entry_path ] if value .entry_path else [],
122+ ],
53123 )
54124
55125 def from_entity (self , entity : Entity ) -> File :
56126 """Create a file entity from a generic entity."""
57127 path = entity .values [0 ][0 ]
58128 file_type = entity .values [1 ][0 ]
59- mime = entity .values [2 ][0 ] if entity .values [2 ][0 ] else None
129+ mime = entity .values [2 ][0 ] if entity .values [2 ] and entity .values [2 ][0 ] else None
130+ entry_path = entity .values [3 ][0 ] if entity .values [3 ] and entity .values [3 ][0 ] else None
131+
60132 match file_type :
61133 case "Local" :
62- return LocalFile (path , mime )
134+ return LocalFile (path , mime , entry_path )
63135 case "Project" :
64- return ProjectFile (path , mime )
136+ return ProjectFile (path , mime , entry_path )
65137 case _:
66138 raise ValueError (f"File '{ path } ' has unexpected type '{ file_type } '." )
0 commit comments