11"""File entities"""
22
3+ import zipfile
4+ from abc import abstractmethod
5+ from io import BytesIO
6+ from pathlib import Path
7+ from typing import IO
8+
9+ from cmem .cmempy .workspace .projects .resources .resource import get_resource_response
10+
311from cmem_plugin_base .dataintegration .entity import Entity , EntityPath
412from cmem_plugin_base .dataintegration .typed_entities import instance_uri , path_uri , type_uri
513from cmem_plugin_base .dataintegration .typed_entities .typed_entities import (
816
917
1018class File :
11- """A file entity that can be held in a FileEntitySchema."""
19+ """A file entity that can be held in a FileEntitySchema.
20+
21+ :param path: The file path.
22+ :param file_type: The type of the file (one of: "Local", "Project").
23+ :param mime: The MIME type of the file, if known.
24+ :param entry_path: If the file path points to a archive, the entry within the archive.
25+ """
1226
13- def __init__ (self , path : str , file_type : str , mime : str | None ) -> None :
27+ def __init__ (self , path : str , file_type : str , mime : str | None , entry_path : str | None ) -> None :
1428 self .path = path
1529 self .file_type = file_type
1630 self .mime = mime
31+ self .entry_path = entry_path
32+
33+ @abstractmethod
34+ def read_stream (self , project_id : str ) -> IO [bytes ]:
35+ """Open the referenced file as a stream.
36+
37+ Returns a file-like object (stream) in binary mode.
38+ Caller is responsible for closing the stream.
39+ """
1740
1841
class LocalFile(File):
    """A file that's located on the local file system."""

    def __init__(self, path: str, mime: str | None = None, entry_path: str | None = None) -> None:
        super().__init__(path, "Local", mime, entry_path)

    def read_stream(self, project_id: str) -> IO[bytes]:
        """Open the referenced file as a stream.

        Returns a file-like object (stream) in binary mode.
        Caller is responsible for closing the stream.

        If ``entry_path`` is set, ``path`` is opened as a ZIP archive and the
        stream of that entry is returned; otherwise the file itself is opened.

        :param project_id: Not used for local files.
        :raises FileNotFoundError: If the file does not exist, or the entry is
            not present in the archive. Other errors raised while opening the
            archive or the entry propagate unchanged.
        """
        if self.entry_path:
            archive = zipfile.ZipFile(self.path, "r")
            try:
                return archive.open(self.entry_path, "r")
            except KeyError as err:
                archive.close()
                raise FileNotFoundError(
                    f"Entry '{self.entry_path}' not found in archive '{self.path}'."
                ) from err
            except BaseException:
                # Any other failure would otherwise leave the archive's file
                # handle open with nobody left to close it.
                archive.close()
                raise

        if not Path(self.path).is_file():
            raise FileNotFoundError(f"File '{self.path}' does not exist.")
        return Path(self.path).open("rb")
2467
2568
class ProjectFile(File):
    """A project file"""

    def __init__(self, path: str, mime: str | None = None, entry_path: str | None = None) -> None:
        super().__init__(path, "Project", mime, entry_path)

    def read_stream(self, project_id: str) -> IO[bytes]:
        """Open the referenced file as a stream.

        Returns a file-like object (stream) in binary mode.
        Caller is responsible for closing the stream.

        The resource is fetched with ``get_resource_response`` and read fully
        into memory. If ``entry_path`` is set, the downloaded bytes are opened
        as a ZIP archive and the stream of that entry is returned.

        :param project_id: Project passed to ``get_resource_response`` together
            with the file path.
        :raises FileNotFoundError: If the response status is not 200, or the
            entry is not present in the downloaded archive.
        """
        response = get_resource_response(project_id, self.path)
        if response.status_code != 200:  # noqa: PLR2004
            raise FileNotFoundError(f"Project file '{self.path}' not found.")

        # NOTE(review): reading ``response.raw`` presumably relies on
        # get_resource_response returning a streamed, still-unread body -- confirm.
        response_bytes = BytesIO(response.raw.read())
        if not self.entry_path:
            return response_bytes

        archive = zipfile.ZipFile(response_bytes, "r")
        try:
            return archive.open(self.entry_path, "r")
        except KeyError as err:
            archive.close()
            raise FileNotFoundError(
                f"Entry '{self.entry_path}' not found in project file '{self.path}'."
            ) from err
3196
3297
3398class FileEntitySchema (TypedEntitySchema [File ]):
@@ -40,25 +105,33 @@ def __init__(self):
40105 EntityPath (path_uri ("filePath" ), is_single_value = True ),
41106 EntityPath (path_uri ("fileType" ), is_single_value = True ),
42107 EntityPath (path_uri ("mimeType" ), is_single_value = True ),
108+ EntityPath (path_uri ("entryPath" ), is_single_value = True ),
43109 ],
44110 )
45111
def to_entity(self, value: File) -> Entity:
    """Convert a file into its generic entity representation.

    The entity URI is derived from the file path. The value lists are, in
    order: path, file type, MIME type and archive entry path; the last two
    are empty lists when the corresponding attribute is unset or empty.
    """
    entity_uri = instance_uri(value.path)
    mime_values = [value.mime] if value.mime else []
    entry_values = [value.entry_path] if value.entry_path else []
    columns = [[value.path], [value.file_type], mime_values, entry_values]
    return Entity(uri=entity_uri, values=columns)
52123
53124 def from_entity (self , entity : Entity ) -> File :
54125 """Create a file entity from a generic entity."""
55126 path = entity .values [0 ][0 ]
56127 file_type = entity .values [1 ][0 ]
57128 mime = entity .values [2 ][0 ] if entity .values [2 ] and entity .values [2 ][0 ] else None
129+ entry_path = entity .values [3 ][0 ] if entity .values [3 ] and entity .values [3 ][0 ] else None
130+
58131 match file_type :
59132 case "Local" :
60- return LocalFile (path , mime )
133+ return LocalFile (path , mime , entry_path )
61134 case "Project" :
62- return ProjectFile (path , mime )
135+ return ProjectFile (path , mime , entry_path )
63136 case _:
64137 raise ValueError (f"File '{ path } ' has unexpected type '{ file_type } '." )
0 commit comments