|
3 | 3 | import json |
4 | 4 | import logging |
5 | 5 | import os |
6 | | -from typing import Any, Optional |
| 6 | +from typing import Any, Optional, Union |
7 | 7 |
|
8 | 8 | import fsspec |
9 | 9 | import magic |
|
36 | 36 | from workflow_manager.workflow_v2.models.file_history import FileHistory |
37 | 37 | from workflow_manager.workflow_v2.models.workflow import Workflow |
38 | 38 |
|
| 39 | +from backend.exceptions import UnstractFSException |
| 40 | +from unstract.connectors.exceptions import ConnectorError |
| 41 | + |
39 | 42 | logger = logging.getLogger(__name__) |
40 | 43 |
|
41 | 44 |
|
@@ -182,16 +185,17 @@ def handle_output( |
182 | 185 | self.insert_into_db(input_file_path=input_file_path) |
183 | 186 | elif connection_type == WorkflowEndpoint.ConnectionType.API: |
184 | 187 | result = self.get_result(file_history) |
185 | | - metadata = self.get_metadata(file_history) |
| 188 | + exec_metadata = self.get_metadata(file_history) |
186 | 189 | self._handle_api_result( |
187 | | - file_name=file_name, error=error, result=result, metadata=metadata |
| 190 | + file_name=file_name, error=error, result=result, metadata=exec_metadata |
188 | 191 | ) |
189 | 192 | elif connection_type == WorkflowEndpoint.ConnectionType.MANUALREVIEW: |
190 | 193 | self._push_data_to_queue(file_name, workflow, input_file_path) |
191 | 194 | if self.execution_service: |
192 | 195 | self.execution_service.publish_log( |
193 | 196 | message=f"File '{file_name}' processed successfully" |
194 | 197 | ) |
| 198 | + |
195 | 199 | if use_file_history and not file_history: |
196 | 200 | FileHistoryHelper.create_file_history( |
197 | 201 | cache_key=file_hash.file_hash, |
@@ -222,31 +226,33 @@ def copy_output_to_output_directory(self) -> None: |
222 | 226 | destination_volume_path = os.path.join( |
223 | 227 | self.execution_dir, ToolExecKey.OUTPUT_DIR |
224 | 228 | ) |
225 | | - destination_fs.create_dir_if_not_exists(input_dir=output_directory) |
226 | | - destination_fsspec = destination_fs.get_fsspec_fs() |
227 | | - |
228 | | - # Traverse local directory and create the same structure in the |
229 | | - # output_directory |
230 | | - for root, dirs, files in os.walk(destination_volume_path): |
231 | | - for dir_name in dirs: |
232 | | - destination_fsspec.mkdir( |
233 | | - os.path.join( |
| 229 | + |
| 230 | + try: |
| 231 | + destination_fs.create_dir_if_not_exists(input_dir=output_directory) |
| 232 | + |
| 233 | + # Traverse local directory and create the same structure in the |
| 234 | + # output_directory |
| 235 | + for root, dirs, files in os.walk(destination_volume_path): |
| 236 | + for dir_name in dirs: |
| 237 | + current_dir = os.path.join( |
234 | 238 | output_directory, |
235 | 239 | os.path.relpath(root, destination_volume_path), |
236 | 240 | dir_name, |
237 | 241 | ) |
238 | | - ) |
| 242 | + destination_fs.create_dir_if_not_exists(input_dir=current_dir) |
239 | 243 |
|
240 | | - for file_name in files: |
241 | | - source_path = os.path.join(root, file_name) |
242 | | - destination_path = os.path.join( |
243 | | - output_directory, |
244 | | - os.path.relpath(root, destination_volume_path), |
245 | | - file_name, |
246 | | - ) |
247 | | - normalized_path = os.path.normpath(destination_path) |
248 | | - with open(source_path, "rb") as source_file: |
249 | | - destination_fsspec.write_bytes(normalized_path, source_file.read()) |
| 244 | + for file_name in files: |
| 245 | + source_path = os.path.join(root, file_name) |
| 246 | + destination_path = os.path.join( |
| 247 | + output_directory, |
| 248 | + os.path.relpath(root, destination_volume_path), |
| 249 | + file_name, |
| 250 | + ) |
| 251 | + destination_fs.upload_file_to_storage( |
| 252 | + source_path=source_path, destination_path=destination_path |
| 253 | + ) |
| 254 | + except ConnectorError as e: |
| 255 | + raise UnstractFSException(core_err=e) from e |
250 | 256 |
|
251 | 257 | def insert_into_db(self, input_file_path: str) -> None: |
252 | 258 | """Insert data into the database.""" |
@@ -276,7 +282,10 @@ def insert_into_db(self, input_file_path: str) -> None: |
276 | 282 | if not data: |
277 | 283 | return |
278 | 284 | # Remove metadata from result |
279 | | - data.pop("metadata", None) |
| 285 | + # The text-extractor tool returns its data as a plain string rather than a dict. |
| 286 | + # Only a dict can carry a "metadata" key, so skip the removal for other types. |
| 287 | + if isinstance(data, dict): |
| 288 | + data.pop("metadata", None) |
280 | 289 | values = DatabaseUtils.get_columns_and_values( |
281 | 290 | column_mode_str=column_mode, |
282 | 291 | data=data, |
@@ -401,7 +410,7 @@ def get_result(self, file_history: Optional[FileHistory] = None) -> Optional[Any |
401 | 410 | output_file = os.path.join(self.execution_dir, WorkflowFileType.INFILE) |
402 | 411 | metadata: dict[str, Any] = self.get_workflow_metadata() |
403 | 412 | output_type = self.get_output_type(metadata) |
404 | | - result: Optional[Any] = None |
| 413 | + result: Union[dict[str, Any], str] = "" |
405 | 414 | try: |
406 | 415 | # TODO: SDK handles validation; consider removing here. |
407 | 416 | mime = magic.Magic() |
@@ -431,7 +440,7 @@ def get_metadata( |
431 | 440 | """Get metadata from the output file. |
432 | 441 |
|
433 | 442 | Returns: |
434 | | - Union[dict[str, Any], str]: Meta data. |
| 443 | + Union[dict[str, Any], str]: Metadata. |
435 | 444 | """ |
436 | 445 | if file_history and file_history.metadata: |
437 | 446 | return self.parse_string(file_history.metadata) |
|
0 commit comments