Skip to content

Commit cec237c

Browse files
Add functionality to easily load output files from previous executions into a running app (#58)
1 parent d022b23 commit cec237c

8 files changed

Lines changed: 279 additions & 2 deletions

File tree

dev-requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ isort
55
pydata-sphinx-theme == 0.11.0
66
pytest
77
pytest-cov
8-
sphinx == 5.3.0
8+
sphinx == 5.3.0
9+
myst-parser<2.0

docs/api_ref/resultcollector.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,5 @@ ResultCollector
1212
:toctree:
1313
:template: class.rst
1414

15+
load_previous_engineroom_results
1516
ResultCollector

docs/user_guide/concepts_utils/collect_results.rst

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,30 @@
11
Collect and store results
22
=========================
3+
Application results can be stored inside a dedicated folder, so that the application output is easily available after app execution.
4+
5+
Application results can be any set of files (or file hierarchy) produced by your application, for example output CSV files
6+
containing processed data. Results are moved to a permanent store after every execution, regardless of exit code.
7+
8+
Output files are appended to the result set from the previous execution. If your application creates a file with the same name, it will
9+
overwrite the previous result file. If you need to append to a file from a previous execution, the file must be loaded into the
10+
application first. This can be achieved using :func:`~fourinsight.engineroom.utils.load_previous_engineroom_results`, which will
11+
load the results from the permanent store into the application.
12+
13+
14+
.. code-block:: python
15+
16+
from fourinsight.api import UserSession
17+
from fourinsight.engineroom.utils import load_previous_engineroom_results
18+
19+
session = UserSession()
20+
21+
#download all available results
22+
load_previous_engineroom_results(ENGINE_ROOM_APP_ID, session, download_all=True)
23+
24+
#download specific results file
25+
load_previous_engineroom_results(ENGINE_ROOM_APP_ID, session, path="config.json")
26+
27+
328
The :class:`~fourinsight.engineroom.utils.ResultCollector` is a useful tool when you want to collect and store results.
429
The basic usage is illustrated with the examples below.
530

fourinsight/engineroom/utils/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
PersistentDict,
88
PersistentJSON,
99
ResultCollector,
10+
load_previous_engineroom_results,
1011
)
1112
from ._datamanage import (
1213
BaseDataSource,
@@ -36,4 +37,5 @@
3637
"PersistentDict",
3738
"PersistentJSON",
3839
"ResultCollector",
40+
"load_previous_engineroom_results",
3941
]
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
API_BASE_URL = "https://api.4insight.io"

fourinsight/engineroom/utils/_core.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import json
2+
import urllib.parse
3+
import warnings
24
from abc import abstractmethod
35
from collections.abc import MutableMapping
46
from io import BytesIO, TextIOWrapper
@@ -8,6 +10,8 @@
810
from azure.core.exceptions import ResourceNotFoundError
911
from azure.storage.blob import BlobClient
1012

13+
from ._constants import API_BASE_URL
14+
1115

1216
class BaseHandler(TextIOWrapper):
1317
"""
@@ -548,3 +552,73 @@ def truncate(self, before=None, after=None):
548552
index_drop.extend(self._dataframe.index[(self._dataframe.index > after)])
549553
if index_drop:
550554
self.delete_rows(index_drop)
555+
556+
557+
def _get_all_previous_file_names(app_id, session):
    """
    Query the result files available for an EngineRoom application.

    Parameters
    ----------
    app_id : str
        The EngineRoom application ID.
    session : 4insight session object
        Session used to issue the GET request.

    Returns
    -------
    object
        The decoded JSON body of the response. Callers in this module treat it
        as a list of dicts carrying ``"fileName"`` and ``"navigableFileName"``.

    Warns
    -----
    UserWarning
        If the decoded response is empty.
    """
    listing_url = f"{API_BASE_URL}/v1.0/Applications/{app_id}/results"
    reply = session.get(listing_url)
    reply.raise_for_status()

    listing = reply.json()
    if listing:
        return listing

    # An empty listing is reported but still handed back to the caller.
    warnings.warn(f"No results found for application ID {app_id}.", UserWarning)
    return listing
565+
566+
567+
def _build_download_url(app_id, navigable_file_name):
    """
    Build the download URL for a single result file.

    Parameters
    ----------
    app_id : str
        The EngineRoom application ID.
    navigable_file_name : str
        The file's ``navigableFileName`` as given in the results listing. It is
        percent-encoded with :func:`urllib.parse.quote` before being inserted
        into the URL.

    Returns
    -------
    str
        The download URL.
    """
    encoded_name = urllib.parse.quote(navigable_file_name)
    results_root = f"{API_BASE_URL}/v1.0/Applications/{app_id}/results"
    return f"{results_root}/{encoded_name}/download"
570+
571+
572+
def _download_and_save_file(session, download_url, save_path):
    """
    Download a file and write its content to ``save_path``.

    Parameters
    ----------
    session : 4insight session object
        Session used to issue the GET request.
    download_url : str
        URL to download from.
    save_path : str or Path
        Local destination file. Missing parent folders are created.

    Raises
    ------
    Exception
        Whatever ``response.raise_for_status()`` raises for an unsuccessful
        request. Nothing is created on disk in that case.
    """
    response = session.get(download_url)
    # Validate the response before touching the file system, so that a failed
    # request does not leave empty folders behind.
    response.raise_for_status()

    save_path = Path(save_path)
    save_path.parent.mkdir(parents=True, exist_ok=True)
    with open(save_path, "wb") as f:
        f.write(response.content)
578+
579+
580+
def load_previous_engineroom_results(
    app_id, session, path=None, download_all=False, output_folder="output"
):
    """
    Load past EngineRoom results from a specified application and store them
    locally.

    Parameters
    ----------
    app_id : str
        The EngineRoom application ID.
    session : 4insight session object
        Authorized 4insight session.
    path : str or Path, optional
        The file path within the EngineRoom output folder. A string is matched
        exactly against the available file names; any other path-like object
        is converted to its forward-slash form first. Ignored if
        ``download_all`` is True.
    download_all : bool, optional
        If True, download all available results. Defaults to False.
    output_folder : str or Path, optional
        Local folder the files are written to; each file keeps its path
        relative to this folder. Defaults to "output".

    Warns
    -----
    UserWarning
        If neither ``path`` nor ``download_all`` is given, if the application
        has no results, or if ``path`` is not among the available results.
        Nothing is downloaded in any of these cases.
    """
    if not download_all and path is None:
        # Nothing was requested; say so instead of reporting "None not found".
        warnings.warn(
            "No file requested: provide 'path' or set 'download_all=True'.",
            UserWarning,
            stacklevel=2,
        )
        return

    output_folder = Path(output_folder)
    available_results = _get_all_previous_file_names(app_id, session)
    if not available_results:
        return

    if download_all:
        selected = available_results
    else:
        # Listed file names are plain strings, so a Path would never compare
        # equal to them. Strings are left untouched to keep exact matching.
        wanted = path if isinstance(path, str) else Path(path).as_posix()
        # Only the first match is used if a name is listed more than once.
        selected = [
            result for result in available_results if result["fileName"] == wanted
        ][:1]
        if not selected:
            warnings.warn(
                f"{path} not found in application {app_id} results.",
                UserWarning,
                stacklevel=2,
            )
            return

    for result in selected:
        file_path = output_folder / result["fileName"]
        download_url = _build_download_url(app_id, result["navigableFileName"])
        _download_and_save_file(session, download_url, file_path)

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ install_requires =
2222
azure-storage-blob >= 12.4.0
2323
pyarrow
2424
numpy
25+
fourinsight-api
2526

2627
[options.packages.find]
2728
include =

tests/test_core.py

Lines changed: 173 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import json
2+
import urllib.parse
3+
import warnings
24
from io import BytesIO, TextIOWrapper
35
from pathlib import Path
4-
from unittest.mock import Mock, patch
6+
from unittest.mock import ANY, MagicMock, Mock, mock_open, patch
57

68
import numpy as np
79
import pandas as pd
@@ -15,9 +17,16 @@
1517
NullHandler,
1618
PersistentDict,
1719
ResultCollector,
20+
load_previous_engineroom_results,
21+
)
22+
from fourinsight.engineroom.utils._core import (
23+
_build_download_url,
24+
_download_and_save_file,
25+
_get_all_previous_file_names,
1826
)
1927

2028
REMOTE_FILE_PATH = Path(__file__).parent / "testdata/a_test_file.json"
29+
API_BASE_URL = "https://api.4insight.io"
2130

2231

2332
@pytest.fixture
@@ -55,6 +64,25 @@ def azure_blob_handler_mocked(mock_from_connection_string):
5564
return handler
5665

5766

67+
@pytest.fixture
def previous_file_names():
    """
    Sample results listing: one top-level file and one nested file.

    ``safeName`` holds the percent-encoded ``navigableFileName`` that the tests
    expect to find in the download URL.
    """
    nested_navigable = "SN00569 - 23AT*2024-12-02_131637*sensor_info*sensor_info.csv"

    top_level_entry = {
        "fileName": "config.json",
        "navigableFileName": "config.json",
        "safeName": "config.json",
    }
    nested_entry = {
        "fileName": "SN00569 - 23AT/2024-12-02_131637/sensor_info/sensor_info.csv",
        "navigableFileName": nested_navigable,
        "safeName": urllib.parse.quote(nested_navigable),
    }
    return [top_level_entry, nested_entry]
84+
85+
5886
class Test_BaseHandler:
5987
def test__init__(self):
6088
handler = BaseHandler()
@@ -1210,3 +1238,147 @@ def test_delete_rows_truncate_int_both_none(self):
12101238
).astype({"a": "float64", "b": "string", "c": "Int64", "d": "float64"})
12111239

12121240
pd.testing.assert_frame_equal(df_out, df_expect)
1241+
1242+
1243+
def test__build_download_url(previous_file_names):
    """The navigable file name ends up percent-encoded in the download URL."""
    app_id = "12345"
    for entry in previous_file_names:
        expected_url = (
            f"{API_BASE_URL}/v1.0/Applications/{app_id}/results/"
            f"{entry['safeName']}/download"
        )
        assert _build_download_url(app_id, entry["navigableFileName"]) == expected_url
1253+
1254+
1255+
class Test__download_and_save_file:
    """Tests for ``_download_and_save_file`` with session, ``open`` and ``mkdir`` mocked."""

    def setup_method(self):
        # Session whose ``get`` always returns the same canned response.
        self.mock_response = MagicMock()
        self.mock_response.content = b"this is the files"
        self.mock_session = MagicMock()
        self.mock_session.get.return_value = self.mock_response

        self.url = "https://4insight.io/engineroom/result1.csv"
        self.path = Path("output/results1.csv")

    @patch("fourinsight.engineroom.utils._core.open", new_callable=mock_open)
    @patch.object(Path, "mkdir")
    def test_download_success(self, mock_mkdir, mock_file):
        _download_and_save_file(self.mock_session, self.url, self.path)

        self.mock_session.get.assert_called_once_with(self.url)
        self.mock_response.raise_for_status.assert_called_once()
        mock_file.assert_called_once_with(self.path, "wb")
        mock_file().write.assert_called_once_with(b"this is the files")
        mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)

    @patch("fourinsight.engineroom.utils._core.open", new_callable=mock_open)
    @patch.object(Path, "mkdir")
    def test_raises_exception_on_http_error(self, mock_mkdir, mock_file):
        self.mock_response.raise_for_status.side_effect = RuntimeError("HTTP error")

        with pytest.raises(RuntimeError, match="HTTP error"):
            _download_and_save_file(self.mock_session, self.url, self.path)

        # A failed request must not produce an output file.
        mock_file.assert_not_called()

    @patch("fourinsight.engineroom.utils._core.open", new_callable=mock_open)
    @patch.object(Path, "mkdir")
    def test_creates_directory_if_missing(self, mock_mkdir, mock_file):
        _download_and_save_file(self.mock_session, self.url, self.path)
        mock_mkdir.assert_called_once_with(parents=True, exist_ok=True)
1290+
1291+
1292+
class Test__get_all_previous_file_names:
    """Tests for ``_get_all_previous_file_names`` with a mocked session."""

    def setup_method(self):
        # Session whose ``get`` returns a response that passes the status check.
        self.mock_response = MagicMock()
        self.mock_response.raise_for_status.return_value = None
        self.mock_session = MagicMock()
        self.mock_session.get.return_value = self.mock_response

    def test_successful_response(self, previous_file_names):
        self.mock_response.json.return_value = previous_file_names
        app_id = "app123"

        results = _get_all_previous_file_names(app_id, self.mock_session)

        assert results == previous_file_names
        self.mock_session.get.assert_called_once_with(
            f"{API_BASE_URL}/v1.0/Applications/{app_id}/results"
        )
        self.mock_response.raise_for_status.assert_called_once()

    def test_empty_results_returns_warning(self):
        self.mock_response.json.return_value = []
        app_id = "app123"

        with pytest.warns(
            UserWarning, match=f"No results found for application ID {app_id}."
        ):
            results = _get_all_previous_file_names(app_id, self.mock_session)

        # The empty listing is still handed back to the caller.
        assert results == []
1321+
1322+
1323+
class Test_load_previous_engineroom_results:
    """Tests for ``load_previous_engineroom_results`` with its helpers patched."""

    def setup_method(self):
        # The helpers that use the session are patched in every test, so the
        # session only needs to be passed through.
        self.mock_session = MagicMock()

    @patch("fourinsight.engineroom.utils._core._download_and_save_file")
    @patch("fourinsight.engineroom.utils._core._get_all_previous_file_names")
    def test_download_all(
        self,
        mock__get_all_previous_file_names,
        mock__download_and_save_file,
        previous_file_names,
    ):
        mock__get_all_previous_file_names.return_value = previous_file_names

        load_previous_engineroom_results("app123", self.mock_session, download_all=True)

        assert mock__get_all_previous_file_names.call_count == 1
        assert mock__download_and_save_file.call_count == len(previous_file_names)
        for entry in previous_file_names:
            mock__download_and_save_file.assert_any_call(
                self.mock_session,
                ANY,
                Path("output") / entry["fileName"],
            )

    @patch("fourinsight.engineroom.utils._core._download_and_save_file")
    @patch("fourinsight.engineroom.utils._core._get_all_previous_file_names")
    def test_download_single_file(
        self,
        mock__get_all_previous_file_names,
        mock__download_and_save_file,
        previous_file_names,
    ):
        mock__get_all_previous_file_names.return_value = previous_file_names
        requested = previous_file_names[1]["fileName"]

        load_previous_engineroom_results("app123", self.mock_session, requested)

        assert mock__get_all_previous_file_names.call_count == 1
        mock__download_and_save_file.assert_called_once_with(
            self.mock_session, ANY, Path("output") / requested
        )

    @patch("fourinsight.engineroom.utils._core._download_and_save_file")
    @patch("fourinsight.engineroom.utils._core._get_all_previous_file_names")
    def test_warn_when_file_not_found(
        self,
        mock__get_all_previous_file_names,
        mock__download_and_save_file,
        previous_file_names,
    ):
        mock__get_all_previous_file_names.return_value = previous_file_names

        with pytest.warns(
            UserWarning,
            match="missing_file.json not found in application app123 results.",
        ):
            load_previous_engineroom_results(
                "app123", self.mock_session, "missing_file.json"
            )

        # An unknown file must only warn; no download may be attempted.
        mock__download_and_save_file.assert_not_called()

0 commit comments

Comments
 (0)