Skip to content

Commit e37caa9

Browse files
committed
add configurable HTTP timeout
introduce NCRO HTTP latency harness test
1 parent ee1fa4c commit e37caa9

File tree

2 files changed

+56
-10
lines changed

2 files changed

+56
-10
lines changed

dms_datastore/download_ncro.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@
3434
REQUEST_CHUNK_YEARS = 5
3535
ALIGN_CHUNKS_TO_YEAR_MODULUS = True
3636
RETAIN_INVENTORY_HOURS=24
37-
NCRO_MAX_WORKERS = 2
37+
NCRO_MAX_WORKERS = 4
38+
NCRO_HTTP_TIMEOUT = 60.0 # seconds; increase if inventory downloads time out
3839

3940
mappings = {
4041
"Water Temperature": "temp",
@@ -109,10 +110,14 @@ def load_inventory():
109110
dbase = dstore_config.station_dbase()
110111
dbase = dbase.loc[dbase["agency"].str.contains("ncro"), :]
111112

112-
with httpx.Client(timeout=60.0) as client:
113-
response = client.get(url)
114-
response.raise_for_status()
115-
data = response.json()
113+
with httpx.Client(timeout=NCRO_HTTP_TIMEOUT) as client:
114+
try:
115+
response = client.get(url)
116+
response.raise_for_status()
117+
data = response.json()
118+
except httpx.ReadTimeout:
119+
logger.error(f"NCRO Inventory: Read timeout fetching site list from {url}")
120+
raise
116121
sites = data["return"]["sites"]
117122
sites_df = pd.DataFrame(sites) # database of all NCRO sites
118123
logger.debug(f"NCRO Inventory: Retrieved list of {len(sites_df)} sites from NCRO")
@@ -125,10 +130,14 @@ def load_inventory():
125130
names = similar_ncro_station_names(origname)
126131

127132
url2 = f"https://wdlhyd.water.ca.gov/hydstra/sites/{','.join(names)}/traces"
128-
with httpx.Client(timeout=60.0) as client:
129-
response = client.get(url2)
130-
response.raise_for_status()
131-
data2 = response.json()
133+
with httpx.Client(timeout=NCRO_HTTP_TIMEOUT) as client:
134+
try:
135+
response = client.get(url2)
136+
response.raise_for_status()
137+
data2 = response.json()
138+
except httpx.ReadTimeout:
139+
logger.error(f"NCRO Inventory: Read timeout fetching traces for station {origname} from {url2}")
140+
raise
132141

133142
# Flatten the JSON
134143
flattened_data = []
@@ -190,7 +199,9 @@ async def _async_download_trace(client, site, trace, stime, etime):
190199
except Exception as e:
191200
logger.debug(f"Exception on attempt {attempt}: " + str(e))
192201
if attempt == max_attempt:
193-
logger.warning("Failed all attempts to download trace for station " + site + " trace " + trace)
202+
logger.warning(
203+
f"Failed all attempts to download trace for station {site} trace {trace} url {url_trace}: {e}"
204+
)
194205
return None
195206
else:
196207
await asyncio.sleep(
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import os
2+
import time
3+
4+
import pytest
5+
import requests
6+
7+
8+
def _env_true(name: str) -> bool:
    """Report whether environment variable *name* holds a truthy flag.

    The value is stripped of surrounding whitespace and lower-cased, then
    compared with the accepted spellings "1", "true", "yes" and "on".
    An unset variable is read as the empty string and therefore is false.
    """
    raw_value = os.getenv(name, "")
    normalized = raw_value.strip().lower()
    return normalized in {"1", "true", "yes", "on"}
10+
11+
12+
def test_ncro_sites_session_get_harness():
    """Opt-in latency harness: time one GET against the NCRO sites URL.

    The test is skipped unless ``RUN_NCRO_HTTP_HARNESS`` is set to a truthy
    value, so ordinary test runs never touch the network.

    Environment variables read:
        RUN_NCRO_HTTP_HARNESS: enables the harness ("1", "true", "yes", "on").
        NCRO_HARNESS_URL: URL to fetch; defaults to the hydstra ``/sites`` URL.
        NCRO_HARNESS_TIMEOUT: request timeout in seconds (default 200).
        NCRO_HARNESS_MAX_SECONDS: upper bound on elapsed seconds; the bound is
            only enforced when the value is greater than 0 (default 0).
    """
    if not _env_true("RUN_NCRO_HTTP_HARNESS"):
        pytest.skip("Set RUN_NCRO_HTTP_HARNESS=1 to run NCRO HTTP latency harness")

    url = os.getenv("NCRO_HARNESS_URL", "https://wdlhyd.water.ca.gov/hydstra/sites")
    timeout_seconds = float(os.getenv("NCRO_HARNESS_TIMEOUT", "200"))
    max_elapsed_seconds = float(os.getenv("NCRO_HARNESS_MAX_SECONDS", "0"))

    # Use the session as a context manager so its pooled connections are
    # released even when the request raises (e.g. on timeout).
    with requests.Session() as session:
        start = time.perf_counter()
        response = session.get(url, timeout=timeout_seconds)
        elapsed = time.perf_counter() - start

    assert response.ok, f"HTTP {response.status_code} from {url}"

    # A non-positive threshold means "measure only, do not enforce".
    if max_elapsed_seconds > 0:
        assert (
            elapsed <= max_elapsed_seconds
        ), f"session.get({url}) took {elapsed:.2f}s (threshold={max_elapsed_seconds:.2f}s)"

    print(
        f"NCRO harness: GET {url} completed in {elapsed:.2f}s "
        f"(status={response.status_code}, timeout={timeout_seconds:.1f}s)"
    )

0 commit comments

Comments
 (0)