Skip to content

Commit 55fd849

Browse files
add cdippy lib version info to requests and mock requests in tests (#17)
* add cdippy lib version info to requests; add loggers to cdipnc and url_utils; mock all unit tests * fix uninitialized variable in mop_data * fix 'start' dates interpreted as local time rather than UTC
1 parent 6f9ae0b commit 55fd849

22 files changed

Lines changed: 446 additions & 320 deletions

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
.venv
55
.vscode
66
**/.coverage
7+
uv.lock
78

89
# build files
910
**/*.egg-info
10-
**/__pycache__
11+
**/__pycache__
12+
**/build

cdippy/cdipnc.py

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from datetime import datetime, timedelta, timezone
22
import os
33

4+
import logging
45
import netCDF4
56

67
import numpy as np
@@ -12,6 +13,9 @@
1213
import cdippy.url_utils as uu
1314

1415

16+
logger = logging.getLogger(__name__)
17+
18+
1519
class CDIPnc:
1620
"""A base class used by the class StnData for retrieving data from
1721
CDIP netCDF (nc) files located either locally or remotely.
@@ -144,7 +148,7 @@ def set_request_info(
144148
is set to True, only nonpub records will be returned.
145149
"""
146150
if start is None:
147-
start = datetime(1975, 1, 1)
151+
start = datetime(1975, 1, 1).replace(tzinfo=timezone.utc)
148152
if end is None:
149153
end = datetime.now(timezone.utc)
150154
self.set_timespan(start, end)
@@ -156,14 +160,19 @@ def set_request_info(
156160
def set_timespan(self, start, end):
157161
"""Sets request timespan"""
158162
if isinstance(start, str):
159-
self.start_dt = datetime.strptime(start, "%Y-%m-%d %H:%M:%S")
163+
self.start_dt = datetime.strptime(start, "%Y-%m-%d %H:%M:%S").replace(
164+
tzinfo=timezone.utc
165+
)
160166
else:
161167
self.start_dt = start
162168
if isinstance(end, str):
163-
self.end_dt = datetime.strptime(end, "%Y-%m-%d %H:%M:%S")
169+
self.end_dt = datetime.strptime(end, "%Y-%m-%d %H:%M:%S").replace(
170+
tzinfo=timezone.utc
171+
)
164172
else:
165173
self.end_dt = end
166174
self.start_stamp = cu.datetime_to_timestamp(self.start_dt)
175+
167176
self.end_stamp = cu.datetime_to_timestamp(self.end_dt)
168177

169178
def get_request(self) -> dict:
@@ -365,23 +374,25 @@ def __get_indices(self, times: list, start_stamp: int, end_stamp: int) -> tuple:
365374
e_idx = bisect_right(times, end_stamp, s_idx)
366375
return s_idx, e_idx
367376

368-
def get_nc(self, url: str = None) -> netCDF4.Dataset:
369-
if url is None:
377+
def get_nc(self, url: str = None, retry: bool = False) -> netCDF4.Dataset:
378+
if not url:
370379
url = self.url
371-
# Check if the html page or file exists
372-
if (
373-
url[0:4] == "http" and not uu.url_exists(url + ".html")
374-
) and not os.path.isfile(url):
375-
return None
376380
try:
377-
nc = netCDF4.Dataset(url)
378-
except Exception:
381+
return netCDF4.Dataset(url)
382+
except Exception as e:
379383
# Try again if unsuccessful (nc file not ready? THREDDS problem?)
380-
try:
381-
nc = netCDF4.Dataset(url)
382-
except Exception:
383-
nc = None
384-
return nc
384+
if retry:
385+
logger.warning(
386+
msg=f"Retrying to open dataset at {url} due to an unexpected exception: {e}"
387+
)
388+
try:
389+
return netCDF4.Dataset(url)
390+
except Exception:
391+
pass
392+
logger.exception(
393+
msg=f"Failed to open dataset at {url} due to an unexpected exception: {e}"
394+
)
395+
return None
385396

386397
def byte_arr_to_string(self, b_arr: np.ma.masked_array) -> str:
387398
if np.ma.is_masked(b_arr):
@@ -1027,7 +1038,3 @@ def __init__(self, stn, data_dir=None, org=None):
10271038
"""For parameters see CDIPnc.set_dataset_info."""
10281039
CDIPnc.__init__(self, data_dir)
10291040
self.set_dataset_info(stn, org, "realtimexy")
1030-
1031-
1032-
if __name__ == "__main__":
1033-
pass

cdippy/mopdata.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from datetime import datetime, timedelta
1+
from datetime import datetime, timedelta, timezone
22
from bisect import bisect_left
33

44
from cdippy.cdipnc import CDIPnc
@@ -235,7 +235,9 @@ def get_series(
235235

236236
if start is not None and end is None: # Target time
237237
if isinstance(start, str):
238-
start = datetime.strptime(start, "%Y-%m-%d %H:%M:%S")
238+
start = datetime.strptime(start, "%Y-%m-%d %H:%M:%S").replace(
239+
tzinfo=timezone.utc
240+
)
239241
ts_I = self.get_target_timespan(
240242
cu.datetime_to_timestamp(start), target_records, prefix + "Time"
241243
)
@@ -284,6 +286,7 @@ def get_target_timespan(
284286
# i_b will be possibly one more than the last index
285287
i_b = min(i_b, last_idx)
286288
# Target timestamp is exactly equal to a data time
289+
closest_idx = None
287290
if i_b == last_idx or stamps[i_b] == target_timestamp:
288291
closest_idx = i_b
289292
elif i_b > 0:

cdippy/nchashes.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,9 @@ class NcHashes:
77

88
hashes_url = "http://cdip.ucsd.edu/data_access/metadata/wavecdf_by_datemod.txt"
99
new_hashes = {}
10-
hash_pkl = "HASH.pkl"
1110

12-
def __init__(self):
13-
self.load_hash_table()
11+
def __init__(self, hash_file_location=""):
12+
self.hash_pkl = hash_file_location + "/HASH.pkl"
1413

1514
def load_hash_table(self):
1615
lines = uu.read_url(self.hashes_url).strip().split("\n")
@@ -36,7 +35,7 @@ def compare_hash_tables(self) -> list:
3635
changed = []
3736
if old_hashes:
3837
if len(self.new_hashes) == 0:
39-
self.load_hash_table()
38+
return []
4039
for key in self.new_hashes:
4140
if key not in old_hashes.keys() or (
4241
key in old_hashes.keys() and old_hashes[key] != self.new_hashes[key]

cdippy/ncstats.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def deployment_summary(self) -> dict:
5454
return result
5555

5656
def load_nc_files(self, types: list = ["realtime", "historic", "archive"]) -> dict:
57-
"""Returns netcdf4 objects of a station's netcdf files"""
57+
"""Returns netCDF4 objects of a station's netcdf files"""
5858
self.nc_files = self.get_nc_files(types)
5959

6060
def load_file(self, nc_filename: str):

cdippy/ndbc.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,22 @@ def get_stn_info(wmo_id):
2525
uu.rfindt(root, results, "description")
2626

2727

28-
def get_wmo_id(stn):
28+
def get_wmo_id(
29+
stn,
30+
store=True,
31+
filepath=".",
32+
):
2933
"""Queries cdip wmo id table for a given station. Drops pickle file locally."""
30-
pkl_fl = "./WMO_IDS.pkl"
34+
pkl_fl = filepath + "/WMO_IDS.pkl" if store else None
3135
now = datetime.now(timezone.utc)
32-
if now.minute == 23 or not os.path.isfile(pkl_fl):
36+
if not pkl_fl or now.minute == 23 or not os.path.isfile(pkl_fl):
3337
url = "/".join([cdip_base, "wmo_ids"])
3438
r = uu.read_url(url)
3539
ids = {}
3640
for line in r.splitlines():
3741
ids[line[0:3]] = line[5:].strip()
38-
cu.pkl_dump(ids, pkl_fl)
42+
if pkl_fl:
43+
cu.pkl_dump(ids, pkl_fl)
3944
else:
4045
ids = cu.pkl_load(pkl_fl)
4146
if stn in ids:

cdippy/stndata.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -503,7 +503,7 @@ def __merge_request(self):
503503
return result
504504

505505
def get_nc_files(self, types: list = nc_file_types) -> dict:
506-
"""Returns dict of netcdf4 objects of a station's netcdf files"""
506+
"""Returns dict of netCDF4 objects of a station's netcdf files"""
507507
result = {}
508508
for ftype in types:
509509
if ftype == "historic":

cdippy/url_utils.py

Lines changed: 29 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,31 @@
11
"""Methods for working with urllib scraping web pages"""
22

3+
import logging
4+
import tomllib
5+
from urllib import request, error
36
import xml.etree.ElementTree as ET
4-
import urllib.request
7+
8+
with open("pyproject.toml", "rb") as f:
9+
pyproject = tomllib.load(f)
10+
11+
version = pyproject["project"]["version"]
12+
cdippy_lib = f"CDIPpy/{version}"
13+
14+
_headers = {"User-Agent": cdippy_lib}
15+
logger = logging.getLogger(__name__)
16+
17+
18+
def _make_cdippy_request(url):
    """Open ``url`` with the CDIPpy ``User-Agent`` header attached.

    Args:
        url: The URL to request.

    Returns:
        The response object returned by ``urllib.request.urlopen``, or
        ``None`` if opening the URL raised. Failures are logged through
        the module logger and are not propagated to the caller.

    Note:
        The ``Request`` object is built outside the ``try`` block, so any
        exception raised while constructing it is not caught here.
    """
    req = request.Request(url, headers=_headers)
    try:
        return request.urlopen(req)
    # HTTPError is a subclass of URLError, so it must be handled first;
    # otherwise the URLError clause swallows it and the status code and
    # URL never reach the log.
    except error.HTTPError as e:
        logger.exception(f"HTTP error: request to {url} returned {e.code} - {e.reason}")
    except error.URLError as e:
        logger.exception(f"URL error: {e.reason}")
    except Exception as e:
        # Best-effort: any other failure is logged and reported as None.
        logger.exception(e)
    return None
529

630

731
def rfindta(el, r, tag, attr):
@@ -24,23 +48,11 @@ def rfindt(el, r, tag):
2448
r.append(el.text)
2549

2650

27-
def url_exists(url):
28-
req = urllib.request.Request(url)
29-
try:
30-
urllib.request.urlopen(req)
31-
except Exception:
32-
return False
33-
else:
34-
return True
35-
36-
3751
def read_url(url):
38-
try:
39-
r = urllib.request.urlopen(url).read().decode("UTF-8")
40-
except Exception:
41-
return None
42-
return r
52+
response = _make_cdippy_request(url)
53+
return response.read().decode("UTF-8") if response else None
4354

4455

4556
def load_et_root(url):
46-
return ET.fromstring(urllib.request.urlopen(url).read())
57+
response = _make_cdippy_request(url)
58+
return ET.fromstring(response.read()) if response else None

cdippy/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import os
22
import errno
33
import pickle as pkl
4-
import time
4+
import calendar
55
from datetime import datetime
66
import pytz
77

@@ -71,7 +71,7 @@ def cdip_datetime(cdip_str: str) -> datetime:
7171

7272

7373
def datetime_to_timestamp(dt: datetime) -> int:
74-
return time.mktime(dt.timetuple())
74+
return calendar.timegm(dt.utctimetuple())
7575

7676

7777
def timestamp_to_datetime(ts: int) -> datetime:

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "cdippy"
3-
version = "0.1.0"
3+
version = "0.1.0-dev"
44
description = "CDIP python library"
55
readme = { file = "README.md", content-type = "text/markdown" }
66
license = { text = "BSD License" }

0 commit comments

Comments
 (0)