-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathcatalog.py
More file actions
106 lines (92 loc) · 4.23 KB
/
catalog.py
File metadata and controls
106 lines (92 loc) · 4.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import logging
from .record_type import RecordType
from .dsspath import DssPath
from datetime import datetime
import re
# Module-level logger named after this module; Catalog.log_items writes to it.
logger = logging.getLogger(__name__)
class Catalog:
    """Manage the list of objects inside a DSS database.

    Time-series records that differ only in their D (date) part are merged
    into a single condensed entry; every other supported record type is
    listed individually. Iterating a ``Catalog`` yields the entries of
    ``items`` (``DssPath`` objects) in catalog order.
    """

    def __init__(self, uncondensed_paths, recordTypes):
        """Store the raw catalog listing and build the condensed catalog.

        Args:
            uncondensed_paths: indexable sequence of raw DSS pathnames, one
                per record (presumably strings; each is passed to DssPath).
            recordTypes: sequence parallel to ``uncondensed_paths``; element
                ``i`` is converted with ``RecordType.RecordTypeFromInt``.
        """
        self.uncondensed_paths = uncondensed_paths
        self.rawRecordTypes = recordTypes
        # key is path without date (lower-case), value is a list of dates
        self.timeSeriesDictNoDates = {}
        # key is lower-case path (without date for time series), value is recordType
        self.recordTypeDict = {}
        self.__create_condensed_catalog()

    def get_record_type(self, pathname):
        """Get the record type for a given path.

        The lookup is case-insensitive and tries, in order: the pathname as
        given, the pathname without its date part, and finally the pathname
        returned by ``DssPath.path_location_info()``.

        Args:
            pathname (str): dss pathname

        Returns:
            RecordType: the record type :class:`hecdss.RecordType` of DSS data
            stored in this pathname

        Raises:
            KeyError: if none of the three lookup keys is in the catalog.
        """
        key = pathname.lower()
        if key in self.recordTypeDict:
            return self.recordTypeDict[key]

        path = DssPath(pathname, RecordType.Unknown)
        no_date_key = str(path.path_without_date()).lower()
        if no_date_key in self.recordTypeDict:
            return self.recordTypeDict[no_date_key]

        # Last resort: an unknown key here raises KeyError, as it always has.
        return self.recordTypeDict[str(path.path_location_info()).lower()]

    def __create_condensed_catalog(self):
        """Populate ``items``, ``recordTypeDict`` and ``timeSeriesDictNoDates``.

        The condensed catalog combines time-series records into a single
        condensed path; other record types are not condensed. Time-series
        records must match all parts except the D (date) part to be combined.
        The condensed D part is the earliest date, followed by ``-`` and the
        latest date when more than one date was collected.

        Raises:
            Exception: if a record is neither a time series nor one of the
                supported non-time-series record types.
            IndexError: if ``rawRecordTypes`` is shorter than
                ``uncondensed_paths``.
        """
        self.items = []
        raw_paths = {}  # lower-case path without date -> a raw time-series path
        # Compiled once: matches a D part of the form 'DDMMMYYYY'.
        date_pattern = re.compile(r"^\d{2}[A-Za-z]{3}\d{4}$")
        uncondensed_types = [
            RecordType.PairedData,
            RecordType.Grid,
            RecordType.Text,
            RecordType.LocationInfo,
            RecordType.Array,
        ]

        for i, rawPath in enumerate(self.uncondensed_paths):
            recordType = RecordType.RecordTypeFromInt(self.rawRecordTypes[i])
            path = DssPath(rawPath, recordType)

            if path.is_time_series():
                # Accumulate the dates found for this dataset.
                key = str(path.path_without_date()).lower()
                raw_paths[key] = rawPath
                self.recordTypeDict[key] = recordType
                # A D part in any other format contributes no date, so such a
                # record yields a condensed item only if another record of the
                # same dataset supplies a parsable date.
                if date_pattern.match(path.D):
                    dates = self.timeSeriesDictNoDates.setdefault(key, [])
                    dates.append(datetime.strptime(path.D, "%d%b%Y"))
            elif recordType in uncondensed_types:
                self.recordTypeDict[str(path).lower()] = recordType
                self.items.append(path)
            else:
                raise Exception(f"unsupported record_type: {recordType}")

        # Sort each dataset's dates and use the first and last to create the
        # condensed path.
        for key, dates in self.timeSeriesDictNoDates.items():
            dateList = sorted(dates)
            condensedDpart = dateList[0].strftime("%d%b%Y")
            if len(dateList) > 1:
                condensedDpart += "-" + dateList[-1].strftime("%d%b%Y")
            # Rebuild a path from a raw record and insert the condensed D part.
            p = DssPath(raw_paths[key], self.recordTypeDict[key])
            p.D = condensedDpart
            self.items.append(p)

    def print(self):
        """Print all items in the catalog to stdout."""
        for ds in self.items:
            print(ds)

    def log_items(self, level=logging.INFO):
        """Log all items in the catalog at the specified logging level.

        Args:
            level: Logging level (default: logging.INFO)
        """
        for ds in self.items:
            logger.log(level, "Catalog item: %s", ds)

    def __iter__(self):
        """Return a new, independent iterator over ``items``.

        Each call gets its own position, so nested or overlapping loops over
        the same catalog do not disturb one another. The legacy cursor used
        by ``__next__`` is still reset here so that code calling
        ``next(catalog)`` after ``iter(catalog)`` keeps working.
        """
        self.index = 0
        return iter(self.items)

    def __next__(self):
        """Return the item at the legacy cursor and advance it.

        Kept for backward compatibility with callers that invoke
        ``next(catalog)`` directly; the cursor starts at the first item if
        ``__iter__`` has not been called yet.

        Raises:
            StopIteration: when the cursor is past the last item.
        """
        position = getattr(self, "index", 0)
        if position >= len(self.items):
            raise StopIteration
        self.index = position + 1
        return self.items[position]