Skip to content

Commit a80318f

Browse files
committed
Add proprietary_formatted to config file
1 parent 91c749e commit a80318f

1 file changed

Lines changed: 51 additions & 0 deletions

File tree

dms_datastore/config_data/dstore_config.yaml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,57 @@ repos:
9595
# Multiple files (different sources) may map to the same data_key
9696
data_key: [station_id, subloc, param]
9797

98+
proprietary_formatted:
99+
root: "//cnrastore-bdo/Modeling_Data/repo/continuous/proprietary/formatted"
100+
101+
# Registry providing supplementary metadata (e.g., agency, location) for stations
102+
registry: continuous # this is an alias set up above for station_dbase.csv.
103+
104+
# Depending on settings for the repo, multiple sources (e.g., ncro, cdec) may supply the same time series
105+
# For instance Bacon Island conductance is observed by NCRO but distributed both by NCRO (QA/QC'd data on Water Data Library)
106+
# and by CDEC (realtime, not QA/QC'd). This field, in this case {source}, appears in the filename template
107+
# (typically first slot) and drives globbing and merge order
108+
provider_key: source
109+
110+
# Strategy for resolving multiple providers for the same data_key, if allowed.
111+
# Possibilities:
112+
# assume_unique: only one provider allowed in a repo (no resolution needed). See screened.
113+
# registry_column: use registry metadata to determine preferred providers
114+
provider_resolution_mode: registry_column
115+
116+
# Registry column used to decide provider selection policy
117+
# Example: station_id → agency gives you the id of the observing agency, which is unique and a good basis for
118+
# prioritizing various sources. Its values are the keys of provider_resolution_order.
119+
provider_resolution_column: agency
120+
121+
# Mapping from registry value → ordered list of preferred providers ("provider" is the generic term; in this repo it is {source})
122+
# Example: if agency = ncro, prefer ncro then cdec in the {source} slot
123+
# The keys must match the provider_resolution_column values (agency) and mapped values must match the provider_key field (here: source)
124+
provider_resolution_order:
125+
ncro: ["ncro", "cdec"]
126+
dwr_ncro: ["ncro"]
127+
des: ["des"]
128+
dwr_des: ["des"]
129+
usgs: ["usgs"]
130+
noaa: ["noaa"]
131+
usbr: ["usbr", "cdec"]
132+
dwr_om: ["cdec"]
133+
dwr: ["dwr", "cdec"]
134+
ebmud: ["usgs", "ebmud", "cdec"]
135+
136+
# Filename template
137+
# {source} is the provider_key and disambiguates multiple files for the same data_key
138+
filename_templates:
139+
- "{source}_{station_id@subloc}_{agency_id}_{param}_{year}.csv"
140+
141+
# File identity: uniquely identifies a physical file (includes provider dimension)
142+
# shard is conceptual (e.g., year or syear/eyear depending on template)
143+
file_key: [source, station_id, subloc, param, shard]
144+
145+
# Data identity: uniquely identifies a logical time series independent of provider
146+
# Multiple files (different sources) may map to the same data_key
147+
data_key: [station_id, subloc, param]
148+
98149

99150
screened:
100151
root: "//cnrastore-bdo/Modeling_Data/repo/continuous/screened"

0 commit comments

Comments
 (0)