@@ -95,6 +95,57 @@ repos:
9595 # Multiple files (different sources) may map to the same data_key
9696 data_key : [station_id, subloc, param]
9797
98+ proprietary_formatted :
99+ root : " //cnrastore-bdo/Modeling_Data/repo/continuous/proprietary/formatted"
100+
101+ # Registry providing supplementary metadata (e.g., agency, location) for stations
102+ registry : continuous # this is an alias set up above for station_dbase.csv.
103+
104+ # Depending on settings for the repo, multiple sources (e.g., ncro, cdec) may supply the same time series
105+ # For instance Bacon Island conductance is observed by NCRO but distributed both by NCRO (QA/QC'd data on Water Data Library)
106+ # and on CDEC (realtime, not QA/QC). This field, in this case {source}, appears in the filename template
107+ # (typically in the first slot) and drives globbing and merge order.
108+ provider_key : source
109+
110+ # Strategy for resolving multiple providers for the same data_key, if allowed.
111+ # Possibilities:
112+ # assume_unique: only one provider allowed in a repo (no resolution needed). See screened.
113+ # registry_column: use registry metadata to determine preferred providers
114+ provider_resolution_mode : registry_column
115+
116+ # Registry column used to decide provider selection policy
117+ # Example: station_id → agency gives you the id of the observing agency, which is unique and a good basis for
118+ # prioritizing various sources. Its values are the keys of provider_resolution_order.
119+ provider_resolution_column : agency
120+
121+ # Mapping from registry value → ordered list of preferred providers ("provider" is the generic term; in this case it is {source})
122+ # Example: if agency = ncro, prefer ncro then cdec in the {source} slot
123+ # The keys must match the provider_resolution_column values (agency) and mapped values must match the provider_key field (here: source)
124+ provider_resolution_order :
125+ ncro : ["ncro", "cdec"]
126+ dwr_ncro : ["ncro"]
127+ des : ["des"]
128+ dwr_des : ["des"]
129+ usgs : ["usgs"]
130+ noaa : ["noaa"]
131+ usbr : ["usbr", "cdec"]
132+ dwr_om : ["cdec"]
133+ dwr : ["dwr", "cdec"]
134+ ebmud : ["usgs", "ebmud", "cdec"]
135+
136+ # Filename template
137+ # {source} is the provider_key and disambiguates multiple files for the same data_key
138+ filename_templates :
139+ - " {source}_{station_id@subloc}_{agency_id}_{param}_{year}.csv"
140+
141+ # File identity: uniquely identifies a physical file (includes provider dimension)
142+ # shard is conceptual (e.g., year or syear/eyear depending on template)
143+ file_key : [source, station_id, subloc, param, shard]
144+
145+ # Data identity: uniquely identifies a logical time series independent of provider
146+ # Multiple files (different sources) may map to the same data_key
147+ data_key : [station_id, subloc, param]
148+
98149
99150 screened :
100151 root : " //cnrastore-bdo/Modeling_Data/repo/continuous/screened"
0 commit comments