@@ -289,47 +289,70 @@ def revise_filename_syear_eyear(pat, force=True, outfile="rename.txt"):
289289 logger .info (b )
290290 logger .info (f"Renaming complete for pattern: { pat } " )
291291
292-
293292def populate_repo (
294293 agency , param , dest , start , end , overwrite = False , ignore_existing = None
295294):
296295 """Populate repository for the given agency/source and parameter."""
297296 maximize_subloc = False
298297
299- slookup = dstore_config .config_file ("station_dbase" )
300298 if "ncro" in agency :
301299 vlookup = mapping_df
302300 agency = "ncro"
303301 else :
304302 vlookup = dstore_config .config_file ("variable_mappings" )
305303
306- subloclookup = dstore_config .config_file ("sublocations" )
307- df = pd .read_csv (slookup , sep = "," , comment = "#" , header = 0 , dtype = {"agency_id" : str })
304+ # Use repo-aware registry access instead of reading station_dbase CSV directly.
305+ slookup = dstore_config .repo_registry ("formatted" ).copy ()
306+
308307 filter_agency = "dwr_ncro" if agency == "ncro" else agency
309- df = df .loc [df .agency .str .lower () == filter_agency , :]
310- df ["agency_id" ] = df ["agency_id" ].str .replace ("'" , "" , regex = True )
308+ slookup = slookup .loc [slookup .agency .str .lower () == filter_agency , :]
309+ name_lookup = (
310+ slookup .loc [:, ["station_id" , "name" ]]
311+ .drop_duplicates (subset = ["station_id" ])
312+ )
311313
312- dfsub = read_station_subloc (subloclookup )
313- df = merge_station_subloc (df , dfsub , default_z = - 0.5 )
314314
315- df = df .reset_index ()
315+ dfsub = read_station_subloc (dstore_config .config_file ("sublocations" ))
316+ slookup = merge_station_subloc (slookup , dfsub , default_z = - 0.5 )
316317
317318 if ignore_existing is not None :
318- df = df [ ~ df ["station_id" ].isin (ignore_existing )]
319+ slookup = slookup [ ~ slookup ["station_id" ].isin (ignore_existing )]
319320
320321 dest_dir = dest
321322 source = "cdec" if agency in ["dwr" , "usbr" ] else agency
322- agency_id_col = "cdec_id" if source == "cdec" else "agency_id"
323+ agency_id_col = "agency_id"
324+ src_site_id_col = "cdec_id" if source == "cdec" else None
323325
324- df = df [["station_id" , "subloc" ]]
326+ # Preserve only the station display name, and only outside the standard
327+ # request-building pipeline.
328+ slookup = slookup .reset_index ()
329+ df_req = slookup .loc [:, ["station_id" , "subloc" ]]
325330
326331 stationlist = normalize_station_request (
327- stationframe = df ,
332+ stationframe = df_req ,
328333 param = param ,
329334 default_subloc = "default" ,
330335 )
331- stationlist = attach_agency_id (stationlist , repo_name = "formatted" , agency_id_col = agency_id_col )
336+ stationlist = attach_agency_id (
337+ stationlist ,
338+ repo_name = "formatted" ,
339+ agency_id_col = agency_id_col ,
340+ src_site_id_col = src_site_id_col ,
341+ on_missing = "drop" if src_site_id_col is not None else "raise" ,
342+ )
332343 stationlist = attach_src_var_id (stationlist , vlookup , source = source )
344+
345+ stationlist = stationlist .merge (
346+ name_lookup ,
347+ on = "station_id" ,
348+ how = "left" ,
349+ validate = "many_to_one" ,
350+ )
351+
352+ if stationlist ["name" ].isna ().any ():
353+ missing = stationlist .loc [stationlist ["name" ].isna (), "station_id" ].tolist ()
354+ raise ValueError (f"Missing station name for station_id(s): { missing } " )
355+
333356 if maximize_subloc :
334357 stationlist ["subloc" ] = "default"
335358 if param not in ["flow" , "elev" ]:
@@ -386,7 +409,8 @@ def supplement_ncro_with_cdec(df, dest, start, overwrite=False, ignore_existing=
386409 df = df [~ df ["station_id" ].isin (ignore_existing )]
387410
388411 source = "cdec"
389- agency_id_col = "cdec_id"
412+ agency_id_col = "agency_id"
413+ src_site_id_col = "cdec_id"
390414
391415 stationlist = normalize_station_request (
392416 stationframe = df ,
@@ -397,6 +421,7 @@ def supplement_ncro_with_cdec(df, dest, start, overwrite=False, ignore_existing=
397421 stationlist ,
398422 repo_name = "formatted" ,
399423 agency_id_col = agency_id_col ,
424+ src_site_id_col = src_site_id_col ,
400425 on_missing = "drop" ,
401426 )
402427
0 commit comments