11import argparse
2+ import warnings
23from pathlib import Path
34from typing import cast
45
78
89from utils import get_aws_region
910from utils .types import electric_utility
11+ from utils .utility_codes import get_std_name_to_gas_tariff_key
1012
1113STORAGE_OPTIONS = {"aws_region" : get_aws_region ()}
1214
@@ -24,14 +26,20 @@ def map_gas_tariff(
2426 if test_sample .is_empty ():
2527 raise ValueError (f"No rows found for electric utility { electric_utility_name } " )
2628
27- gas_tariff_mapping_df = (
28- utility_metadata_df .select (pl .col ("bldg_id" , "sb.gas_utility" ))
29- .with_columns (
30- pl .when (pl .col ("sb.gas_utility" ) == "National Grid" )
31- .then (pl .lit ("national_grid" ))
32- .otherwise (pl .lit ("nyseg" ))
29+ gas_tariff_map = get_std_name_to_gas_tariff_key ()
30+
31+ def _tariff_key_expr () -> pl .Expr :
32+ # Map sb.gas_utility (std_name) -> tariff_key via crosswalk; fallback to std_name
33+ return (
34+ pl .col ("sb.gas_utility" )
35+ .replace (gas_tariff_map )
36+ .fill_null (pl .col ("sb.gas_utility" ))
3337 .alias ("tariff_key" )
3438 )
39+
40+ gas_tariff_mapping_df = (
41+ utility_metadata_df .select (pl .col ("bldg_id" , "sb.gas_utility" ))
42+ .with_columns (_tariff_key_expr ())
3543 .drop ("sb.gas_utility" )
3644 )
3745
@@ -50,7 +58,9 @@ def map_gas_tariff(
5058 parser .add_argument ("--state" , required = True , help = "State code (e.g. NY, RI)" )
5159 parser .add_argument ("--upgrade_id" , required = True , help = "Upgrade id (e.g. 00)" )
5260 parser .add_argument (
53- "--electric_utility" , required = True , help = "Electric utility (e.g. Coned)"
61+ "--electric_utility" ,
62+ required = True ,
63+ help = "Electric utility std_name (e.g. coned, nyseg, nimo)" ,
5464 )
5565 parser .add_argument (
5666 "--output_dir" ,
@@ -59,47 +69,59 @@ def map_gas_tariff(
5969 )
6070 args = parser .parse_args ()
6171
62- #########################################################
63- # For now, we will manually add the electric and gas utility columns. Later the metadata parquet will include them.
64- # Electric: first ~1/3 Coned, next ~1/3 National Grid, last ~1/3 NYSEG. Gas: half National Grid, half NYSEG.
6572 try : # If the metadata path is an S3 path, use the S3Path class.
6673 base_path = S3Path (args .metadata_path )
67- metadata_path = (
68- base_path
69- / f"state={ args .state } "
70- / f"upgrade={ args .upgrade_id } "
71- / "metadata-sb.parquet"
72- )
73- if not metadata_path .exists ():
74- raise FileNotFoundError (f"Metadata path { metadata_path } does not exist" )
75- SB_metadata_df = pl .scan_parquet (
76- str (metadata_path ), storage_options = STORAGE_OPTIONS
77- )
78- except ValueError : # If the metadata path is a local path, use the Path class.
74+ use_s3 = True
75+ except ValueError :
7976 base_path = Path (args .metadata_path )
77+ use_s3 = False
78+
79+ # If --metadata_path contains "metadata_utility", read state=<state>/utility_assignment.parquet; otherwise read state=<state>/upgrade=<upgrade_id>/metadata-sb.parquet
80+ if "metadata_utility" in str (args .metadata_path ):
81+ metadata_path = base_path / f"state={ args .state } " / "utility_assignment.parquet"
82+ else :
8083 metadata_path = (
8184 base_path
8285 / f"state={ args .state } "
8386 / f"upgrade={ args .upgrade_id } "
8487 / "metadata-sb.parquet"
8588 )
86- if not metadata_path .exists ():
87- raise FileNotFoundError (f"Metadata path { metadata_path } does not exist" )
88- SB_metadata_df = pl .scan_parquet (str (metadata_path ))
89-
90- SB_metadata_df_with_utilities = SB_metadata_df .with_columns (
91- pl .when (pl .col ("bldg_id" ).hash () % 3 == 0 )
92- .then (pl .lit ("Coned" ))
93- .when (pl .col ("bldg_id" ).hash () % 3 == 1 )
94- .then (pl .lit ("National Grid" ))
95- .otherwise (pl .lit ("NYSEG" ))
96- .alias ("sb.electric_utility" ),
97- pl .when ((pl .col ("bldg_id" ).hash () % 2 ) == 0 )
98- .then (pl .lit ("National Grid" ))
99- .otherwise (pl .lit ("NYSEG" ))
100- .alias ("sb.gas_utility" ),
89+
90+ if use_s3 and not metadata_path .exists ():
91+ raise FileNotFoundError (f"Metadata path { metadata_path } does not exist" )
92+ if not use_s3 and not Path (metadata_path ).exists ():
93+ raise FileNotFoundError (f"Metadata path { metadata_path } does not exist" )
94+
95+ storage_opts = STORAGE_OPTIONS if use_s3 else None
96+ SB_metadata_df = (
97+ pl .scan_parquet (str (metadata_path ), storage_options = storage_opts )
98+ if storage_opts
99+ else pl .scan_parquet (str (metadata_path ))
101100 )
102- #########################################################
101+
102+ # Use real sb.electric_utility and sb.gas_utility if present; else fall back to synthetic (deprecated)
103+ schema_cols = SB_metadata_df .collect_schema ().names ()
104+ if "sb.electric_utility" in schema_cols and "sb.gas_utility" in schema_cols :
105+ SB_metadata_df_with_utilities = SB_metadata_df
106+ else :
107+ warnings .warn (
108+ "metadata has no sb.electric_utility/sb.gas_utility columns; using synthetic data. "
109+ "Run assign_utility_ny and point --metadata_path to metadata_utility for real data." ,
110+ DeprecationWarning ,
111+ stacklevel = 2 ,
112+ )
113+ SB_metadata_df_with_utilities = SB_metadata_df .with_columns (
114+ pl .when (pl .col ("bldg_id" ).hash () % 3 == 0 )
115+ .then (pl .lit ("coned" ))
116+ .when (pl .col ("bldg_id" ).hash () % 3 == 1 )
117+ .then (pl .lit ("nimo" ))
118+ .otherwise (pl .lit ("nyseg" ))
119+ .alias ("sb.electric_utility" ),
120+ pl .when ((pl .col ("bldg_id" ).hash () % 2 ) == 0 )
121+ .then (pl .lit ("nimo" ))
122+ .otherwise (pl .lit ("nyseg" ))
123+ .alias ("sb.gas_utility" ),
124+ )
103125
104126 gas_tariff_mapping_df = map_gas_tariff (
105127 SB_metadata_df = SB_metadata_df_with_utilities ,
0 commit comments