Skip to content

Commit 7dba3ef

Browse files
jpvelez and cursoragent committed
fix(tariff_mappers): use real assign_utility data and central crosswalk
- electric_tariff_mapper: use sb.electric_utility when present, support metadata_utility path, accept std_name
- gas_tariff_mapper: use get_std_name_to_gas_tariff_key for National Grid split (nimo/kedny/kedli), same metadata handling
- Add tests for both mappers

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent e3bc1df commit 7dba3ef

4 files changed

Lines changed: 175 additions & 68 deletions

File tree

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
"""Tests for electric tariff mapper."""
2+
3+
from typing import cast
4+
5+
import polars as pl
6+
7+
from utils.electric_tariff_mapper import map_electric_tariff
8+
from utils.types import SBScenario
9+
10+
11+
def test_map_electric_tariff_filters_by_std_name():
    """map_electric_tariff keeps only rows whose sb.electric_utility matches the std_name."""
    frame = pl.LazyFrame({
        "bldg_id": [1, 2, 3, 4],
        "sb.electric_utility": ["coned", "coned", "nyseg", "nimo"],
        "postprocess_group.has_hp": [True, False, True, False],
    })

    mapped = map_electric_tariff(
        SB_metadata_df=frame,
        electric_utility="coned",
        SB_scenario=SBScenario("default", 1),
        state="NY",
    )
    collected = cast(pl.DataFrame, mapped.collect())

    # Buildings 1 and 2 are the only "coned" rows in the fixture above.
    assert collected.height == 2
    kept_ids = set(collected["bldg_id"].to_list())
    assert kept_ids == {1, 2}
    # Every surviving row must carry a tariff key prefixed with the utility std_name.
    assert collected["tariff_key"].str.starts_with("coned_").all()
29+
30+
31+
def test_map_electric_tariff_tariff_key_format():
    """tariff_key is composed from the utility std_name and the scenario."""
    single_building = pl.LazyFrame({
        "bldg_id": [1],
        "sb.electric_utility": ["nyseg"],
        "postprocess_group.has_hp": [True],
    })
    scenario = SBScenario("seasonal", 1)

    mapped = map_electric_tariff(
        SB_metadata_df=single_building,
        electric_utility="nyseg",
        SB_scenario=scenario,
        state="NY",
    )
    collected = cast(pl.DataFrame, mapped.collect())

    # The single fixture row has has_hp=True and the expected key ends in "_HP.csv".
    tariff_keys = collected["tariff_key"].to_list()
    assert tariff_keys[0] == "nyseg_seasonal_1_HP.csv"

tests/test_gas_tariff_mapper.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
"""Tests for gas tariff mapper."""
2+
3+
from typing import cast
4+
5+
import polars as pl
6+
7+
from utils.gas_tariff_mapper import map_gas_tariff
8+
9+
10+
def test_map_gas_tariff_uses_crosswalk_for_tariff_key():
    """map_gas_tariff maps sb.gas_utility (std_name) to tariff_key via crosswalk.

    Every fixture row has electric utility "coned", and each building has a
    different gas utility, so the tariff_key can be checked per building.
    """
    metadata = pl.LazyFrame({
        "bldg_id": [1, 2, 3],
        "sb.electric_utility": ["coned", "coned", "coned"],
        "sb.gas_utility": ["nimo", "nyseg", "coned"],
    })
    result = map_gas_tariff(
        SB_metadata_df=metadata,
        electric_utility_name="coned",
    )
    df = cast(pl.DataFrame, result.collect())
    assert df.height == 3

    # Expected crosswalk: nimo -> national_grid, nyseg -> nyseg, coned -> coned.
    # Compare per bldg_id instead of by membership: membership checks would
    # still pass if the keys were attached to the wrong buildings.
    tariff_key_by_bldg = dict(
        zip(df["bldg_id"].to_list(), df["tariff_key"].to_list())
    )
    assert tariff_key_by_bldg == {1: "national_grid", 2: "nyseg", 3: "coned"}

utils/electric_tariff_mapper.py

Lines changed: 42 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import argparse
2+
import warnings
23
from pathlib import Path
34
from typing import cast
45

@@ -81,7 +82,9 @@ def map_electric_tariff(
8182
parser.add_argument("--state", required=True, help="State code (e.g. RI)")
8283
parser.add_argument("--upgrade_id", required=True, help="Upgrade id (e.g. 00)")
8384
parser.add_argument(
84-
"--electric_utility", required=True, help="Electric utility (e.g. Coned)"
85+
"--electric_utility",
86+
required=True,
87+
help="Electric utility std_name (e.g. coned, nyseg, nimo)",
8588
)
8689
parser.add_argument(
8790
"--SB_scenario_type",
@@ -98,46 +101,55 @@ def map_electric_tariff(
98101
)
99102
args = parser.parse_args()
100103

101-
#########################################################
102-
# For now, we will manually add the electric utility name column. Later on, the metadata parquet will be updated to include this column.
103-
# Assign first ~1/3 to Coned, next ~1/3 to National Grid, last ~1/3 to NYSEG.
104104
try: # If the metadata path is an S3 path, use the S3Path class.
105105
base_path = S3Path(args.metadata_path)
106-
metadata_path = (
107-
base_path
108-
/ f"state={args.state}"
109-
/ f"upgrade={args.upgrade_id}"
110-
/ "metadata-sb.parquet"
111-
)
112-
if not metadata_path.exists():
113-
raise FileNotFoundError(f"Metadata path {metadata_path} does not exist")
114-
# Polars scan_parquet needs a string path; S3Path.as_uri() gives s3:// URL
115-
SB_metadata_df = pl.scan_parquet(
116-
str(metadata_path), storage_options=STORAGE_OPTIONS
117-
)
106+
use_s3 = True
118107
except ValueError:
119-
# If the metadata path is a local path, use the Path class.
120108
base_path = Path(args.metadata_path)
109+
use_s3 = False
110+
111+
# Support metadata_utility path (utility_assignment.parquet) or metadata path (metadata-sb.parquet)
112+
if "metadata_utility" in str(args.metadata_path):
113+
metadata_path = base_path / f"state={args.state}" / "utility_assignment.parquet"
114+
else:
121115
metadata_path = (
122116
base_path
123117
/ f"state={args.state}"
124118
/ f"upgrade={args.upgrade_id}"
125119
/ "metadata-sb.parquet"
126120
)
127-
if not metadata_path.exists():
128-
raise FileNotFoundError(f"Metadata path {metadata_path} does not exist")
129-
SB_metadata_df = pl.scan_parquet(str(metadata_path))
130-
131-
# Add dummy electric utility column (deterministic by bldg_id). Later this column will be pre-existing in the SB metadata parquet.
132-
SB_metadata_df_with_electric_utility = SB_metadata_df.with_columns(
133-
pl.when(pl.col("bldg_id").hash() % 3 == 0)
134-
.then(pl.lit("Coned"))
135-
.when(pl.col("bldg_id").hash() % 3 == 1)
136-
.then(pl.lit("National Grid"))
137-
.otherwise(pl.lit("NYSEG"))
138-
.alias("sb.electric_utility")
121+
122+
if use_s3 and not metadata_path.exists():
123+
raise FileNotFoundError(f"Metadata path {metadata_path} does not exist")
124+
if not use_s3 and not Path(metadata_path).exists():
125+
raise FileNotFoundError(f"Metadata path {metadata_path} does not exist")
126+
127+
storage_opts = STORAGE_OPTIONS if use_s3 else None
128+
SB_metadata_df = (
129+
pl.scan_parquet(str(metadata_path), storage_options=storage_opts)
130+
if storage_opts
131+
else pl.scan_parquet(str(metadata_path))
139132
)
140-
#########################################################
133+
134+
# Use real sb.electric_utility if present; else fall back to synthetic (deprecated)
135+
schema_cols = SB_metadata_df.collect_schema().names()
136+
if "sb.electric_utility" in schema_cols:
137+
SB_metadata_df_with_electric_utility = SB_metadata_df
138+
else:
139+
warnings.warn(
140+
"metadata has no sb.electric_utility column; using synthetic data. "
141+
"Run assign_utility_ny and point --metadata_path to metadata_utility for real data.",
142+
DeprecationWarning,
143+
stacklevel=2,
144+
)
145+
SB_metadata_df_with_electric_utility = SB_metadata_df.with_columns(
146+
pl.when(pl.col("bldg_id").hash() % 3 == 0)
147+
.then(pl.lit("coned"))
148+
.when(pl.col("bldg_id").hash() % 3 == 1)
149+
.then(pl.lit("nimo"))
150+
.otherwise(pl.lit("nyseg"))
151+
.alias("sb.electric_utility")
152+
)
141153

142154
sb_scenario = SBScenario(args.SB_scenario_type, args.SB_scenario_year)
143155
electrical_tariff_mapping_df = map_electric_tariff(

utils/gas_tariff_mapper.py

Lines changed: 60 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import argparse
2+
import warnings
23
from pathlib import Path
34
from typing import cast
45

@@ -7,6 +8,7 @@
78

89
from utils import get_aws_region
910
from utils.types import electric_utility
11+
from utils.utility_codes import get_std_name_to_gas_tariff_key
1012

1113
STORAGE_OPTIONS = {"aws_region": get_aws_region()}
1214

@@ -24,14 +26,20 @@ def map_gas_tariff(
2426
if test_sample.is_empty():
2527
raise ValueError(f"No rows found for electric utility {electric_utility_name}")
2628

27-
gas_tariff_mapping_df = (
28-
utility_metadata_df.select(pl.col("bldg_id", "sb.gas_utility"))
29-
.with_columns(
30-
pl.when(pl.col("sb.gas_utility") == "National Grid")
31-
.then(pl.lit("national_grid"))
32-
.otherwise(pl.lit("nyseg"))
29+
gas_tariff_map = get_std_name_to_gas_tariff_key()
30+
31+
def _tariff_key_expr() -> pl.Expr:
32+
# Map sb.gas_utility (std_name) -> tariff_key via crosswalk; fallback to std_name
33+
return (
34+
pl.col("sb.gas_utility")
35+
.replace(gas_tariff_map)
36+
.fill_null(pl.col("sb.gas_utility"))
3337
.alias("tariff_key")
3438
)
39+
40+
gas_tariff_mapping_df = (
41+
utility_metadata_df.select(pl.col("bldg_id", "sb.gas_utility"))
42+
.with_columns(_tariff_key_expr())
3543
.drop("sb.gas_utility")
3644
)
3745

@@ -50,7 +58,9 @@ def map_gas_tariff(
5058
parser.add_argument("--state", required=True, help="State code (e.g. NY, RI)")
5159
parser.add_argument("--upgrade_id", required=True, help="Upgrade id (e.g. 00)")
5260
parser.add_argument(
53-
"--electric_utility", required=True, help="Electric utility (e.g. Coned)"
61+
"--electric_utility",
62+
required=True,
63+
help="Electric utility std_name (e.g. coned, nyseg, nimo)",
5464
)
5565
parser.add_argument(
5666
"--output_dir",
@@ -59,47 +69,59 @@ def map_gas_tariff(
5969
)
6070
args = parser.parse_args()
6171

62-
#########################################################
63-
# For now, we will manually add the electric and gas utility columns. Later the metadata parquet will include them.
64-
# Electric: first ~1/3 Coned, next ~1/3 National Grid, last ~1/3 NYSEG. Gas: half National Grid, half NYSEG.
6572
try: # If the metadata path is an S3 path, use the S3Path class.
6673
base_path = S3Path(args.metadata_path)
67-
metadata_path = (
68-
base_path
69-
/ f"state={args.state}"
70-
/ f"upgrade={args.upgrade_id}"
71-
/ "metadata-sb.parquet"
72-
)
73-
if not metadata_path.exists():
74-
raise FileNotFoundError(f"Metadata path {metadata_path} does not exist")
75-
SB_metadata_df = pl.scan_parquet(
76-
str(metadata_path), storage_options=STORAGE_OPTIONS
77-
)
78-
except ValueError: # If the metadata path is a local path, use the Path class.
74+
use_s3 = True
75+
except ValueError:
7976
base_path = Path(args.metadata_path)
77+
use_s3 = False
78+
79+
# Support metadata_utility path (utility_assignment.parquet) or metadata path (metadata-sb.parquet)
80+
if "metadata_utility" in str(args.metadata_path):
81+
metadata_path = base_path / f"state={args.state}" / "utility_assignment.parquet"
82+
else:
8083
metadata_path = (
8184
base_path
8285
/ f"state={args.state}"
8386
/ f"upgrade={args.upgrade_id}"
8487
/ "metadata-sb.parquet"
8588
)
86-
if not metadata_path.exists():
87-
raise FileNotFoundError(f"Metadata path {metadata_path} does not exist")
88-
SB_metadata_df = pl.scan_parquet(str(metadata_path))
89-
90-
SB_metadata_df_with_utilities = SB_metadata_df.with_columns(
91-
pl.when(pl.col("bldg_id").hash() % 3 == 0)
92-
.then(pl.lit("Coned"))
93-
.when(pl.col("bldg_id").hash() % 3 == 1)
94-
.then(pl.lit("National Grid"))
95-
.otherwise(pl.lit("NYSEG"))
96-
.alias("sb.electric_utility"),
97-
pl.when((pl.col("bldg_id").hash() % 2) == 0)
98-
.then(pl.lit("National Grid"))
99-
.otherwise(pl.lit("NYSEG"))
100-
.alias("sb.gas_utility"),
89+
90+
if use_s3 and not metadata_path.exists():
91+
raise FileNotFoundError(f"Metadata path {metadata_path} does not exist")
92+
if not use_s3 and not Path(metadata_path).exists():
93+
raise FileNotFoundError(f"Metadata path {metadata_path} does not exist")
94+
95+
storage_opts = STORAGE_OPTIONS if use_s3 else None
96+
SB_metadata_df = (
97+
pl.scan_parquet(str(metadata_path), storage_options=storage_opts)
98+
if storage_opts
99+
else pl.scan_parquet(str(metadata_path))
101100
)
102-
#########################################################
101+
102+
# Use real sb.electric_utility and sb.gas_utility if present; else fall back to synthetic (deprecated)
103+
schema_cols = SB_metadata_df.collect_schema().names()
104+
if "sb.electric_utility" in schema_cols and "sb.gas_utility" in schema_cols:
105+
SB_metadata_df_with_utilities = SB_metadata_df
106+
else:
107+
warnings.warn(
108+
"metadata has no sb.electric_utility/sb.gas_utility columns; using synthetic data. "
109+
"Run assign_utility_ny and point --metadata_path to metadata_utility for real data.",
110+
DeprecationWarning,
111+
stacklevel=2,
112+
)
113+
SB_metadata_df_with_utilities = SB_metadata_df.with_columns(
114+
pl.when(pl.col("bldg_id").hash() % 3 == 0)
115+
.then(pl.lit("coned"))
116+
.when(pl.col("bldg_id").hash() % 3 == 1)
117+
.then(pl.lit("nimo"))
118+
.otherwise(pl.lit("nyseg"))
119+
.alias("sb.electric_utility"),
120+
pl.when((pl.col("bldg_id").hash() % 2) == 0)
121+
.then(pl.lit("nimo"))
122+
.otherwise(pl.lit("nyseg"))
123+
.alias("sb.gas_utility"),
124+
)
103125

104126
gas_tariff_mapping_df = map_gas_tariff(
105127
SB_metadata_df=SB_metadata_df_with_utilities,

0 commit comments

Comments
 (0)