Skip to content

Commit 71823ad

Browse files
thodson-usgs and claude committed
Centralize string-iterable normalization in _get_args (-153 LOC)
Addresses Copilot's review #5 (collapse mechanical per-param calls) and #1/#2 along the way:

- `_get_args` now normalizes every multi-value string param it sees, using:
  * a tiny ``_NO_NORMALIZE_PARAMS`` set for the few names that need explicit bypass (``monitoring_location_id``, validated separately; ``time``/``last_modified``/``begin``/``end``/``datetime``, which can contain ``pd.NaT``/None and are parsed by ``_format_api_dates``);
  * runtime type detection for the rest: scalar non-string knobs (``limit``, ``ssl_check``, ``convert_type``, ``skip_geometry``, ...) and ``list[float]`` params (``bbox``, ``boundingBox``) pass through automatically without listing them.
- Strip 153 per-function ``_normalize_str_iterable(...)`` assignments from ``dataretrieval/waterdata/api.py`` and drop the now-unused import. Net: -240 LOC in api.py.
- Tighten ``_MONITORING_LOCATION_ID_RE`` from ``.+-.+`` to ``[^-\s]+-[^-\s]+`` (Copilot #2). Now rejects values with leading/trailing whitespace or multiple hyphens, which used to pass and then silently return 0 rows from the API.
- Fix the ``_normalize_str_iterable`` docstring claim "Used by every public waterdata getter" — now accurate ("from ``_get_args`` for every multi-value string parameter on every waterdata getter that uses ``_get_args``"; ``get_nearest_continuous`` and a few others don't).

26 normalizer/validator tests still pass; 267 + 2 skipped + 4 deselected in the full suite (the 4 deselected are flaky live-API tests that 502 intermittently — unrelated). Ruff lint + format clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent e8c8e88 commit 71823ad

2 files changed

Lines changed: 55 additions & 255 deletions

File tree

dataretrieval/waterdata/api.py

Lines changed: 0 additions & 240 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,6 @@
3131
_check_profiles,
3232
_default_headers,
3333
_get_args,
34-
_normalize_str_iterable,
3534
get_ogc_data,
3635
get_stats_data,
3736
)
@@ -234,15 +233,6 @@ def get_daily(
234233
... )
235234
"""
236235
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
237-
parameter_code = _normalize_str_iterable(parameter_code, "parameter_code")
238-
statistic_id = _normalize_str_iterable(statistic_id, "statistic_id")
239-
properties = _normalize_str_iterable(properties, "properties")
240-
time_series_id = _normalize_str_iterable(time_series_id, "time_series_id")
241-
daily_id = _normalize_str_iterable(daily_id, "daily_id")
242-
approval_status = _normalize_str_iterable(approval_status, "approval_status")
243-
unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure")
244-
qualifier = _normalize_str_iterable(qualifier, "qualifier")
245-
value = _normalize_str_iterable(value, "value")
246236
service = "daily"
247237
output_id = "daily_id"
248238

@@ -432,15 +422,6 @@ def get_continuous(
432422
... )
433423
"""
434424
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
435-
parameter_code = _normalize_str_iterable(parameter_code, "parameter_code")
436-
statistic_id = _normalize_str_iterable(statistic_id, "statistic_id")
437-
properties = _normalize_str_iterable(properties, "properties")
438-
time_series_id = _normalize_str_iterable(time_series_id, "time_series_id")
439-
continuous_id = _normalize_str_iterable(continuous_id, "continuous_id")
440-
approval_status = _normalize_str_iterable(approval_status, "approval_status")
441-
unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure")
442-
qualifier = _normalize_str_iterable(qualifier, "qualifier")
443-
value = _normalize_str_iterable(value, "value")
444425
service = "continuous"
445426
output_id = "continuous_id"
446427

@@ -740,84 +721,6 @@ def get_monitoring_locations(
740721
... )
741722
"""
742723
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
743-
agency_code = _normalize_str_iterable(agency_code, "agency_code")
744-
agency_name = _normalize_str_iterable(agency_name, "agency_name")
745-
monitoring_location_number = _normalize_str_iterable(
746-
monitoring_location_number, "monitoring_location_number"
747-
)
748-
monitoring_location_name = _normalize_str_iterable(
749-
monitoring_location_name, "monitoring_location_name"
750-
)
751-
district_code = _normalize_str_iterable(district_code, "district_code")
752-
country_code = _normalize_str_iterable(country_code, "country_code")
753-
country_name = _normalize_str_iterable(country_name, "country_name")
754-
state_code = _normalize_str_iterable(state_code, "state_code")
755-
state_name = _normalize_str_iterable(state_name, "state_name")
756-
county_code = _normalize_str_iterable(county_code, "county_code")
757-
county_name = _normalize_str_iterable(county_name, "county_name")
758-
minor_civil_division_code = _normalize_str_iterable(
759-
minor_civil_division_code, "minor_civil_division_code"
760-
)
761-
site_type_code = _normalize_str_iterable(site_type_code, "site_type_code")
762-
site_type = _normalize_str_iterable(site_type, "site_type")
763-
hydrologic_unit_code = _normalize_str_iterable(
764-
hydrologic_unit_code, "hydrologic_unit_code"
765-
)
766-
basin_code = _normalize_str_iterable(basin_code, "basin_code")
767-
altitude = _normalize_str_iterable(altitude, "altitude")
768-
altitude_accuracy = _normalize_str_iterable(altitude_accuracy, "altitude_accuracy")
769-
altitude_method_code = _normalize_str_iterable(
770-
altitude_method_code, "altitude_method_code"
771-
)
772-
altitude_method_name = _normalize_str_iterable(
773-
altitude_method_name, "altitude_method_name"
774-
)
775-
vertical_datum = _normalize_str_iterable(vertical_datum, "vertical_datum")
776-
vertical_datum_name = _normalize_str_iterable(
777-
vertical_datum_name, "vertical_datum_name"
778-
)
779-
horizontal_positional_accuracy_code = _normalize_str_iterable(
780-
horizontal_positional_accuracy_code, "horizontal_positional_accuracy_code"
781-
)
782-
horizontal_positional_accuracy = _normalize_str_iterable(
783-
horizontal_positional_accuracy, "horizontal_positional_accuracy"
784-
)
785-
horizontal_position_method_code = _normalize_str_iterable(
786-
horizontal_position_method_code, "horizontal_position_method_code"
787-
)
788-
horizontal_position_method_name = _normalize_str_iterable(
789-
horizontal_position_method_name, "horizontal_position_method_name"
790-
)
791-
original_horizontal_datum = _normalize_str_iterable(
792-
original_horizontal_datum, "original_horizontal_datum"
793-
)
794-
original_horizontal_datum_name = _normalize_str_iterable(
795-
original_horizontal_datum_name, "original_horizontal_datum_name"
796-
)
797-
drainage_area = _normalize_str_iterable(drainage_area, "drainage_area")
798-
contributing_drainage_area = _normalize_str_iterable(
799-
contributing_drainage_area, "contributing_drainage_area"
800-
)
801-
time_zone_abbreviation = _normalize_str_iterable(
802-
time_zone_abbreviation, "time_zone_abbreviation"
803-
)
804-
uses_daylight_savings = _normalize_str_iterable(
805-
uses_daylight_savings, "uses_daylight_savings"
806-
)
807-
construction_date = _normalize_str_iterable(construction_date, "construction_date")
808-
aquifer_code = _normalize_str_iterable(aquifer_code, "aquifer_code")
809-
national_aquifer_code = _normalize_str_iterable(
810-
national_aquifer_code, "national_aquifer_code"
811-
)
812-
aquifer_type_code = _normalize_str_iterable(aquifer_type_code, "aquifer_type_code")
813-
well_constructed_depth = _normalize_str_iterable(
814-
well_constructed_depth, "well_constructed_depth"
815-
)
816-
hole_constructed_depth = _normalize_str_iterable(
817-
hole_constructed_depth, "hole_constructed_depth"
818-
)
819-
depth_source_code = _normalize_str_iterable(depth_source_code, "depth_source_code")
820-
properties = _normalize_str_iterable(properties, "properties")
821724
service = "monitoring-locations"
822725
output_id = "monitoring_location_id"
823726

@@ -1042,30 +945,6 @@ def get_time_series_metadata(
1042945
... )
1043946
"""
1044947
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
1045-
parameter_code = _normalize_str_iterable(parameter_code, "parameter_code")
1046-
parameter_name = _normalize_str_iterable(parameter_name, "parameter_name")
1047-
properties = _normalize_str_iterable(properties, "properties")
1048-
statistic_id = _normalize_str_iterable(statistic_id, "statistic_id")
1049-
hydrologic_unit_code = _normalize_str_iterable(
1050-
hydrologic_unit_code, "hydrologic_unit_code"
1051-
)
1052-
state_name = _normalize_str_iterable(state_name, "state_name")
1053-
unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure")
1054-
computation_period_identifier = _normalize_str_iterable(
1055-
computation_period_identifier, "computation_period_identifier"
1056-
)
1057-
computation_identifier = _normalize_str_iterable(
1058-
computation_identifier, "computation_identifier"
1059-
)
1060-
sublocation_identifier = _normalize_str_iterable(
1061-
sublocation_identifier, "sublocation_identifier"
1062-
)
1063-
primary = _normalize_str_iterable(primary, "primary")
1064-
parent_time_series_id = _normalize_str_iterable(
1065-
parent_time_series_id, "parent_time_series_id"
1066-
)
1067-
time_series_id = _normalize_str_iterable(time_series_id, "time_series_id")
1068-
web_description = _normalize_str_iterable(web_description, "web_description")
1069948
service = "time-series-metadata"
1070949
output_id = "time_series_id"
1071950

@@ -1494,16 +1373,6 @@ def get_latest_continuous(
14941373
... )
14951374
"""
14961375
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
1497-
parameter_code = _normalize_str_iterable(parameter_code, "parameter_code")
1498-
statistic_id = _normalize_str_iterable(statistic_id, "statistic_id")
1499-
properties = _normalize_str_iterable(properties, "properties")
1500-
time_series_id = _normalize_str_iterable(time_series_id, "time_series_id")
1501-
latest_continuous_id = _normalize_str_iterable(
1502-
latest_continuous_id, "latest_continuous_id"
1503-
)
1504-
approval_status = _normalize_str_iterable(approval_status, "approval_status")
1505-
unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure")
1506-
qualifier = _normalize_str_iterable(qualifier, "qualifier")
15071376
service = "latest-continuous"
15081377
output_id = "latest_continuous_id"
15091378

@@ -1701,14 +1570,6 @@ def get_latest_daily(
17011570
... )
17021571
"""
17031572
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
1704-
parameter_code = _normalize_str_iterable(parameter_code, "parameter_code")
1705-
statistic_id = _normalize_str_iterable(statistic_id, "statistic_id")
1706-
properties = _normalize_str_iterable(properties, "properties")
1707-
time_series_id = _normalize_str_iterable(time_series_id, "time_series_id")
1708-
latest_daily_id = _normalize_str_iterable(latest_daily_id, "latest_daily_id")
1709-
approval_status = _normalize_str_iterable(approval_status, "approval_status")
1710-
unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure")
1711-
qualifier = _normalize_str_iterable(qualifier, "qualifier")
17121573
service = "latest-daily"
17131574
output_id = "latest_daily_id"
17141575

@@ -1900,21 +1761,6 @@ def get_field_measurements(
19001761
... )
19011762
"""
19021763
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
1903-
parameter_code = _normalize_str_iterable(parameter_code, "parameter_code")
1904-
observing_procedure_code = _normalize_str_iterable(
1905-
observing_procedure_code, "observing_procedure_code"
1906-
)
1907-
properties = _normalize_str_iterable(properties, "properties")
1908-
field_visit_id = _normalize_str_iterable(field_visit_id, "field_visit_id")
1909-
approval_status = _normalize_str_iterable(approval_status, "approval_status")
1910-
unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure")
1911-
qualifier = _normalize_str_iterable(qualifier, "qualifier")
1912-
value = _normalize_str_iterable(value, "value")
1913-
observing_procedure = _normalize_str_iterable(
1914-
observing_procedure, "observing_procedure"
1915-
)
1916-
vertical_datum = _normalize_str_iterable(vertical_datum, "vertical_datum")
1917-
measuring_agency = _normalize_str_iterable(measuring_agency, "measuring_agency")
19181764
service = "field-measurements"
19191765
output_id = "field_measurement_id"
19201766

@@ -2470,32 +2316,6 @@ def get_samples(
24702316
... )
24712317
24722318
"""
2473-
activityMediaName = _normalize_str_iterable(activityMediaName, "activityMediaName")
2474-
activityTypeCode = _normalize_str_iterable(activityTypeCode, "activityTypeCode")
2475-
characteristicGroup = _normalize_str_iterable(
2476-
characteristicGroup, "characteristicGroup"
2477-
)
2478-
characteristic = _normalize_str_iterable(characteristic, "characteristic")
2479-
characteristicUserSupplied = _normalize_str_iterable(
2480-
characteristicUserSupplied, "characteristicUserSupplied"
2481-
)
2482-
countryFips = _normalize_str_iterable(countryFips, "countryFips")
2483-
stateFips = _normalize_str_iterable(stateFips, "stateFips")
2484-
countyFips = _normalize_str_iterable(countyFips, "countyFips")
2485-
siteTypeCode = _normalize_str_iterable(siteTypeCode, "siteTypeCode")
2486-
siteTypeName = _normalize_str_iterable(siteTypeName, "siteTypeName")
2487-
usgsPCode = _normalize_str_iterable(usgsPCode, "usgsPCode")
2488-
hydrologicUnit = _normalize_str_iterable(hydrologicUnit, "hydrologicUnit")
2489-
monitoringLocationIdentifier = _normalize_str_iterable(
2490-
monitoringLocationIdentifier, "monitoringLocationIdentifier"
2491-
)
2492-
organizationIdentifier = _normalize_str_iterable(
2493-
organizationIdentifier, "organizationIdentifier"
2494-
)
2495-
projectIdentifier = _normalize_str_iterable(projectIdentifier, "projectIdentifier")
2496-
recordIdentifierUserSupplied = _normalize_str_iterable(
2497-
recordIdentifierUserSupplied, "recordIdentifierUserSupplied"
2498-
)
24992319

25002320
_check_profiles(service, profile)
25012321

@@ -2712,16 +2532,6 @@ def get_stats_por(
27122532
... end_date="01-31",
27132533
... )
27142534
"""
2715-
computation_type = _normalize_str_iterable(computation_type, "computation_type")
2716-
country_code = _normalize_str_iterable(country_code, "country_code")
2717-
state_code = _normalize_str_iterable(state_code, "state_code")
2718-
county_code = _normalize_str_iterable(county_code, "county_code")
2719-
parent_time_series_id = _normalize_str_iterable(
2720-
parent_time_series_id, "parent_time_series_id"
2721-
)
2722-
site_type_code = _normalize_str_iterable(site_type_code, "site_type_code")
2723-
site_type_name = _normalize_str_iterable(site_type_name, "site_type_name")
2724-
parameter_code = _normalize_str_iterable(parameter_code, "parameter_code")
27252535
# Build argument dictionary, omitting None values
27262536
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
27272537
params = _get_args(locals(), exclude={"expand_percentiles"})
@@ -2852,16 +2662,6 @@ def get_stats_date_range(
28522662
... computation_type=["minimum", "maximum"],
28532663
... )
28542664
"""
2855-
computation_type = _normalize_str_iterable(computation_type, "computation_type")
2856-
country_code = _normalize_str_iterable(country_code, "country_code")
2857-
state_code = _normalize_str_iterable(state_code, "state_code")
2858-
county_code = _normalize_str_iterable(county_code, "county_code")
2859-
parent_time_series_id = _normalize_str_iterable(
2860-
parent_time_series_id, "parent_time_series_id"
2861-
)
2862-
site_type_code = _normalize_str_iterable(site_type_code, "site_type_code")
2863-
site_type_name = _normalize_str_iterable(site_type_name, "site_type_name")
2864-
parameter_code = _normalize_str_iterable(parameter_code, "parameter_code")
28652665
# Build argument dictionary, omitting None values
28662666
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
28672667
params = _get_args(locals(), exclude={"expand_percentiles"})
@@ -3037,46 +2837,6 @@ def get_channel(
30372837
... )
30382838
"""
30392839
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
3040-
field_visit_id = _normalize_str_iterable(field_visit_id, "field_visit_id")
3041-
measurement_number = _normalize_str_iterable(
3042-
measurement_number, "measurement_number"
3043-
)
3044-
channel_name = _normalize_str_iterable(channel_name, "channel_name")
3045-
channel_flow = _normalize_str_iterable(channel_flow, "channel_flow")
3046-
channel_flow_unit = _normalize_str_iterable(channel_flow_unit, "channel_flow_unit")
3047-
channel_width = _normalize_str_iterable(channel_width, "channel_width")
3048-
channel_width_unit = _normalize_str_iterable(
3049-
channel_width_unit, "channel_width_unit"
3050-
)
3051-
channel_area = _normalize_str_iterable(channel_area, "channel_area")
3052-
channel_area_unit = _normalize_str_iterable(channel_area_unit, "channel_area_unit")
3053-
channel_velocity = _normalize_str_iterable(channel_velocity, "channel_velocity")
3054-
channel_velocity_unit = _normalize_str_iterable(
3055-
channel_velocity_unit, "channel_velocity_unit"
3056-
)
3057-
channel_location_distance = _normalize_str_iterable(
3058-
channel_location_distance, "channel_location_distance"
3059-
)
3060-
channel_location_distance_unit = _normalize_str_iterable(
3061-
channel_location_distance_unit, "channel_location_distance_unit"
3062-
)
3063-
channel_stability = _normalize_str_iterable(channel_stability, "channel_stability")
3064-
channel_material = _normalize_str_iterable(channel_material, "channel_material")
3065-
channel_evenness = _normalize_str_iterable(channel_evenness, "channel_evenness")
3066-
horizontal_velocity_description = _normalize_str_iterable(
3067-
horizontal_velocity_description, "horizontal_velocity_description"
3068-
)
3069-
vertical_velocity_description = _normalize_str_iterable(
3070-
vertical_velocity_description, "vertical_velocity_description"
3071-
)
3072-
longitudinal_velocity_description = _normalize_str_iterable(
3073-
longitudinal_velocity_description, "longitudinal_velocity_description"
3074-
)
3075-
measurement_type = _normalize_str_iterable(measurement_type, "measurement_type")
3076-
channel_measurement_type = _normalize_str_iterable(
3077-
channel_measurement_type, "channel_measurement_type"
3078-
)
3079-
properties = _normalize_str_iterable(properties, "properties")
30802840
service = "channel-measurements"
30812841
output_id = "channel_measurements_id"
30822842

0 commit comments

Comments
 (0)