Skip to content

Commit 62a58cf

Browse files
thodson-usgs and claude committed
Centralize string-iterable normalization in _get_args (-153 LOC)
Addresses Copilot's review #5 (collapse mechanical per-param calls) and #1/#2 along the way:

- `_get_args` now normalizes every multi-value string param it sees, using:
  * a tiny ``_NO_NORMALIZE_PARAMS`` set for the few names that need explicit bypass (``monitoring_location_id``, validated separately; ``time``/``last_modified``/``begin``/``end``/``datetime``, which can contain ``pd.NaT``/None and are parsed by ``_format_api_dates``);
  * runtime type detection for the rest: scalar non-string knobs (``limit``, ``ssl_check``, ``convert_type``, ``skip_geometry``, ...) and ``list[float]`` params (``bbox``, ``boundingBox``) pass through automatically without listing them.
- Strip 153 per-function ``_normalize_str_iterable(...)`` assignments from ``dataretrieval/waterdata/api.py`` and drop the now-unused import. Net: -240 LOC in api.py.
- Tighten ``_MONITORING_LOCATION_ID_RE`` from ``.+-.+`` to ``[^-\s]+-[^-\s]+`` (Copilot #2). Now rejects values with leading/trailing whitespace or multiple hyphens, which used to pass and then silently return 0 rows from the API.
- Fix the ``_normalize_str_iterable`` docstring claim "Used by every public waterdata getter" — now accurate ("from ``_get_args`` for every multi-value string parameter on every waterdata getter that uses ``_get_args``"; ``get_nearest_continuous`` and a few others don't).

26 normalizer/validator tests still pass; 267 + 2 skipped + 4 deselected in the full suite (the 4 deselected are flaky live-API tests that 502 intermittently — unrelated). Ruff lint + format clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent c05d082 commit 62a58cf

2 files changed

Lines changed: 55 additions & 255 deletions

File tree

dataretrieval/waterdata/api.py

Lines changed: 0 additions & 240 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
_check_profiles,
3131
_default_headers,
3232
_get_args,
33-
_normalize_str_iterable,
3433
get_ogc_data,
3534
get_stats_data,
3635
)
@@ -217,15 +216,6 @@ def get_daily(
217216
... time = "2024-01-01/.."
218217
"""
219218
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
220-
parameter_code = _normalize_str_iterable(parameter_code, "parameter_code")
221-
statistic_id = _normalize_str_iterable(statistic_id, "statistic_id")
222-
properties = _normalize_str_iterable(properties, "properties")
223-
time_series_id = _normalize_str_iterable(time_series_id, "time_series_id")
224-
daily_id = _normalize_str_iterable(daily_id, "daily_id")
225-
approval_status = _normalize_str_iterable(approval_status, "approval_status")
226-
unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure")
227-
qualifier = _normalize_str_iterable(qualifier, "qualifier")
228-
value = _normalize_str_iterable(value, "value")
229219
service = "daily"
230220
output_id = "daily_id"
231221

@@ -415,15 +405,6 @@ def get_continuous(
415405
... )
416406
"""
417407
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
418-
parameter_code = _normalize_str_iterable(parameter_code, "parameter_code")
419-
statistic_id = _normalize_str_iterable(statistic_id, "statistic_id")
420-
properties = _normalize_str_iterable(properties, "properties")
421-
time_series_id = _normalize_str_iterable(time_series_id, "time_series_id")
422-
continuous_id = _normalize_str_iterable(continuous_id, "continuous_id")
423-
approval_status = _normalize_str_iterable(approval_status, "approval_status")
424-
unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure")
425-
qualifier = _normalize_str_iterable(qualifier, "qualifier")
426-
value = _normalize_str_iterable(value, "value")
427408
service = "continuous"
428409
output_id = "continuous_id"
429410

@@ -723,84 +704,6 @@ def get_monitoring_locations(
723704
... )
724705
"""
725706
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
726-
agency_code = _normalize_str_iterable(agency_code, "agency_code")
727-
agency_name = _normalize_str_iterable(agency_name, "agency_name")
728-
monitoring_location_number = _normalize_str_iterable(
729-
monitoring_location_number, "monitoring_location_number"
730-
)
731-
monitoring_location_name = _normalize_str_iterable(
732-
monitoring_location_name, "monitoring_location_name"
733-
)
734-
district_code = _normalize_str_iterable(district_code, "district_code")
735-
country_code = _normalize_str_iterable(country_code, "country_code")
736-
country_name = _normalize_str_iterable(country_name, "country_name")
737-
state_code = _normalize_str_iterable(state_code, "state_code")
738-
state_name = _normalize_str_iterable(state_name, "state_name")
739-
county_code = _normalize_str_iterable(county_code, "county_code")
740-
county_name = _normalize_str_iterable(county_name, "county_name")
741-
minor_civil_division_code = _normalize_str_iterable(
742-
minor_civil_division_code, "minor_civil_division_code"
743-
)
744-
site_type_code = _normalize_str_iterable(site_type_code, "site_type_code")
745-
site_type = _normalize_str_iterable(site_type, "site_type")
746-
hydrologic_unit_code = _normalize_str_iterable(
747-
hydrologic_unit_code, "hydrologic_unit_code"
748-
)
749-
basin_code = _normalize_str_iterable(basin_code, "basin_code")
750-
altitude = _normalize_str_iterable(altitude, "altitude")
751-
altitude_accuracy = _normalize_str_iterable(altitude_accuracy, "altitude_accuracy")
752-
altitude_method_code = _normalize_str_iterable(
753-
altitude_method_code, "altitude_method_code"
754-
)
755-
altitude_method_name = _normalize_str_iterable(
756-
altitude_method_name, "altitude_method_name"
757-
)
758-
vertical_datum = _normalize_str_iterable(vertical_datum, "vertical_datum")
759-
vertical_datum_name = _normalize_str_iterable(
760-
vertical_datum_name, "vertical_datum_name"
761-
)
762-
horizontal_positional_accuracy_code = _normalize_str_iterable(
763-
horizontal_positional_accuracy_code, "horizontal_positional_accuracy_code"
764-
)
765-
horizontal_positional_accuracy = _normalize_str_iterable(
766-
horizontal_positional_accuracy, "horizontal_positional_accuracy"
767-
)
768-
horizontal_position_method_code = _normalize_str_iterable(
769-
horizontal_position_method_code, "horizontal_position_method_code"
770-
)
771-
horizontal_position_method_name = _normalize_str_iterable(
772-
horizontal_position_method_name, "horizontal_position_method_name"
773-
)
774-
original_horizontal_datum = _normalize_str_iterable(
775-
original_horizontal_datum, "original_horizontal_datum"
776-
)
777-
original_horizontal_datum_name = _normalize_str_iterable(
778-
original_horizontal_datum_name, "original_horizontal_datum_name"
779-
)
780-
drainage_area = _normalize_str_iterable(drainage_area, "drainage_area")
781-
contributing_drainage_area = _normalize_str_iterable(
782-
contributing_drainage_area, "contributing_drainage_area"
783-
)
784-
time_zone_abbreviation = _normalize_str_iterable(
785-
time_zone_abbreviation, "time_zone_abbreviation"
786-
)
787-
uses_daylight_savings = _normalize_str_iterable(
788-
uses_daylight_savings, "uses_daylight_savings"
789-
)
790-
construction_date = _normalize_str_iterable(construction_date, "construction_date")
791-
aquifer_code = _normalize_str_iterable(aquifer_code, "aquifer_code")
792-
national_aquifer_code = _normalize_str_iterable(
793-
national_aquifer_code, "national_aquifer_code"
794-
)
795-
aquifer_type_code = _normalize_str_iterable(aquifer_type_code, "aquifer_type_code")
796-
well_constructed_depth = _normalize_str_iterable(
797-
well_constructed_depth, "well_constructed_depth"
798-
)
799-
hole_constructed_depth = _normalize_str_iterable(
800-
hole_constructed_depth, "hole_constructed_depth"
801-
)
802-
depth_source_code = _normalize_str_iterable(depth_source_code, "depth_source_code")
803-
properties = _normalize_str_iterable(properties, "properties")
804707
service = "monitoring-locations"
805708
output_id = "monitoring_location_id"
806709

@@ -1025,30 +928,6 @@ def get_time_series_metadata(
1025928
... )
1026929
"""
1027930
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
1028-
parameter_code = _normalize_str_iterable(parameter_code, "parameter_code")
1029-
parameter_name = _normalize_str_iterable(parameter_name, "parameter_name")
1030-
properties = _normalize_str_iterable(properties, "properties")
1031-
statistic_id = _normalize_str_iterable(statistic_id, "statistic_id")
1032-
hydrologic_unit_code = _normalize_str_iterable(
1033-
hydrologic_unit_code, "hydrologic_unit_code"
1034-
)
1035-
state_name = _normalize_str_iterable(state_name, "state_name")
1036-
unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure")
1037-
computation_period_identifier = _normalize_str_iterable(
1038-
computation_period_identifier, "computation_period_identifier"
1039-
)
1040-
computation_identifier = _normalize_str_iterable(
1041-
computation_identifier, "computation_identifier"
1042-
)
1043-
sublocation_identifier = _normalize_str_iterable(
1044-
sublocation_identifier, "sublocation_identifier"
1045-
)
1046-
primary = _normalize_str_iterable(primary, "primary")
1047-
parent_time_series_id = _normalize_str_iterable(
1048-
parent_time_series_id, "parent_time_series_id"
1049-
)
1050-
time_series_id = _normalize_str_iterable(time_series_id, "time_series_id")
1051-
web_description = _normalize_str_iterable(web_description, "web_description")
1052931
service = "time-series-metadata"
1053932
output_id = "time_series_id"
1054933

@@ -1229,16 +1108,6 @@ def get_latest_continuous(
12291108
... )
12301109
"""
12311110
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
1232-
parameter_code = _normalize_str_iterable(parameter_code, "parameter_code")
1233-
statistic_id = _normalize_str_iterable(statistic_id, "statistic_id")
1234-
properties = _normalize_str_iterable(properties, "properties")
1235-
time_series_id = _normalize_str_iterable(time_series_id, "time_series_id")
1236-
latest_continuous_id = _normalize_str_iterable(
1237-
latest_continuous_id, "latest_continuous_id"
1238-
)
1239-
approval_status = _normalize_str_iterable(approval_status, "approval_status")
1240-
unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure")
1241-
qualifier = _normalize_str_iterable(qualifier, "qualifier")
12421111
service = "latest-continuous"
12431112
output_id = "latest_continuous_id"
12441113

@@ -1421,14 +1290,6 @@ def get_latest_daily(
14211290
... )
14221291
"""
14231292
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
1424-
parameter_code = _normalize_str_iterable(parameter_code, "parameter_code")
1425-
statistic_id = _normalize_str_iterable(statistic_id, "statistic_id")
1426-
properties = _normalize_str_iterable(properties, "properties")
1427-
time_series_id = _normalize_str_iterable(time_series_id, "time_series_id")
1428-
latest_daily_id = _normalize_str_iterable(latest_daily_id, "latest_daily_id")
1429-
approval_status = _normalize_str_iterable(approval_status, "approval_status")
1430-
unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure")
1431-
qualifier = _normalize_str_iterable(qualifier, "qualifier")
14321293
service = "latest-daily"
14331294
output_id = "latest_daily_id"
14341295

@@ -1610,21 +1471,6 @@ def get_field_measurements(
16101471
... )
16111472
"""
16121473
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
1613-
parameter_code = _normalize_str_iterable(parameter_code, "parameter_code")
1614-
observing_procedure_code = _normalize_str_iterable(
1615-
observing_procedure_code, "observing_procedure_code"
1616-
)
1617-
properties = _normalize_str_iterable(properties, "properties")
1618-
field_visit_id = _normalize_str_iterable(field_visit_id, "field_visit_id")
1619-
approval_status = _normalize_str_iterable(approval_status, "approval_status")
1620-
unit_of_measure = _normalize_str_iterable(unit_of_measure, "unit_of_measure")
1621-
qualifier = _normalize_str_iterable(qualifier, "qualifier")
1622-
value = _normalize_str_iterable(value, "value")
1623-
observing_procedure = _normalize_str_iterable(
1624-
observing_procedure, "observing_procedure"
1625-
)
1626-
vertical_datum = _normalize_str_iterable(vertical_datum, "vertical_datum")
1627-
measuring_agency = _normalize_str_iterable(measuring_agency, "measuring_agency")
16281474
service = "field-measurements"
16291475
output_id = "field_measurement_id"
16301476

@@ -1935,32 +1781,6 @@ def get_samples(
19351781
... )
19361782
19371783
"""
1938-
activityMediaName = _normalize_str_iterable(activityMediaName, "activityMediaName")
1939-
activityTypeCode = _normalize_str_iterable(activityTypeCode, "activityTypeCode")
1940-
characteristicGroup = _normalize_str_iterable(
1941-
characteristicGroup, "characteristicGroup"
1942-
)
1943-
characteristic = _normalize_str_iterable(characteristic, "characteristic")
1944-
characteristicUserSupplied = _normalize_str_iterable(
1945-
characteristicUserSupplied, "characteristicUserSupplied"
1946-
)
1947-
countryFips = _normalize_str_iterable(countryFips, "countryFips")
1948-
stateFips = _normalize_str_iterable(stateFips, "stateFips")
1949-
countyFips = _normalize_str_iterable(countyFips, "countyFips")
1950-
siteTypeCode = _normalize_str_iterable(siteTypeCode, "siteTypeCode")
1951-
siteTypeName = _normalize_str_iterable(siteTypeName, "siteTypeName")
1952-
usgsPCode = _normalize_str_iterable(usgsPCode, "usgsPCode")
1953-
hydrologicUnit = _normalize_str_iterable(hydrologicUnit, "hydrologicUnit")
1954-
monitoringLocationIdentifier = _normalize_str_iterable(
1955-
monitoringLocationIdentifier, "monitoringLocationIdentifier"
1956-
)
1957-
organizationIdentifier = _normalize_str_iterable(
1958-
organizationIdentifier, "organizationIdentifier"
1959-
)
1960-
projectIdentifier = _normalize_str_iterable(projectIdentifier, "projectIdentifier")
1961-
recordIdentifierUserSupplied = _normalize_str_iterable(
1962-
recordIdentifierUserSupplied, "recordIdentifierUserSupplied"
1963-
)
19641784

19651785
_check_profiles(service, profile)
19661786

@@ -2105,16 +1925,6 @@ def get_stats_por(
21051925
... end_date="01-31",
21061926
... )
21071927
"""
2108-
computation_type = _normalize_str_iterable(computation_type, "computation_type")
2109-
country_code = _normalize_str_iterable(country_code, "country_code")
2110-
state_code = _normalize_str_iterable(state_code, "state_code")
2111-
county_code = _normalize_str_iterable(county_code, "county_code")
2112-
parent_time_series_id = _normalize_str_iterable(
2113-
parent_time_series_id, "parent_time_series_id"
2114-
)
2115-
site_type_code = _normalize_str_iterable(site_type_code, "site_type_code")
2116-
site_type_name = _normalize_str_iterable(site_type_name, "site_type_name")
2117-
parameter_code = _normalize_str_iterable(parameter_code, "parameter_code")
21181928
# Build argument dictionary, omitting None values
21191929
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
21201930
params = _get_args(locals(), exclude={"expand_percentiles"})
@@ -2245,16 +2055,6 @@ def get_stats_date_range(
22452055
... computation_type=["minimum", "maximum"],
22462056
... )
22472057
"""
2248-
computation_type = _normalize_str_iterable(computation_type, "computation_type")
2249-
country_code = _normalize_str_iterable(country_code, "country_code")
2250-
state_code = _normalize_str_iterable(state_code, "state_code")
2251-
county_code = _normalize_str_iterable(county_code, "county_code")
2252-
parent_time_series_id = _normalize_str_iterable(
2253-
parent_time_series_id, "parent_time_series_id"
2254-
)
2255-
site_type_code = _normalize_str_iterable(site_type_code, "site_type_code")
2256-
site_type_name = _normalize_str_iterable(site_type_name, "site_type_name")
2257-
parameter_code = _normalize_str_iterable(parameter_code, "parameter_code")
22582058
# Build argument dictionary, omitting None values
22592059
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
22602060
params = _get_args(locals(), exclude={"expand_percentiles"})
@@ -2430,46 +2230,6 @@ def get_channel(
24302230
... )
24312231
"""
24322232
monitoring_location_id = _check_monitoring_location_id(monitoring_location_id)
2433-
field_visit_id = _normalize_str_iterable(field_visit_id, "field_visit_id")
2434-
measurement_number = _normalize_str_iterable(
2435-
measurement_number, "measurement_number"
2436-
)
2437-
channel_name = _normalize_str_iterable(channel_name, "channel_name")
2438-
channel_flow = _normalize_str_iterable(channel_flow, "channel_flow")
2439-
channel_flow_unit = _normalize_str_iterable(channel_flow_unit, "channel_flow_unit")
2440-
channel_width = _normalize_str_iterable(channel_width, "channel_width")
2441-
channel_width_unit = _normalize_str_iterable(
2442-
channel_width_unit, "channel_width_unit"
2443-
)
2444-
channel_area = _normalize_str_iterable(channel_area, "channel_area")
2445-
channel_area_unit = _normalize_str_iterable(channel_area_unit, "channel_area_unit")
2446-
channel_velocity = _normalize_str_iterable(channel_velocity, "channel_velocity")
2447-
channel_velocity_unit = _normalize_str_iterable(
2448-
channel_velocity_unit, "channel_velocity_unit"
2449-
)
2450-
channel_location_distance = _normalize_str_iterable(
2451-
channel_location_distance, "channel_location_distance"
2452-
)
2453-
channel_location_distance_unit = _normalize_str_iterable(
2454-
channel_location_distance_unit, "channel_location_distance_unit"
2455-
)
2456-
channel_stability = _normalize_str_iterable(channel_stability, "channel_stability")
2457-
channel_material = _normalize_str_iterable(channel_material, "channel_material")
2458-
channel_evenness = _normalize_str_iterable(channel_evenness, "channel_evenness")
2459-
horizontal_velocity_description = _normalize_str_iterable(
2460-
horizontal_velocity_description, "horizontal_velocity_description"
2461-
)
2462-
vertical_velocity_description = _normalize_str_iterable(
2463-
vertical_velocity_description, "vertical_velocity_description"
2464-
)
2465-
longitudinal_velocity_description = _normalize_str_iterable(
2466-
longitudinal_velocity_description, "longitudinal_velocity_description"
2467-
)
2468-
measurement_type = _normalize_str_iterable(measurement_type, "measurement_type")
2469-
channel_measurement_type = _normalize_str_iterable(
2470-
channel_measurement_type, "channel_measurement_type"
2471-
)
2472-
properties = _normalize_str_iterable(properties, "properties")
24732233
service = "channel-measurements"
24742234
output_id = "channel_measurements_id"
24752235

0 commit comments

Comments
 (0)