|
40 | 40 | ] |
41 | 41 |
|
42 | 42 |
|
| 43 | +def _is_code_column(name: str) -> bool: |
| 44 | + """True if a WQP column name denotes a code/identifier whose leading zeros |
| 45 | + are significant and must be preserved as ``str`` (HUCs, parameter codes, |
| 46 | + FIPS codes): the name ends with "code" or contains "identifier"/"huc"/"fips". |
| 47 | + """ |
| 48 | + lname = name.lower() |
| 49 | + return lname.endswith("code") or any( |
| 50 | + token in lname for token in ("identifier", "huc", "fips") |
| 51 | + ) |
| 52 | + |
| 53 | + |
def _read_wqp_csv(text: str) -> DataFrame:
    """Parse a WQP CSV payload, keeping code/identifier columns as ``str``.

    WQP returns codes whose leading zeros are significant: HUCs, parameter
    codes (``USGSpcode``) and FIPS state/county codes. A plain ``read_csv``
    infers those columns as int/float and silently drops the zeros
    (``"00060"`` -> ``60``, HUC8 ``"07090002"`` -> ``7090002``). To avoid
    that, the header is parsed on its own first, and the full parse then
    pins ``dtype=str`` on every column that :func:`_is_code_column` flags.

    Parameters
    ----------
    text : str
        Comma-delimited CSV content, header row included.

    Returns
    -------
    DataFrame
        The parsed table; flagged columns are read as strings, all other
        columns keep pandas' inferred dtypes.
    """
    # Pass 1: ``nrows=0`` parses only the header, yielding the column names.
    header_only = pd.read_csv(StringIO(text), delimiter=",", nrows=0)

    # Collect a per-column dtype override for each code-like column.
    dtype_overrides = {}
    for column in header_only.columns:
        if _is_code_column(column):
            dtype_overrides[column] = str

    # Pass 2: the real parse, with the overrides applied.
    return pd.read_csv(
        StringIO(text),
        delimiter=",",
        low_memory=False,
        dtype=dtype_overrides,
    )
| 67 | + |
| 68 | + |
43 | 69 | def get_results( |
44 | 70 | ssl_check=True, |
45 | 71 | legacy=True, |
@@ -153,7 +179,7 @@ def get_results( |
153 | 179 |
|
154 | 180 | response = query(url, kwargs, delimiter=";", ssl_check=ssl_check) |
155 | 181 |
|
156 | | - df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) |
| 182 | + df = _read_wqp_csv(response.text) |
157 | 183 | df = _attach_datetime_columns(df) |
158 | 184 | return df, WQP_Metadata(response, **kwargs) |
159 | 185 |
|
@@ -208,7 +234,7 @@ def what_sites( |
208 | 234 |
|
209 | 235 | response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) |
210 | 236 |
|
211 | | - df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) |
| 237 | + df = _read_wqp_csv(response.text) |
212 | 238 |
|
213 | 239 | return df, WQP_Metadata(response, **kwargs) |
214 | 240 |
|
@@ -259,7 +285,7 @@ def what_organizations( |
259 | 285 |
|
260 | 286 | response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) |
261 | 287 |
|
262 | | - df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) |
| 288 | + df = _read_wqp_csv(response.text) |
263 | 289 |
|
264 | 290 | return df, WQP_Metadata(response, **kwargs) |
265 | 291 |
|
@@ -306,7 +332,7 @@ def what_projects(ssl_check=True, legacy=True, **kwargs): |
306 | 332 |
|
307 | 333 | response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) |
308 | 334 |
|
309 | | - df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) |
| 335 | + df = _read_wqp_csv(response.text) |
310 | 336 |
|
311 | 337 | return df, WQP_Metadata(response, **kwargs) |
312 | 338 |
|
@@ -370,7 +396,7 @@ def what_activities( |
370 | 396 |
|
371 | 397 | response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) |
372 | 398 |
|
373 | | - df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) |
| 399 | + df = _read_wqp_csv(response.text) |
374 | 400 |
|
375 | 401 | return df, WQP_Metadata(response, **kwargs) |
376 | 402 |
|
@@ -428,7 +454,7 @@ def what_detection_limits( |
428 | 454 |
|
429 | 455 | response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) |
430 | 456 |
|
431 | | - df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) |
| 457 | + df = _read_wqp_csv(response.text) |
432 | 458 |
|
433 | 459 | return df, WQP_Metadata(response, **kwargs) |
434 | 460 |
|
@@ -479,7 +505,7 @@ def what_habitat_metrics( |
479 | 505 |
|
480 | 506 | response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) |
481 | 507 |
|
482 | | - df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) |
| 508 | + df = _read_wqp_csv(response.text) |
483 | 509 |
|
484 | 510 | return df, WQP_Metadata(response, **kwargs) |
485 | 511 |
|
@@ -531,7 +557,7 @@ def what_project_weights(ssl_check=True, legacy=True, **kwargs): |
531 | 557 |
|
532 | 558 | response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) |
533 | 559 |
|
534 | | - df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) |
| 560 | + df = _read_wqp_csv(response.text) |
535 | 561 |
|
536 | 562 | return df, WQP_Metadata(response, **kwargs) |
537 | 563 |
|
@@ -583,7 +609,7 @@ def what_activity_metrics(ssl_check=True, legacy=True, **kwargs): |
583 | 609 |
|
584 | 610 | response = query(url, payload=kwargs, delimiter=";", ssl_check=ssl_check) |
585 | 611 |
|
586 | | - df = pd.read_csv(StringIO(response.text), delimiter=",", low_memory=False) |
| 612 | + df = _read_wqp_csv(response.text) |
587 | 613 |
|
588 | 614 | return df, WQP_Metadata(response, **kwargs) |
589 | 615 |
|
|
0 commit comments