Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
a81643a
[DEV-1423] Upgrade Pydantic version
aguest-kc Mar 3, 2026
282a1e2
[DEV-1423] Changes for Pydantic upgrade
aguest-kc Mar 3, 2026
3e17b22
[DEV-14236] Upgrade pydantic and add pydantic-settings
aguest-kc Mar 5, 2026
a3a670e
[DEV-14236] Changes for pydantic upgrade
aguest-kc Mar 5, 2026
23b41db
[DEV-14236] Utils changes for Pydantic
aguest-kc Mar 6, 2026
0cb638b
[DEV-14236] Update validator
aguest-kc Mar 9, 2026
bf2e0d6
[DEV-14236] Updates to default config
sethstoudenmier Mar 30, 2026
c11760d
[DEV-14236] Pydantic and Ruff fixes
aguest-kc Apr 1, 2026
6d879d0
[DEV-14236] Ruff fixes
aguest-kc Apr 1, 2026
9742097
[DEV-14236] Formatting fixes
aguest-kc Apr 1, 2026
e7addf9
[DEV-14236] Fix import
aguest-kc Apr 1, 2026
2eed1ef
Merge branch 'heads/qat' into ftr/dev-14236-pydantic-upgrade
aguest-kc Apr 1, 2026
7707083
[DEV-14236] Account for string passwords
aguest-kc Apr 1, 2026
d245ff8
[DEV-14236] Updates for Pydantic 2
aguest-kc Apr 1, 2026
abeed9a
[DEV-14236] Make AWS_PROFILE optional
aguest-kc Apr 1, 2026
b83917e
[DEV-14236] Make AWS_PROFILE optional
aguest-kc Apr 1, 2026
3b98b8a
[DEV-14236] Fix failing tests
aguest-kc Apr 13, 2026
ee189e5
[DEV-14236] Ruff fixes
aguest-kc Apr 13, 2026
5c979ec
[DEV-14236] Fix failing tests
aguest-kc Apr 14, 2026
ce2bf3e
Merge branch 'qat' into ftr/dev-14236-pydantic-upgrade
aguest-kc Apr 14, 2026
e343789
[DEV-14236] Ruff fix
aguest-kc Apr 14, 2026
bdcb5e8
[DEV-14236] Update validator
aguest-kc Apr 14, 2026
841095c
[DEV-14236] Update for Pydantic v2
aguest-kc Apr 14, 2026
942b377
[DEV-14236] Use @field_validator() instead of @validator()
aguest-kc Apr 14, 2026
4d9afd4
[DEV-14236] More updates for Pydantic v2
aguest-kc Apr 14, 2026
7f6dfe8
[DEV-14236] Ruff fixes
aguest-kc Apr 14, 2026
a052303
[DEV-14236] More updates for Pydantic v2
aguest-kc Apr 14, 2026
89c95f9
[DEV-14236] Update monthly_download_filters for Pydantic v2
aguest-kc Apr 14, 2026
66fbab4
[DEV-14236] Fix failing tests
aguest-kc Apr 14, 2026
8c575f9
[DEV-14236] Hardcode the asterisks for DB password
aguest-kc Apr 15, 2026
45352ac
[DEV-14236] Remove comment
aguest-kc Apr 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ dependencies = [
"psutil==5.9.*",
"psycopg>=3.3.3",
"py-gfm==2.0.0",
"pydantic[dotenv]==1.9.*",
"pydantic==2.12",
"pydantic-settings>=2.13.1",
"python-json-logger==2.0.7",
"requests==2.31.*",
"retrying==1.3.4",
Expand Down
92 changes: 44 additions & 48 deletions usaspending_api/config/envs/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,9 @@
import pathlib
from typing import Any, ClassVar, Union

from pydantic import (
AnyHttpUrl,
BaseSettings,
PostgresDsn,
SecretStr,
root_validator,
)
from pydantic import SecretStr, model_validator
from pydantic.networks import AnyHttpUrl, PostgresDsn
from pydantic_settings import BaseSettings, SettingsConfigDict

from usaspending_api.config.utils import (
ENV_SPECIFIC_OVERRIDE,
Expand Down Expand Up @@ -134,20 +130,22 @@ def _validate_database_conf(
)

if enough_parts:
pg_dsn = PostgresDsn(
url=None,
try:
_port = int(values[f"{resource_conf_prefix}_PORT"])
except (ValueError, TypeError):
_port = None

pg_dsn = PostgresDsn.build(
scheme=values[f"{resource_conf_prefix}_SCHEME"],
user=values[f"{resource_conf_prefix}_USER"],
password=values[
f"{resource_conf_prefix}_PASSWORD"
].get_secret_value(),
host=values[f"{resource_conf_prefix}_HOST"],
port=values[f"{resource_conf_prefix}_PORT"],
path=(
"/" + values[f"{resource_conf_prefix}_NAME"]
if values[f"{resource_conf_prefix}_NAME"]
else None
username=values[f"{resource_conf_prefix}_USER"],
password=(
values[f"{resource_conf_prefix}_PASSWORD"].get_secret_value()
if isinstance(values[f"{resource_conf_prefix}_PASSWORD"], SecretStr)
else values[f"{resource_conf_prefix}_PASSWORD"]
),
host=values[f"{resource_conf_prefix}_HOST"],
port=_port,
path=values.get(f"{resource_conf_prefix}_NAME"),
)
values = eval_default_factory_from_root_validator(
cls, values, url_conf_name, lambda: str(pg_dsn)
Expand All @@ -159,7 +157,7 @@ def _validate_database_conf(

# noinspection PyMethodParameters
# Pydantic returns a classmethod for its validators, so the cls param is correct
@root_validator
@model_validator(mode="before")
def _DATABASE_URL_and_parts_factory(cls, values: dict[str, Any]) -> dict[str, Any]:
"""A root validator to backfill DATABASE_URL and USASPENDING_DB_* part config vars and validate that they are
all consistent.
Expand All @@ -169,6 +167,8 @@ def _DATABASE_URL_and_parts_factory(cls, values: dict[str, Any]) -> dict[str, An
- ALSO validates that the parts and whole string are consistent. A ``ValueError`` is thrown if found to
be inconsistent, which will in turn raise a ``pydantic.ValidationError`` at configuration time.
"""
default_fields = {name: field.default for name, field in cls.model_fields.items()}
values = {**default_fields, **values}
# noinspection PyArgumentList
cls._validate_database_conf(
cls=cls,
Expand All @@ -181,7 +181,7 @@ def _DATABASE_URL_and_parts_factory(cls, values: dict[str, Any]) -> dict[str, An

# noinspection PyMethodParameters
# Pydantic returns a classmethod for its validators, so the cls param is correct
@root_validator
@model_validator(mode="before")
def _BROKER_DB_and_parts_factory(cls, values: dict[str, Any]) -> dict[str, Any]:
"""A root validator to backfill BROKER_DB and BROKER_DB_* part config vars and validate
that they are all consistent.
Expand All @@ -191,6 +191,8 @@ def _BROKER_DB_and_parts_factory(cls, values: dict[str, Any]) -> dict[str, Any]:
- ALSO validates that the parts and whole string are consistent. A ``ValueError`` is thrown if found to
be inconsistent, which will in turn raise a ``pydantic.ValidationError`` at configuration time.
"""
default_fields = {name: field.default for name, field in cls.model_fields.items()}
values = {**default_fields, **values}
# noinspection PyArgumentList
cls._validate_database_conf(
cls=cls,
Expand All @@ -203,17 +205,17 @@ def _BROKER_DB_and_parts_factory(cls, values: dict[str, Any]) -> dict[str, Any]:

# ==== [Elasticsearch] ====
# Where to connect to elasticsearch.
ES_HOSTNAME: str = None # FACTORY_PROVIDED_VALUE. See below validator-factory
ES_HOSTNAME: str | None = None # FACTORY_PROVIDED_VALUE. See below validator-factory
ES_SCHEME: str = "https"
ES_HOST: str = ENV_SPECIFIC_OVERRIDE
ES_PORT: str = None
ES_USER: str = None
ES_PASSWORD: SecretStr = None
ES_NAME: str = None
ES_PORT: str | None = None
ES_USER: str | None = None
ES_PASSWORD: SecretStr | None = None
ES_NAME: str | None = None

# noinspection PyMethodParameters
# Pydantic returns a classmethod for its validators, so the cls param is correct
@root_validator
@model_validator(mode="before")
def _ES_HOSTNAME_and_parts_factory(cls, values: dict[str, Any]) -> dict[str, Any]:
"""A root validator to backfill ES_HOSTNAME and ES_* part config vars and validate that they are
all consistent.
Expand All @@ -223,6 +225,8 @@ def _ES_HOSTNAME_and_parts_factory(cls, values: dict[str, Any]) -> dict[str, Any
- ALSO validates that the parts and whole string are consistent. A ``ValueError`` is thrown if found to
be inconsistent, which will in turn raise a ``pydantic.ValidationError`` at configuration time.
"""
default_fields = {name: field.default for name, field in cls.model_fields.items()}
values = {**default_fields, **values}
# noinspection PyArgumentList
cls._validate_http_url(
cls=cls,
Expand Down Expand Up @@ -251,9 +255,7 @@ def _validate_http_url(
# - it should take precedence
# - its values will be used to backfill any missing URL parts stored as separate config vars
if is_full_url_provided:
values = backfill_url_parts_config(
cls, url_conf_name, resource_conf_prefix, values
)
values = backfill_url_parts_config(cls, url_conf_name, resource_conf_prefix, values)

# If the full URL config is not provided, try to build-it-up from provided parts, then set the full URL
if not is_full_url_provided:
Expand All @@ -268,21 +270,16 @@ def _validate_http_url(

if enough_parts:
http_url = AnyHttpUrl(
url=None,
scheme=values[f"{resource_conf_prefix}_SCHEME"],
user=values[f"{resource_conf_prefix}_USER"],
username=values[f"{resource_conf_prefix}_USER"],
password=(
values[f"{resource_conf_prefix}_PASSWORD"].get_secret_value()
if values[f"{resource_conf_prefix}_PASSWORD"]
else None
),
host=values[f"{resource_conf_prefix}_HOST"],
port=values[f"{resource_conf_prefix}_PORT"],
path=(
"/" + values[f"{resource_conf_prefix}_NAME"]
if values[f"{resource_conf_prefix}_NAME"]
else None
),
path=values.get(f"{resource_conf_prefix}_NAME"),
)
values = eval_default_factory_from_root_validator(
cls, values, url_conf_name, lambda: str(http_url)
Expand All @@ -298,7 +295,7 @@ def _validate_http_url(
# Those clusters are the only place we currently need this variable,
# If you write code that depends on this config, make sure you
# set BRANCH as an environment variable on your machine
BRANCH: str = os.environ.get("BRANCH")
BRANCH: str | None = os.environ.get("BRANCH")

# SPARK_SCHEDULER_MODE = "FAIR" # if used with weighted pools, could allow round-robin tasking of simultaneous jobs
# TODO: have to deal with this if really wanting balanced (FAIR) task execution
Expand Down Expand Up @@ -361,10 +358,10 @@ def _validate_http_url(
AWS_ACCESS_KEY: SecretStr = ENV_SPECIFIC_OVERRIDE
AWS_SECRET_KEY: SecretStr = ENV_SPECIFIC_OVERRIDE
# Setting AWS_PROFILE to None so boto3 doesn't try to pick up the placeholder string as an actual profile to find
AWS_PROFILE: str = None # USER_SPECIFIC_OVERRIDE
SPARK_S3_BUCKET: str = os.environ.get("SPARK_S3_BUCKET")
BULK_DOWNLOAD_S3_BUCKET_NAME: str = os.environ.get("BULK_DOWNLOAD_S3_BUCKET_NAME")
DATABASE_DOWNLOAD_S3_BUCKET_NAME: str = os.environ.get(
AWS_PROFILE: str | None = None # USER_SPECIFIC_OVERRIDE
SPARK_S3_BUCKET: str | None = os.environ.get("SPARK_S3_BUCKET")
BULK_DOWNLOAD_S3_BUCKET_NAME: str | None = os.environ.get("BULK_DOWNLOAD_S3_BUCKET_NAME")
DATABASE_DOWNLOAD_S3_BUCKET_NAME: str | None = os.environ.get(
"DATABASE_DOWNLOAD_S3_BUCKET_NAME"
)
DELTA_LAKE_S3_PATH: str = "data/delta" # path within SPARK_S3_BUCKET where Delta output data will accumulate
Expand All @@ -380,9 +377,8 @@ def _validate_http_url(
COVID19_DOWNLOAD_README_OBJECT_KEY: str = (
f"files/{COVID19_DOWNLOAD_README_FILE_NAME}"
)

class Config:
pass
# supporting use of a user-provided (and git-ignored) .env file for overrides
env_file = str(_PROJECT_ROOT_DIR / ".env")
env_file_encoding = "utf-8"
model_config = SettingsConfigDict(
env_file=str(_PROJECT_ROOT_DIR / ".env"),
env_file_encoding="utf-8",
extra="allow",
)
27 changes: 16 additions & 11 deletions usaspending_api/config/envs/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@
# - Set config variables to DefaultConfig.USER_SPECIFIC_OVERRIDE where there is expected to be a
# user-provided a config value for a variable (e.g. in the ../.env file)
########################################################################################################################
from typing import ClassVar
from typing import Any, ClassVar

from pydantic import root_validator
from pydantic import model_validator
from pydantic.types import SecretStr
from usaspending_api.config.envs.default import DefaultConfig, _PROJECT_ROOT_DIR

from usaspending_api.config.envs.default import _PROJECT_ROOT_DIR, DefaultConfig
from usaspending_api.config.utils import (
USER_SPECIFIC_OVERRIDE,
FACTORY_PROVIDED_VALUE,
USER_SPECIFIC_OVERRIDE,
eval_default_factory_from_root_validator,
)

Expand Down Expand Up @@ -90,20 +91,24 @@ class LocalConfig(DefaultConfig):
USE_AWS: bool = False
AWS_ACCESS_KEY: SecretStr = MINIO_ACCESS_KEY
AWS_SECRET_KEY: SecretStr = MINIO_SECRET_KEY
AWS_PROFILE: str = None
AWS_PROFILE: str | None = None
AWS_REGION: str = ""
SPARK_S3_BUCKET: str = "data"
BULK_DOWNLOAD_S3_BUCKET_NAME: str = "bulk-download"
DATABASE_DOWNLOAD_S3_BUCKET_NAME = "dti-usaspending-db"
DATABASE_DOWNLOAD_S3_BUCKET_NAME: str = "dti-usaspending-db"

# Since this config values is built by composing others, we want to late/lazily-evaluate their values,
# in case the declared value is overridden by a shell env var or .env file value
AWS_S3_ENDPOINT: str = FACTORY_PROVIDED_VALUE # See below validator-based factory
AWS_S3_ENDPOINT: str | None = FACTORY_PROVIDED_VALUE # See below validator-based factory

@model_validator(mode="before")
def _AWS_S3_ENDPOINT_factory(cls, values: dict[str, Any]) -> dict[str, Any]:
# Merge defaults into values
default_fields = {name: field.default for name, field in cls.model_fields.items()}
merged_values = {**default_fields, **values}

@root_validator
def _AWS_S3_ENDPOINT_factory(cls, values):
def factory_func():
return values["MINIO_HOST"] + ":" + values["MINIO_PORT"]
def factory_func() -> str:
return merged_values["MINIO_HOST"] + ":" + merged_values["MINIO_PORT"]

return eval_default_factory_from_root_validator(cls, values, "AWS_S3_ENDPOINT", factory_func)

Expand Down
Loading
Loading