From 53aab7f77fc3bf522434d30683c63c9bebf658d5 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 4 Jun 2025 01:20:19 +0000 Subject: [PATCH 1/6] Fix: Ensure user_id logging by enabling JWT validation and enhancing logs This commit addresses an issue where your user_id was not being logged consistently across environments. The primary cause identified was the Cloud Run service's inability to fetch Google's public JWT keys due to a restrictive VPC egress setting (`PRIVATE_RANGES_ONLY`). This prevented the fallback JWT decoding mechanism (used when IAP headers are not the primary source) from successfully validating tokens and extracting user_id. Changes: - Modified `infra/feed-api/main.tf` to set the Cloud Run service's `template.vpc_access.egress` to `ALL_TRAFFIC`. This allows the service to connect to external Google APIs required for fetching JWT public keys. - Enhanced error logging in `api/src/middleware/request_context.py`: - Added specific error logs in `resolve_google_public_keys` for failures during fetching or processing of Google's public keys. - Upgraded a warning to an error in `decode_jwt` for JWT decoding exceptions and added a log for when public keys are unavailable. These changes ensure that if JWT-based authentication is used (either as primary or fallback), it can function correctly. Further investigation may be needed to confirm if IAP headers from the load balancer are being correctly propagated to the Cloud Run service, as this is the preferred method for obtaining user identity. --- api/src/middleware/request_context.py | 9 ++++++--- infra/feed-api/main.tf | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/api/src/middleware/request_context.py b/api/src/middleware/request_context.py index 71e42e073..873a60562 100644 --- a/api/src/middleware/request_context.py +++ b/api/src/middleware/request_context.py @@ -33,8 +33,10 @@ def resolve_google_public_keys(self): ) response.raise_for_status() self.google_public_keys = response.json() - except RequestException as e: - print(f"Error fetching Google's public keys: {e}") + if not self.google_public_keys: + logging.error("Fetched Google's public keys, but the result is empty or invalid.") + except requests.exceptions.RequestException as e: + logging.error(f"Failed to fetch Google's public keys: {e}") def decode_jwt(self, token: str): """ @@ -47,10 +49,11 @@ def decode_jwt(self, token: str): token = token.replace("Bearer ", "") self.resolve_google_public_keys() if not self.google_public_keys: + logging.error("Cannot decode JWT: Google's public keys are not available.") return None return jwt.decode(token, self.google_public_keys, audience=get_config(PROJECT_ID)) except Exception as e: - logging.warning("Error decoding JWT: %s", e) + logging.error("Error decoding JWT: %s", e) return None def _extract_from_headers(self, headers: dict, scope: Scope) -> None: diff --git a/infra/feed-api/main.tf b/infra/feed-api/main.tf index 17f3ca089..a15278437 100644 --- a/infra/feed-api/main.tf +++ b/infra/feed-api/main.tf @@ -56,7 +56,7 @@ resource "google_cloud_run_v2_service" "mobility-feed-api" { service_account = google_service_account.containers_service_account.email vpc_access { connector = data.google_vpc_access_connector.vpc_connector.id - egress = "PRIVATE_RANGES_ONLY" + egress = "ALL_TRAFFIC" } containers { image = "${var.gcp_region}-docker.pkg.dev/${var.project_id}/${var.docker_repository_name}/${var.feed_api_service}:${var.feed_api_image_version}" From 73ec1ac7a1d45a32270ca410bd204f80188f7e9a Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Tue, 3 Jun 2025 21:23:41 -0400 Subject: [PATCH 2/6] fix lint --- api/src/middleware/request_context.py | 1 - 1 file changed, 1 deletion(-) diff --git a/api/src/middleware/request_context.py b/api/src/middleware/request_context.py index 873a60562..6103cb7f1 100644 --- a/api/src/middleware/request_context.py +++ b/api/src/middleware/request_context.py @@ -3,7 +3,6 @@ import requests from google.auth import jwt -from requests.exceptions import RequestException from starlette.datastructures import Headers from starlette.types import Scope From 90ede69070cbaf0e3189f4fac5d52d0fe4ae2a36 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Tue, 3 Jun 2025 22:03:09 -0400 Subject: [PATCH 3/6] testing logs --- api/src/main.py | 3 +++ api/src/middleware/request_context_middleware.py | 5 +++++ api/src/utils/logger.py | 8 +++++--- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/api/src/main.py b/api/src/main.py index da3cb76a2..3e846eaff 100644 --- a/api/src/main.py +++ b/api/src/main.py @@ -15,6 +15,7 @@ # # This files allows to add extra application decorators aside from the generated code. # The app created here is intended to replace the generated feeds_gen.main:app variable. +import logging import os import uvicorn @@ -58,7 +59,9 @@ @app.on_event("startup") async def startup_event(): + print("print: TEST_LOG_FROM_MAIN_PY_INIT_QA_PROD") global_logging_setup() + logging.critical("TEST_LOG_FROM_MAIN_PY_INIT_QA_PROD") if __name__ == "__main__": diff --git a/api/src/middleware/request_context_middleware.py b/api/src/middleware/request_context_middleware.py index 02532f6cb..e86016b8b 100644 --- a/api/src/middleware/request_context_middleware.py +++ b/api/src/middleware/request_context_middleware.py @@ -12,6 +12,7 @@ class RequestContextMiddleware: def __init__(self, app: ASGIApp) -> None: self.logger = get_logger(API_ACCESS_LOG) + self.logger.critical("MIDDLEWARE_INIT_TEST_QA_PROD") self.app = app @staticmethod @@ -60,6 +61,10 @@ def log_api_access( """ Log the API access logs. """ + self.logger.critical( + f"MIDDLEWARE_USER_ID_CHECK_QA_PROD_CRITICAL: " + f"{request_context.user_id if request_context.user_id else 'USER_ID_IS_EMPTY_OR_NONE'}" + ) latency = time.time() - start_time request = self.create_http_request(scope, request_context, status_code, content_length, latency) headers = {k.decode().lower(): v.decode() for k, v in scope.get("headers", [])} diff --git a/api/src/utils/logger.py b/api/src/utils/logger.py index 0b07de8fe..1914bead5 100644 --- a/api/src/utils/logger.py +++ b/api/src/utils/logger.py @@ -116,10 +116,12 @@ def is_local_env(): def global_logging_setup(): + logging.critical("Starting cloud up logging") if is_local_env(): + logging.critical("Setting local up logging") logging.basicConfig(level=get_env_logging_level()) return - + logging.critical("Setting cloud up logging") # Send warnings through logging logging.captureWarnings(True) # Replace sys.stderr @@ -127,7 +129,7 @@ def global_logging_setup(): try: client = google.cloud.logging.Client() handler = CloudLoggingHandler(client, structured=True) - handler.setLevel(get_env_logging_level()) + handler.setLevel(logging.DEBUG) handler.addFilter(GoogleCloudLogFilter(project=client.project)) except Exception as e: logging.error("Error initializing cloud logging: %s", e) @@ -136,7 +138,7 @@ def global_logging_setup(): # Configure root logger root_logger = logging.getLogger() - root_logger.setLevel(get_env_logging_level()) + root_logger.setLevel(logging.DEBUG) root_logger.handlers.clear() root_logger.addHandler(handler) From 3d11978e3541ac62f673c92642105ca80f0ecc42 Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Thu, 5 Jun 2025 10:46:02 -0400 Subject: [PATCH 4/6] add missing roles --- api/src/utils/logger.py | 45 ++++++++++++++++++++++------------------- infra/feed-api/main.tf | 25 +++++++++++++++++++++++ 2 files changed, 49 insertions(+), 21 deletions(-) diff --git a/api/src/utils/logger.py b/api/src/utils/logger.py index 1914bead5..29fe7df67 100644 --- a/api/src/utils/logger.py +++ b/api/src/utils/logger.py @@ -62,23 +62,26 @@ class GoogleCloudLogFilter(CloudLoggingFilter): """ def filter(self, record: logging.LogRecord) -> bool: - request_context = get_request_context() - http_request = get_http_request(record) - if http_request: - record.http_request = asdict(http_request) - span_id = request_context.get("span_id") - trace = get_trace(request_context) - record.trace = trace - record.span_id = span_id - - record._log_fields = { - "logging.googleapis.com/trace": trace, - "logging.googleapis.com/spanId": span_id, - "logging.googleapis.com/httpRequest": asdict(http_request) if http_request else None, - "logging.googleapis.com/trace_sampled": True, - } - super().filter(record) - + try: + request_context = get_request_context() + http_request = get_http_request(record) + if http_request: + record.http_request = asdict(http_request) + span_id = request_context.get("span_id") + trace = get_trace(request_context) + record.trace = trace + record.span_id = span_id + + record._log_fields = { + "logging.googleapis.com/trace": trace, + "logging.googleapis.com/spanId": span_id, + "logging.googleapis.com/httpRequest": asdict(http_request) if http_request else None, + "logging.googleapis.com/trace_sampled": True, + } + super().filter(record) + except Exception as e: + # Using print to avoid a recursive call the log filter + print(f"Error in GoogleCloudLogFilter: {e}") return True @@ -116,12 +119,12 @@ def is_local_env(): def global_logging_setup(): - logging.critical("Starting cloud up logging") + logging.debug("Starting cloud up logging") if is_local_env(): - logging.critical("Setting local up logging") + logging.debug("Setting local up logging") logging.basicConfig(level=get_env_logging_level()) return - logging.critical("Setting cloud up logging") + logging.debug("Setting cloud up logging") # Send warnings through logging logging.captureWarnings(True) # Replace sys.stderr @@ -154,4 +157,4 @@ def global_logging_setup(): ]: get_logger(name) - logging.info("Setting cloud up logging completed") + logging.debug("Setting cloud up logging completed") diff --git a/infra/feed-api/main.tf b/infra/feed-api/main.tf index a15278437..42d37143a 100644 --- a/infra/feed-api/main.tf +++ b/infra/feed-api/main.tf @@ -32,6 +32,23 @@ locals { # DEV and QA use the vpc connector vpc_connector_name = lower(var.environment) == "dev" ? "vpc-connector-qa" : "vpc-connector-${lower(var.environment)}" vpc_connector_project = lower(var.environment) == "dev" ? "mobility-feeds-qa" : var.project_id + service_account_roles = [ + # Cloud Logging: allows writing logs to GCP + "roles/logging.logWriter", + # Cloud Trace: allows writing trace and span data + "roles/cloudtrace.agent", + # Cloud Monitoring: allows publishing custom metrics + "roles/monitoring.metricWriter", + # Serverless VPC Access: required to use a VPC connector + "roles/vpcaccess.user" + ] + service_account_role_bindings = { + for role in local.service_account_roles : + "${role}" => { + role = role + project = role == "roles/vpcaccess.user" ? local.vpc_connector_project : var.project_id + } + } } data "google_vpc_access_connector" "vpc_connector" { @@ -108,6 +125,14 @@ resource "google_secret_manager_secret_iam_member" "policy" { member = "serviceAccount:${google_service_account.containers_service_account.email}" } +resource "google_project_iam_member" "containers_service_account_roles" { + for_each = local.service_account_role_bindings + + project = each.value.project + role = each.value.role + member = "serviceAccount:${google_service_account.containers_service_account.email}" +} + output "feed_api_uri" { value = google_cloud_run_v2_service.mobility-feed-api.uri description = "Main URI of the Feed API" From 853e6be67b86522ffbf116837062e3b0e7ae2d5a Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Thu, 5 Jun 2025 11:26:48 -0400 Subject: [PATCH 5/6] clean-up logger --- api/src/feeds/impl/feeds_api_impl.py | 4 +-- .../impl/models/validation_report_impl.py | 4 +-- api/src/main.py | 3 --- .../middleware/request_context_middleware.py | 5 ---- api/src/scripts/load_dataset_on_create.py | 4 +-- api/src/scripts/populate_db.py | 4 +-- api/src/scripts/populate_db_test_data.py | 5 ++-- api/src/shared/common/logging_utils.py | 26 ------------------- api/src/utils/logger.py | 4 +-- 9 files changed, 13 insertions(+), 46 deletions(-) diff --git a/api/src/feeds/impl/feeds_api_impl.py b/api/src/feeds/impl/feeds_api_impl.py index 6a54ca4dc..cc5b5b45b 100644 --- a/api/src/feeds/impl/feeds_api_impl.py +++ b/api/src/feeds/impl/feeds_api_impl.py @@ -35,7 +35,6 @@ InternalHTTPException, gbfs_feed_not_found, ) -from shared.common.logging_utils import Logger from shared.database.database import Database, with_db_session from shared.database_gen.sqlacodegen_models import ( Feed as FeedOrm, @@ -50,6 +49,7 @@ from shared.feed_filters.gtfs_feed_filter import LocationFilter from shared.feed_filters.gtfs_rt_feed_filter import GtfsRtFeedFilter, EntityTypeFilter from utils.date_utils import valid_iso_date +from utils.logger import get_logger T = TypeVar("T", bound="Feed") @@ -64,7 +64,7 @@ class FeedsApiImpl(BaseFeedsApi): APIFeedType = Union[FeedOrm, GtfsFeed, GtfsRTFeed] def __init__(self) -> None: - self.logger = Logger("FeedsApiImpl").get_logger() + self.logger = get_logger("FeedsApiImpl") @with_db_session def get_feed(self, id: str, db_session: Session) -> Feed: diff --git a/api/src/feeds/impl/models/validation_report_impl.py b/api/src/feeds/impl/models/validation_report_impl.py index 5c6bcb644..d8faccc9a 100644 --- a/api/src/feeds/impl/models/validation_report_impl.py +++ b/api/src/feeds/impl/models/validation_report_impl.py @@ -1,6 +1,6 @@ from shared.database_gen.sqlacodegen_models import Validationreport from feeds_gen.models.validation_report import ValidationReport -from shared.common.logging_utils import Logger +from utils.logger import get_logger DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S" @@ -18,7 +18,7 @@ class Config: @classmethod def _get_logger(cls): - return Logger(ValidationReportImpl.__class__.__module__).get_logger() + return get_logger(ValidationReportImpl.__class__.__module__) @classmethod def from_orm(cls, validation_report: Validationreport | None) -> ValidationReport | None: diff --git a/api/src/main.py b/api/src/main.py index 3e846eaff..da3cb76a2 100644 --- a/api/src/main.py +++ b/api/src/main.py @@ -15,7 +15,6 @@ # # This files allows to add extra application decorators aside from the generated code. # The app created here is intended to replace the generated feeds_gen.main:app variable. -import logging import os import uvicorn @@ -59,9 +58,7 @@ @app.on_event("startup") async def startup_event(): - print("print: TEST_LOG_FROM_MAIN_PY_INIT_QA_PROD") global_logging_setup() - logging.critical("TEST_LOG_FROM_MAIN_PY_INIT_QA_PROD") if __name__ == "__main__": diff --git a/api/src/middleware/request_context_middleware.py b/api/src/middleware/request_context_middleware.py index e86016b8b..02532f6cb 100644 --- a/api/src/middleware/request_context_middleware.py +++ b/api/src/middleware/request_context_middleware.py @@ -12,7 +12,6 @@ class RequestContextMiddleware: def __init__(self, app: ASGIApp) -> None: self.logger = get_logger(API_ACCESS_LOG) - self.logger.critical("MIDDLEWARE_INIT_TEST_QA_PROD") self.app = app @staticmethod @@ -61,10 +60,6 @@ def log_api_access( """ Log the API access logs. """ - self.logger.critical( - f"MIDDLEWARE_USER_ID_CHECK_QA_PROD_CRITICAL: " - f"{request_context.user_id if request_context.user_id else 'USER_ID_IS_EMPTY_OR_NONE'}" - ) latency = time.time() - start_time request = self.create_http_request(scope, request_context, status_code, content_length, latency) headers = {k.decode().lower(): v.decode() for k, v in scope.get("headers", [])} diff --git a/api/src/scripts/load_dataset_on_create.py b/api/src/scripts/load_dataset_on_create.py index 80aede72c..5d97e641e 100644 --- a/api/src/scripts/load_dataset_on_create.py +++ b/api/src/scripts/load_dataset_on_create.py @@ -10,13 +10,13 @@ from google.cloud.pubsub_v1.futures import Future from shared.database_gen.sqlacodegen_models import Feed -from shared.common.logging_utils import Logger +from utils.logger import get_logger # Lazy create so we won't try to connect to google cloud when the file is imported. pubsub_client = None lock = threading.Lock() -logger = Logger("load_dataset_on_create").get_logger() +logger = get_logger("load_dataset_on_create") def get_pubsub_client(): diff --git a/api/src/scripts/populate_db.py b/api/src/scripts/populate_db.py index c474061ea..5223e70e6 100644 --- a/api/src/scripts/populate_db.py +++ b/api/src/scripts/populate_db.py @@ -9,13 +9,13 @@ from dotenv import load_dotenv from sqlalchemy import text -from shared.common.logging_utils import Logger from shared.database.database import Database from shared.database.database import configure_polymorphic_mappers from shared.database_gen.sqlacodegen_models import Feed, Gtfsrealtimefeed, Gtfsfeed, Gbfsfeed from shared.database_gen.sqlacodegen_models import ( t_feedsearch, ) +from utils.logger import get_logger if TYPE_CHECKING: from sqlalchemy.orm import Session @@ -50,7 +50,7 @@ def __init__(self, filepaths): Specify a list of files to load the csv data from. Can also be a single string with a file name. """ - self.logger = Logger(self.__class__.__name__).get_logger() + self.logger = get_logger(self.__class__.__name__) self.logger.setLevel(logging.INFO) self.db = Database(echo_sql=False) self.df = pandas.DataFrame() diff --git a/api/src/scripts/populate_db_test_data.py b/api/src/scripts/populate_db_test_data.py index 02465f3a8..bd71f6cd7 100644 --- a/api/src/scripts/populate_db_test_data.py +++ b/api/src/scripts/populate_db_test_data.py @@ -20,9 +20,10 @@ Gbfsfeed, ) from scripts.populate_db import set_up_configs, DatabasePopulateHelper -from shared.common.logging_utils import Logger from typing import TYPE_CHECKING +from utils.logger import get_logger + if TYPE_CHECKING: from sqlalchemy.orm import Session @@ -38,7 +39,7 @@ def __init__(self, filepaths): Specify a list of files to load the json data from. Can also be a single string with a file name. """ - self.logger = Logger(self.__class__.__module__).get_logger() + self.logger = get_logger(self.__class__.__module__) if not isinstance(filepaths, list): self.filepaths = [filepaths] diff --git a/api/src/shared/common/logging_utils.py b/api/src/shared/common/logging_utils.py index b7f36dc31..917454db8 100644 --- a/api/src/shared/common/logging_utils.py +++ b/api/src/shared/common/logging_utils.py @@ -7,29 +7,3 @@ def get_env_logging_level(): Get the logging level from the environment via OS variable LOGGING_LEVEL. Returns INFO if not set. """ return logging.getLevelName(os.getenv("LOGGING_LEVEL", "INFO")) - - -class Logger: - """ - Util class for logging information, errors or warnings - """ - - def __init__(self, name): - """ - Initialize the logger - """ - formatter = logging.Formatter("%(asctime)s %(levelname)s %(name)s %(message)s") - - console_handler = logging.StreamHandler() - console_handler.setFormatter(formatter) - - self.logger = logging.getLogger(name) - self.logger.addHandler(console_handler) - self.logger.setLevel(get_env_logging_level()) - - def get_logger(self): - """ - Get the logger instance - :return: the logger instance - """ - return self.logger diff --git a/api/src/utils/logger.py b/api/src/utils/logger.py index 29fe7df67..fb30a2714 100644 --- a/api/src/utils/logger.py +++ b/api/src/utils/logger.py @@ -132,7 +132,7 @@ def global_logging_setup(): try: client = google.cloud.logging.Client() handler = CloudLoggingHandler(client, structured=True) - handler.setLevel(logging.DEBUG) + handler.setLevel(get_env_logging_level()) handler.addFilter(GoogleCloudLogFilter(project=client.project)) except Exception as e: logging.error("Error initializing cloud logging: %s", e) @@ -141,7 +141,7 @@ def global_logging_setup(): # Configure root logger root_logger = logging.getLogger() - root_logger.setLevel(logging.DEBUG) + root_logger.setLevel(get_env_logging_level()) root_logger.handlers.clear() root_logger.addHandler(handler) From 5282584f26c3675886c69c31c84c83922ba3141e Mon Sep 17 00:00:00 2001 From: David Gamez Diaz <1192523+davidgamez@users.noreply.github.com> Date: Thu, 5 Jun 2025 14:54:19 -0400 Subject: [PATCH 6/6] fix project id --- infra/feed-api/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infra/feed-api/main.tf b/infra/feed-api/main.tf index 42d37143a..7c3018b62 100644 --- a/infra/feed-api/main.tf +++ b/infra/feed-api/main.tf @@ -46,7 +46,7 @@ locals { for role in local.service_account_roles : "${role}" => { role = role - project = role == "roles/vpcaccess.user" ? local.vpc_connector_project : var.project_id + project = var.project_id } } }