Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions functions-python/backfill_dataset_service_date_range/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,14 @@ def backfill_datasets(session: "Session"):
response.raise_for_status()
json_data = response.json()
else:
logging.info("Blob found, downloading from blob")
json_data = json.loads(dataset_blob.download_as_string())
try:
logging.info("Blob found, downloading from blob")
json_data = json.loads(dataset_blob.download_as_string())
except Exception as e:
logging.error(f"Error downloading blob: {e} trying json report url")
response = requests.get(json_report_url)
response.raise_for_status()
json_data = response.json()

extracted_service_start_date = (
json_data.get("summary", {})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,58 @@ def test_backfill_datasets(mock_get, mock_storage_client):
mock_session.commit.assert_called_once()


@patch("logging.error", autospec=True)
@patch("google.cloud.storage.Client", autospec=True)
@patch("requests.get")
def test_backfill_datasets_error_commit(mock_get, mock_storage_client, mock_logger):
    """Verify that a failing session.commit() leads to rollback and close.

    The blob is mocked as missing so the code under test falls back to the
    HTTP json-report URL; the session's commit is rigged to raise.
    """
    # Mock the storage client and bucket; exists() -> False forces the
    # requests.get fallback path instead of a blob download.
    mock_bucket = MagicMock()
    mock_client_instance = mock_storage_client.return_value
    mock_client_instance.bucket.return_value = mock_bucket
    mock_blob = MagicMock()
    mock_blob.exists.return_value = False
    mock_bucket.blob.return_value = mock_blob

    mock_session = MagicMock()
    mock_dataset = Mock(spec=Gtfsdataset)
    mock_dataset.id = 1
    mock_dataset.stable_id = "mdb-392-202406181921"
    mock_dataset.service_date_range_end = None
    mock_dataset.service_date_range_start = None
    mock_dataset.validation_reports = [
        MagicMock(
            validator_version="6.0.0",
            validated_at="2022-01-01T00:00:00Z",
            json_report="http://example-2.com/report.json",
        )
    ]

    mock_query = MagicMock()
    mock_query.options.return_value = mock_query
    mock_query.filter.return_value = mock_query
    mock_query.all.return_value = [mock_dataset]
    mock_session.query.return_value = mock_query
    # The commit itself is the failure under test.
    mock_session.commit.side_effect = Exception("Commit failed")

    mock_response = Mock()
    mock_response.status_code = 200
    mock_response.json.return_value = {
        "summary": {
            "feedInfo": {
                "feedServiceWindowStart": "2023-01-01",
                "feedServiceWindowEnd": "2023-12-31",
            }
        }
    }
    mock_get.return_value = mock_response

    # BUG FIX: the original placed the assertions inside the `except` block.
    # If backfill_datasets handles the commit error internally and does not
    # re-raise, that block never executes and the test passes vacuously.
    # Run the assertions unconditionally after the call, tolerating a
    # propagated exception, so the rollback/close checks always execute.
    try:
        backfill_datasets(mock_session)
    except Exception:
        pass
    mock_session.rollback.assert_called_once()
    mock_session.close.assert_called_once()


@patch("google.cloud.storage.Client", autospec=True)
@patch("requests.get")
def test_backfill_datasets_no_validation_reports(mock_get, mock_storage_client):
Expand Down
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[question]: I'm confused about the renaming of the files, is this not affecting the deployment of the function? I would expect a change in the terraform code after files are moved from one folder to another.

Copy link
Copy Markdown
Contributor

@cka-y cka-y Mar 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggested renaming the folder because it was confusing to map the function in GCP to its source code folder. But you're right — lines 37–38 of the Python functions Terraform need to be updated.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now I'm clear. Thanks!

File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,20 @@ def generate_report_entities(

dataset = get_dataset(dataset_stable_id, session)
dataset.validation_reports.append(validation_report_entity)

if (
"summary" in json_report
and "feedInfo" in json_report["summary"]
and "feedServiceWindowStart" in json_report["summary"]["feedInfo"]
and "feedServiceWindowEnd" in json_report["summary"]["feedInfo"]
):
dataset.service_date_range_start = json_report["summary"]["feedInfo"][
"feedServiceWindowStart"
]
dataset.service_date_range_end = json_report["summary"]["feedInfo"][
"feedServiceWindowEnd"
]

for feature_name in json_report["summary"]["gtfsFeatures"]:
feature = get_feature(feature_name, session)
feature.validations.append(validation_report_entity)
Expand Down
4 changes: 2 additions & 2 deletions infra/functions-python/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ locals {
vpc_connector_name = lower(var.environment) == "dev" ? "vpc-connector-qa" : "vpc-connector-${lower(var.environment)}"
vpc_connector_project = lower(var.environment) == "dev" ? "mobility-feeds-qa" : var.project_id

function_process_validation_report_config = jsondecode(file("${path.module}/../../functions-python/validation_report_processor/function_config.json"))
function_process_validation_report_zip = "${path.module}/../../functions-python/validation_report_processor/.dist/validation_report_processor.zip"
function_process_validation_report_config = jsondecode(file("${path.module}/../../functions-python/process_validation_report/function_config.json"))
function_process_validation_report_zip = "${path.module}/../../functions-python/process_validation_report/.dist/process_validation_report.zip"
public_hosted_datasets_url = lower(var.environment) == "prod" ? "https://${var.public_hosted_datasets_dns}" : "https://${var.environment}-${var.public_hosted_datasets_dns}"

function_update_validation_report_config = jsondecode(file("${path.module}/../../functions-python/update_validation_report/function_config.json"))
Expand Down