Skip to content

Commit 123943a

Browse files
authored
feat: Propagate license ID across feeds sharing the same license URL (#1668)
1 parent acf9e5d commit 123943a

9 files changed

Lines changed: 861 additions & 6 deletions

File tree

api/src/shared/common/license_utils.py

Lines changed: 129 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from typing import List, Tuple, Optional
88

99
from shared.common.db_utils import normalize_url, normalize_url_str
10-
from shared.database_gen.sqlacodegen_models import License, FeedLicenseChange
10+
from shared.database_gen.sqlacodegen_models import License, FeedLicenseChange, Feed
1111

1212

1313
@dataclass
@@ -534,3 +534,131 @@ def assign_license_by_url(
534534
)
535535

536536
return best
537+
538+
539+
@dataclass
class PropagateLicenseAffectedFeedResult:
    """One feed that was (or, in a dry run, would be) updated by a propagation.

    Attributes:
        feed_id: Stable ID of the affected feed.
        previous_license_id: License ID the feed carried before the
            propagation, or ``None`` when it had none.
        data_type: Data type recorded on the feed, if any.
    """

    feed_id: str
    previous_license_id: Optional[str]
    data_type: Optional[str]
546+
547+
548+
@dataclass
class PropagateLicenseResult:
    """Outcome summary of a single license propagation run.

    Attributes:
        license_id: License ID that was propagated.
        license_url: License URL exactly as supplied by the caller.
        normalized_license_url: Normalized form of ``license_url`` that was
            used as the match key.
        dry_run: ``True`` when nothing was persisted.
        override: ``True`` when feeds that already had a ``license_id`` were
            eligible for update as well.
        total_feeds_with_same_url: Count of all feeds matched on the
            normalized license URL, updated or not.
        affected_feeds_count: Count of feeds that were (or would be) updated.
        affected_feeds: One descriptor per feed that was (or would be) updated.
    """

    license_id: str
    license_url: str
    normalized_license_url: str
    dry_run: bool
    override: bool
    total_feeds_with_same_url: int
    affected_feeds_count: int
    affected_feeds: List[PropagateLicenseAffectedFeedResult]
571+
572+
573+
def propagate_license_by_url(
    license_id: str,
    license_url: str,
    db_session: Session,
    *,
    dry_run: bool = True,
    override: bool = False,
) -> PropagateLicenseResult:
    """Propagate a license ID to all feeds sharing the same normalized license URL.

    Looks up every feed whose ``license_url`` is set, whose operational status
    is not ``"unpublished"``, and whose normalized ``license_url`` equals the
    normalized form of ``license_url``. Unless ``dry_run`` is set, the selected
    feeds get ``license_id`` assigned and one ``FeedLicenseChange`` audit row is
    added to the session per updated feed. No commit or flush is issued here.

    Args:
        license_id: The license ID to propagate. Must exist in the ``license`` table.
        license_url: The reference URL whose normalized form is used for matching.
        db_session: Active SQLAlchemy session.
        dry_run: When True (default), compute results without modifying any feed.
        override: When False (default), only update feeds where ``license_id`` is
            unset. When True, also update feeds that carry a different
            ``license_id``.

    Returns:
        A ``PropagateLicenseResult`` describing the outcome. When ``license_url``
        normalizes to an empty value, no feeds are matched and the result
        reports zero candidates and zero affected feeds.

    Raises:
        ValueError: If ``license_id`` does not exist in the database.
    """
    if db_session.get(License, license_id) is None:
        raise ValueError(f"License '{license_id}' not found in the database.")

    normalized_url = normalize_url_str(license_url)

    if normalized_url:
        # Same SQL-side normalization pattern as get_feed_query_by_normalized_url.
        # NOTE(review): in SQL, `!=` is not true for NULL, so feeds with a NULL
        # operational_status are excluded here — confirm that is intended.
        all_candidates = (
            db_session.query(Feed)
            .filter(
                Feed.license_url.isnot(None),
                Feed.operational_status != "unpublished",
                normalized_url == func.lower(func.trim(normalize_url(Feed.license_url))),
            )
            .all()
        )
    else:
        # An empty match key would pair up with every feed whose stored URL also
        # normalizes to nothing, which is never a meaningful "same license URL".
        logging.warning(
            "propagate_license_by_url: license URL %r normalizes to an empty value; no feeds matched",
            license_url,
        )
        all_candidates = []
    total_feeds_with_same_url = len(all_candidates)

    if override:
        feeds_to_update = [f for f in all_candidates if f.license_id != license_id]
    else:
        feeds_to_update = [f for f in all_candidates if f.license_id is None]

    # Snapshot the previous license IDs before any feed is mutated below.
    affected: List[PropagateLicenseAffectedFeedResult] = [
        PropagateLicenseAffectedFeedResult(
            feed_id=feed.stable_id,
            previous_license_id=feed.license_id,
            data_type=feed.data_type,
        )
        for feed in feeds_to_update
    ]

    if not dry_run:
        for feed in feeds_to_update:
            feed.license_id = license_id
            db_session.add(
                FeedLicenseChange(
                    feed_id=feed.id,
                    feed_license_url=feed.license_url,
                    matched_license_id=license_id,
                    confidence=1.0,
                    match_type="propagated",
                    matched_source="propagate_match",
                    verified=True,
                )
            )

    affected_count = len(affected)
    logging.info(
        "propagate_license_by_url: license_id=%s url=%s dry_run=%s override=%s total_with_url=%d affected=%d",
        license_id,
        license_url,
        dry_run,
        override,
        total_feeds_with_same_url,
        affected_count,
    )

    return PropagateLicenseResult(
        license_id=license_id,
        license_url=license_url,
        normalized_license_url=normalized_url,
        dry_run=dry_run,
        override=override,
        total_feeds_with_same_url=total_feeds_with_same_url,
        affected_feeds_count=affected_count,
        affected_feeds=affected,
    )

docs/OperationsAPI.yaml

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,36 @@ paths:
333333
application/json:
334334
schema:
335335
$ref: "#/components/schemas/MatchingLicenses"
336+
/v1/operations/licenses:propagate_match:
337+
post:
338+
description: >
339+
Propagate a license ID to all feeds sharing the same normalized license URL. Use dry_run=true (the default) to preview which feeds would be updated without persisting any changes.
340+
341+
tags:
342+
- "licenses"
343+
operationId: propagateMatchLicense
344+
security:
345+
- ApiKeyAuth: []
346+
requestBody:
347+
description: Payload containing the license ID to propagate and the license URL used for matching.
348+
required: true
349+
content:
350+
application/json:
351+
schema:
352+
$ref: "#/components/schemas/PropagateLicenseRequest"
353+
responses:
354+
"200":
355+
description: The result of the propagation, including the list of affected feeds.
356+
content:
357+
application/json:
358+
schema:
359+
$ref: "#/components/schemas/PropagateLicenseResponse"
360+
"400":
361+
description: Bad request (e.g. unknown license_id).
362+
"422":
363+
description: Validation error (e.g. missing required fields).
364+
"500":
365+
description: Internal server error.
336366
/v1/operations/licenses/{id}:
337367
parameters:
338368
- $ref: "#/components/parameters/license_id_path_param"
@@ -1389,6 +1419,88 @@ components:
13891419
type: array
13901420
items:
13911421
$ref: "#/components/schemas/MatchingLicense"
1422+
PropagateLicenseAffectedFeed:
1423+
x-operation: true
1424+
type: object
1425+
description: A feed affected by a license propagation operation.
1426+
properties:
1427+
feed_id:
1428+
description: The stable ID of the affected feed.
1429+
type: string
1430+
example: mdb-42
1431+
previous_license_id:
1432+
description: The license ID the feed had before propagation (null if none).
1433+
type: string
1434+
nullable: true
1435+
example: null
1436+
data_type:
1437+
description: The data type of the feed.
1438+
type: string
1439+
enum: [gtfs, gtfs_rt, gbfs]
1440+
example: gtfs
1441+
PropagateLicenseRequest:
1442+
x-operation: true
1443+
type: object
1444+
description: Request payload for the propagate_match license endpoint.
1445+
required:
1446+
- license_id
1447+
- license_url
1448+
properties:
1449+
license_id:
1450+
description: The license ID to propagate to matching feeds.
1451+
type: string
1452+
example: CC-BY-4.0
1453+
license_url:
1454+
description: The license URL whose normalized form is used to find matching feeds.
1455+
type: string
1456+
format: url
1457+
example: https://creativecommons.org/licenses/by/4.0/deed.nl
1458+
dry_run:
1459+
description: >
1460+
When true (default), compute and return the affected feeds without persisting any changes. Set to false to apply the changes.
1461+
1462+
type: boolean
1463+
default: true
1464+
override:
1465+
description: >
1466+
When false (default), only feeds whose license_id is currently unset (null) are updated. When true, feeds that already have a license_id are also updated.
1467+
1468+
type: boolean
1469+
default: false
1470+
PropagateLicenseResponse:
1471+
x-operation: true
1472+
type: object
1473+
description: Result of a license propagation operation.
1474+
properties:
1475+
license_id:
1476+
description: The license ID that was propagated.
1477+
type: string
1478+
example: CC-BY-4.0
1479+
license_url:
1480+
description: The original license URL provided for matching.
1481+
type: string
1482+
example: https://creativecommons.org/licenses/by/4.0/deed.nl
1483+
normalized_license_url:
1484+
description: The normalized form of the license URL used for matching.
1485+
type: string
1486+
example: creativecommons.org/licenses/by/4.0/deed.nl
1487+
dry_run:
1488+
description: Whether this was a dry run (no changes persisted).
1489+
type: boolean
1490+
override:
1491+
description: Whether feeds with an existing license_id were also updated.
1492+
type: boolean
1493+
total_feeds_with_same_url:
1494+
description: Total number of non-unpublished feeds sharing the same normalized license URL, whether or not they are updated.
1495+
type: integer
1496+
affected_feeds_count:
1497+
description: Number of feeds that were (or would be) updated.
1498+
type: integer
1499+
affected_feeds:
1500+
description: List of feeds that were (or would be) updated.
1501+
type: array
1502+
items:
1503+
$ref: "#/components/schemas/PropagateLicenseAffectedFeed"
13921504
OperationCreateRequestGtfsFeed:
13931505
x-operation: true
13941506
type: object
@@ -1441,6 +1553,12 @@ components:
14411553
type: array
14421554
items:
14431555
$ref: '#/components/schemas/FeedRelatedLink'
1556+
propagate_license:
1557+
type: boolean
1558+
default: false
1559+
description: >
1560+
When true, after the feed is created, propagate its license_id to all other feeds sharing the same normalized license_url where license_id is currently unset.
1561+
14441562
required:
14451563
- source_info
14461564
- operational_status
@@ -1504,6 +1622,8 @@ components:
15041622
15051623
15061624
1625+
1626+
15071627
* vp - vehicle positions
15081628
* tu - trip updates
15091629
* sa - service alerts
@@ -1522,6 +1642,12 @@ components:
15221642
type: array
15231643
items:
15241644
$ref: '#/components/schemas/FeedRelatedLink'
1645+
propagate_license:
1646+
type: boolean
1647+
default: false
1648+
description: >
1649+
When true, after the feed is created, propagate its license_id to all other feeds sharing the same normalized license_url where license_id is currently unset.
1650+
15251651
required:
15261652
- source_info
15271653
- operational_status
@@ -1627,6 +1753,8 @@ components:
16271753
16281754
16291755
1756+
1757+
16301758
* vp - vehicle positions
16311759
* tu - trip updates
16321760
* sa - service alerts
@@ -1646,6 +1774,12 @@ components:
16461774
official:
16471775
type: boolean
16481776
description: Whether this is an official feed.
1777+
propagate_license:
1778+
type: boolean
1779+
default: false
1780+
description: >
1781+
When true, after the feed is updated, propagate its license_id to all other feeds sharing the same normalized license_url where license_id is currently unset.
1782+
16491783
required:
16501784
- id
16511785
- status
@@ -1695,6 +1829,12 @@ components:
16951829
official:
16961830
type: boolean
16971831
description: Whether this is an official feed.
1832+
propagate_license:
1833+
type: boolean
1834+
default: false
1835+
description: >
1836+
When true, after the feed is updated, propagate its license_id to all other feeds sharing the same normalized license_url where license_id is currently unset.
1837+
16981838
required:
16991839
- id
17001840
- status
@@ -1707,6 +1847,8 @@ components:
17071847
17081848
17091849
1850+
1851+
17101852
* `active` Feed should be used in public trip planners.
17111853
* `deprecated` Feed is explicitly deprecated and should not be used in public trip planners.
17121854
* `inactive` Feed hasn't been recently updated and should be used at risk of providing outdated information.
@@ -1729,6 +1871,8 @@ components:
17291871
17301872
17311873
1874+
1875+
17321876
* `gtfs` GTFS feed.
17331877
* `gtfs_rt` GTFS-RT feed.
17341878
* `gbfs` GBFS feed.

functions-python/operations_api/.openapi-generator/FILES

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ src/feeds_gen/models/operation_create_request_gtfs_rt_feed.py
3737
src/feeds_gen/models/operation_feed.py
3838
src/feeds_gen/models/operation_gtfs_feed.py
3939
src/feeds_gen/models/operation_gtfs_rt_feed.py
40+
src/feeds_gen/models/propagate_license_affected_feed.py
41+
src/feeds_gen/models/propagate_license_request.py
42+
src/feeds_gen/models/propagate_license_response.py
4043
src/feeds_gen/models/redirect.py
4144
src/feeds_gen/models/search_feed_item_result.py
4245
src/feeds_gen/models/source_info.py

0 commit comments

Comments
 (0)