|
7 | 7 | from typing import List, Tuple, Optional |
8 | 8 |
|
9 | 9 | from shared.common.db_utils import normalize_url, normalize_url_str |
10 | | -from shared.database_gen.sqlacodegen_models import License, FeedLicenseChange |
| 10 | +from shared.database_gen.sqlacodegen_models import License, FeedLicenseChange, Feed |
11 | 11 |
|
12 | 12 |
|
13 | 13 | @dataclass |
@@ -534,3 +534,131 @@ def assign_license_by_url( |
534 | 534 | ) |
535 | 535 |
|
536 | 536 | return best |
| 537 | + |
| 538 | + |
@dataclass
class PropagateLicenseAffectedFeedResult:
    """One feed selected for update by a license propagation run.

    Attributes:
        feed_id: Identifier reported for the feed.
        previous_license_id: License ID the feed carried before the
            propagation, or ``None`` when it had none.
        data_type: The feed's data type, if known.
    """

    feed_id: str
    previous_license_id: Optional[str]
    data_type: Optional[str]
| 546 | + |
| 547 | + |
@dataclass
class PropagateLicenseResult:
    """Summary of one license propagation run.

    Attributes:
        license_id: License ID that was propagated.
        license_url: License URL exactly as supplied by the caller.
        normalized_license_url: Normalized form of ``license_url`` that was
            used to match feeds.
        dry_run: ``True`` when the run only computed results and persisted
            nothing.
        override: ``True`` when feeds that already had a ``license_id`` were
            eligible for update as well.
        total_feeds_with_same_url: Count of all feeds sharing the normalized
            license URL, whether or not they were updated.
        affected_feeds_count: Count of feeds that were (or, in a dry run,
            would be) updated.
        affected_feeds: One descriptor per affected feed.
    """

    # Field order is part of the positional constructor; keep it stable.
    license_id: str
    license_url: str
    normalized_license_url: str
    dry_run: bool
    override: bool
    total_feeds_with_same_url: int
    affected_feeds_count: int
    affected_feeds: List[PropagateLicenseAffectedFeedResult]
| 571 | + |
| 572 | + |
def propagate_license_by_url(
    license_id: str,
    license_url: str,
    db_session: Session,
    *,
    dry_run: bool = True,
    override: bool = False,
) -> PropagateLicenseResult:
    """Propagate a license ID to all feeds sharing the same normalized license URL.

    Selects every feed whose ``operational_status`` is not ``"unpublished"`` and
    whose ``license_url`` normalizes to the same value as ``license_url``. Unless
    ``dry_run`` is set, each selected feed gets ``license_id`` assigned and a
    ``FeedLicenseChange`` audit record is added to the session.

    This function does not commit: in a non-dry run the modified feeds and the
    new audit records are left pending on ``db_session``.

    Args:
        license_id: The license ID to propagate. Must exist in the ``license`` table.
        license_url: The reference URL whose normalized form is used for matching.
        db_session: Active SQLAlchemy session.
        dry_run: When True (default), compute results without modifying any feed
            or adding audit records.
        override: When False (default), only update feeds where ``license_id`` is
            ``None``. When True, also update feeds that already have a different
            ``license_id``.

    Returns:
        A ``PropagateLicenseResult`` describing the outcome.

    Raises:
        ValueError: If ``license_id`` does not exist in the database, or if
            ``license_url`` normalizes to an empty value.
    """
    if db_session.get(License, license_id) is None:
        raise ValueError(f"License '{license_id}' not found in the database.")

    normalized_url = normalize_url_str(license_url)
    if not normalized_url:
        # A blank (or scheme-only) URL normalizes to nothing. Matching on it would
        # select every feed whose own license_url is equally empty after
        # normalization and stamp the license on all of them.
        raise ValueError(
            f"License URL '{license_url}' is empty after normalization; refusing to propagate."
        )

    # Apply the SQL-side normalization to the column and compare it with the
    # Python-side normalized reference URL.
    # NOTE(review): in SQL, rows whose operational_status is NULL do not satisfy
    # `!= 'unpublished'` and are therefore excluded — confirm the column is
    # non-nullable or that this is intended.
    all_candidates = (
        db_session.query(Feed)
        .filter(
            Feed.license_url.isnot(None),
            Feed.operational_status != "unpublished",
            func.lower(func.trim(normalize_url(Feed.license_url))) == normalized_url,
        )
        .all()
    )
    total_feeds_with_same_url = len(all_candidates)

    if override:
        # Feeds already pointing at this license need no change.
        feeds_to_update = [feed for feed in all_candidates if feed.license_id != license_id]
    else:
        feeds_to_update = [feed for feed in all_candidates if feed.license_id is None]

    # Snapshot the affected feeds before mutating them so that
    # previous_license_id reflects the pre-propagation value.
    affected: List[PropagateLicenseAffectedFeedResult] = [
        PropagateLicenseAffectedFeedResult(
            feed_id=feed.stable_id,
            previous_license_id=feed.license_id,
            data_type=feed.data_type,
        )
        for feed in feeds_to_update
    ]

    if not dry_run:
        for feed in feeds_to_update:
            feed.license_id = license_id
            db_session.add(
                FeedLicenseChange(
                    feed_id=feed.id,
                    feed_license_url=feed.license_url,
                    matched_license_id=license_id,
                    confidence=1.0,
                    match_type="propagated",
                    matched_source="propagate_match",
                    verified=True,
                )
            )

    logging.info(
        "propagate_license_by_url: license_id=%s url=%s dry_run=%s override=%s total_with_url=%d affected=%d",
        license_id,
        license_url,
        dry_run,
        override,
        total_feeds_with_same_url,
        len(affected),
    )

    return PropagateLicenseResult(
        license_id=license_id,
        license_url=license_url,
        normalized_license_url=normalized_url,
        dry_run=dry_run,
        override=override,
        total_feeds_with_same_url=total_feeds_with_same_url,
        affected_feeds_count=len(affected),
        affected_feeds=affected,
    )
0 commit comments