Skip to content

Commit d6c8289

Browse files
author
sw33tLie
committed
improve ai extraction from policy
1 parent 21e48da commit d6c8289

2 files changed

Lines changed: 16 additions & 32 deletions

File tree

cmd/poll.go

Lines changed: 10 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -564,28 +564,20 @@ func mergePolicyItems(processedItems []storage.TargetItem, policyItems []storage
564564
return processedItems
565565
}
566566

567-
// Build a set of all existing variant values (normalized) per category across all items,
568-
// so we can detect duplicates.
569-
type variantKey struct {
570-
normalized string
571-
category string
572-
}
573-
existingVariants := make(map[variantKey]bool)
567+
// Build a set of all existing target values (normalized, scheme-stripped).
568+
// We deduplicate by normalized target ONLY (ignoring category), because
569+
// the same domain should not be added as a variant if it already exists
570+
// as a raw target or AI variant under any category.
571+
existingTargets := make(map[string]bool)
574572
for _, item := range processedItems {
575-
// The raw target itself is also "known" — normalize it for comparison
576573
rawNorm := normForDedup(item.URI)
577-
cat := strings.ToLower(strings.TrimSpace(scope.NormalizeCategory(item.Category)))
578574
if rawNorm != "" {
579-
existingVariants[variantKey{normalized: rawNorm, category: cat}] = true
575+
existingTargets[rawNorm] = true
580576
}
581577
for _, v := range item.Variants {
582578
vNorm := normForDedup(v.Value)
583-
vCat := cat
584-
if v.HasCategory {
585-
vCat = strings.ToLower(strings.TrimSpace(v.Category))
586-
}
587579
if vNorm != "" {
588-
existingVariants[variantKey{normalized: vNorm, category: vCat}] = true
580+
existingTargets[vNorm] = true
589581
}
590582
}
591583
}
@@ -607,8 +599,8 @@ func mergePolicyItems(processedItems []storage.TargetItem, policyItems []storage
607599
continue
608600
}
609601

610-
// Skip if this target+category already exists
611-
if existingVariants[variantKey{normalized: piNorm, category: piCat}] {
602+
// Skip if this target already exists under any category
603+
if existingTargets[piNorm] {
612604
continue
613605
}
614606

@@ -639,7 +631,7 @@ func mergePolicyItems(processedItems []storage.TargetItem, policyItems []storage
639631
HasCategory: true,
640632
Category: piCat,
641633
})
642-
existingVariants[variantKey{normalized: piNorm, category: piCat}] = true
634+
existingTargets[piNorm] = true
643635
}
644636

645637
return processedItems

website/pkg/core/poller.go

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -444,25 +444,17 @@ func webMergePolicyItems(processedItems []storage.TargetItem, policyItems []stor
444444
return processedItems
445445
}
446446

447-
type variantKey struct {
448-
normalized string
449-
category string
450-
}
451-
existingVariants := make(map[variantKey]bool)
447+
// Deduplicate by normalized target ONLY (ignoring category).
448+
existingTargets := make(map[string]bool)
452449
for _, item := range processedItems {
453450
rawNorm := webNormForDedup(item.URI)
454-
cat := strings.ToLower(strings.TrimSpace(scope.NormalizeCategory(item.Category)))
455451
if rawNorm != "" {
456-
existingVariants[variantKey{normalized: rawNorm, category: cat}] = true
452+
existingTargets[rawNorm] = true
457453
}
458454
for _, v := range item.Variants {
459455
vNorm := webNormForDedup(v.Value)
460-
vCat := cat
461-
if v.HasCategory {
462-
vCat = strings.ToLower(strings.TrimSpace(v.Category))
463-
}
464456
if vNorm != "" {
465-
existingVariants[variantKey{normalized: vNorm, category: vCat}] = true
457+
existingTargets[vNorm] = true
466458
}
467459
}
468460
}
@@ -482,7 +474,7 @@ func webMergePolicyItems(processedItems []storage.TargetItem, policyItems []stor
482474
continue
483475
}
484476

485-
if existingVariants[variantKey{normalized: piNorm, category: piCat}] {
477+
if existingTargets[piNorm] {
486478
continue
487479
}
488480

@@ -511,7 +503,7 @@ func webMergePolicyItems(processedItems []storage.TargetItem, policyItems []stor
511503
HasCategory: true,
512504
Category: piCat,
513505
})
514-
existingVariants[variantKey{normalized: piNorm, category: piCat}] = true
506+
existingTargets[piNorm] = true
515507
}
516508

517509
return processedItems

0 commit comments

Comments (0)