managedcode
diff --git a/‎AGENTS.md‎
Lines changed: 5 additions & 0 deletions b/‎AGENTS.md‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎Directory.Packages.props‎
Lines changed: 2 additions & 0 deletions b/‎Directory.Packages.props‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎PLAN-MIGRATION.md‎
Lines changed: 14 additions & 15 deletions b/‎PLAN-MIGRATION.md‎
Lines changed: 14 additions & 15 deletions
diff --git a/‎src/ManagedCode.Presidio.Analyzer/AnalyzerEngine.cs‎
Lines changed: 2 additions & 6 deletions b/‎src/ManagedCode.Presidio.Analyzer/AnalyzerEngine.cs‎
Lines changed: 2 additions & 6 deletions
diff --git a/‎src/ManagedCode.Presidio.Analyzer/AssemblyAttributes.cs‎
Lines changed: 4 additions & 0 deletions b/‎src/ManagedCode.Presidio.Analyzer/AssemblyAttributes.cs‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎src/ManagedCode.Presidio.Analyzer/AuAbnRecognizer.cs‎
Lines changed: 67 additions & 0 deletions b/‎src/ManagedCode.Presidio.Analyzer/AuAbnRecognizer.cs‎
Lines changed: 67 additions & 0 deletions
diff --git a/‎src/ManagedCode.Presidio.Analyzer/AuAcnRecognizer.cs‎
Lines changed: 67 additions & 0 deletions b/‎src/ManagedCode.Presidio.Analyzer/AuAcnRecognizer.cs‎
Lines changed: 67 additions & 0 deletions
diff --git a/‎src/ManagedCode.Presidio.Analyzer/AuMedicareRecognizer.cs‎
Lines changed: 65 additions & 0 deletions b/‎src/ManagedCode.Presidio.Analyzer/AuMedicareRecognizer.cs‎
Lines changed: 65 additions & 0 deletions
diff --git a/‎src/ManagedCode.Presidio.Analyzer/AuTfnRecognizer.cs‎
Lines changed: 65 additions & 0 deletions b/‎src/ManagedCode.Presidio.Analyzer/AuTfnRecognizer.cs‎
Lines changed: 65 additions & 0 deletions
@@ -38,6 +38,11 @@ Update guidelines:
 - for Presidio analyzer tests, NEVER add stubbed recognizer tests; port the Python scenarios to exercise the real analyzer pipeline end-to-end
 - for Presidio analyzer parity work, keep iterating without pausing for confirmation and focus solely on integration tests that validate real functionality
 - for Presidio migration tasks, do not stop to ask the user for clarification mid-task; follow the migration plan and deliver completed work
+- for Presidio migration tasks, when the user says "продовжити"/"continue", proceed through the target file step by step without asking for additional confirmation
+- for Presidio migration tasks, when you see a way to improve something, note the idea in the working file and then implement it without waiting for user approval
+- for Presidio migration tasks, default to continuing the migration workflow without waiting for "продовжити"/"continue"; halt only if the user explicitly redirects
+- for Presidio migration tasks, when the user specifies an execution order for follow-up work, honor that sequence without reconfirming and keep progressing task-by-task
+- for Presidio migration tasks, capture any important follow-up items directly in the working file as TODOs so they are not lost
 - for Presidio test work, ALWAYS include negative/error scenarios alongside positive cases to validate failure paths
 - for Presidio recognizer coverage, ensure EU social security numbers are handled alongside US SSN patterns
 - use enums and constants over magic strings and numbers
 
@@ -10,5 +10,7 @@
     <PackageVersion Include="Microsoft.ML.OnnxRuntime" Version="1.19.2" />
     <PackageVersion Include="Microsoft.ML.Tokenizers" Version="1.0.3" />
     <PackageVersion Include="libphonenumber-csharp" Version="8.13.36" />
+    <PackageVersion Include="YamlDotNet" Version="15.1.2" />
+    <PackageVersion Include="Shouldly" Version="4.2.1" />
   </ItemGroup>
 </Project>
@@ -15,8 +15,8 @@ This document tracks parity work between `external/microsoft-presidio` (Python)
 | `EntityRecognizer` base | `presidio_analyzer/entity_recognizer.py` | ✅ | Ported as `EntityRecognizer` (C#) |
 | `Pattern` helper | `predefined_recognizers/generic/pattern_recognizer.py` | ✅ | Implemented as `Pattern` (C#) |
 | `PatternRecognizer` | same | ✅ | Implemented with regex caching & validation hooks |
-| `LocalRecognizer` / remote base classes | `local_recognizer.py`, `remote_recognizer.py` | 🚧 | Not yet required; evaluate once we port remote recognizers |
-| `RecognizerRegistry` & provider | `recognizer_registry.py` | 🚧 | Basic functionality present; needs YAML loader + configuration parity |
+| `LocalRecognizer` / remote base classes | `local_recognizer.py`, `remote_recognizer.py` | ✅ | Ported as `LocalRecognizer`/`RemoteRecognizer` with unit coverage |
+| `RecognizerRegistry` & provider | `recognizer_registry.py` | ✅ | YAML-backed loader + reflective instantiation aligned with Python |
 | NLP engines (spaCy, transformers, etc.) | `nlp_engine/` | 🚧 | Only ONNX pipeline ported (`OnnxNlpEngine`) |
 
 ## Generic Recognizers
@@ -37,17 +37,17 @@ This document tracks parity work between `external/microsoft-presidio` (Python)
 
 | Country | Python Class | Status |
 | --- | --- | --- |
-| Australia | `AuAbnRecognizer`, `AuAcnRecognizer`, `AuMedicareRecognizer`, `AuTfnRecognizer` | 🚧 |
+| Australia | `AuAbnRecognizer`, `AuAcnRecognizer`, `AuMedicareRecognizer`, `AuTfnRecognizer` | ✅ |
 | Finland | `FiPersonalIdentityCodeRecognizer` | ✅ |
-| India | `InAadhaarRecognizer`, `InGstinRecognizer`, `InPanRecognizer`, `InPassportRecognizer`, `InVehicleRegistrationRecognizer`, `InVoterRecognizer` | 🚧 |
-| Italy | `ItDriverLicenseRecognizer`, `ItFiscalCodeRecognizer`, `ItIdentityCardRecognizer`, `ItPassportRecognizer`, `ItVatCodeRecognizer` | 🚧 |
-| Korea | `KrRrnRecognizer` | 🚧 |
+| India | `InAadhaarRecognizer`, `InGstinRecognizer`, `InPanRecognizer`, `InPassportRecognizer`, `InVehicleRegistrationRecognizer`, `InVoterRecognizer` | ✅ |
+| Italy | `ItDriverLicenseRecognizer`, `ItFiscalCodeRecognizer`, `ItIdentityCardRecognizer`, `ItPassportRecognizer`, `ItVatCodeRecognizer` | ✅ |
+| Korea | `KrRrnRecognizer` | ✅ |
 | Poland | `PlPeselRecognizer` | ✅ |
-| Singapore | `SgFinRecognizer`, `SgUenRecognizer` | 🚧 |
-| Spain | `EsNieRecognizer`, `EsNifRecognizer` | 🚧 |
-| Thailand | `ThTninRecognizer` | 🚧 |
-| UK | `NhsRecognizer`, `UkNinoRecognizer` | 🚧 |
-| US | `MedicalLicenseRecognizer`, `UsBankRecognizer`, `UsLicenseRecognizer`, `UsItinRecognizer`, `UsPassportRecognizer` (🚧), `UsSsnRecognizer` (✅) | 🚧 |
+| Singapore | `SgFinRecognizer`, `SgUenRecognizer` | ✅ |
+| Spain | `EsNieRecognizer`, `EsNifRecognizer` | ✅ |
+| Thailand | `ThTninRecognizer` | ✅ |
+| UK | `NhsRecognizer`, `UkNinoRecognizer` | ✅ |
+| US | `MedicalLicenseRecognizer`, `UsBankRecognizer`, `UsLicenseRecognizer`, `UsItinRecognizer`, `UsPassportRecognizer`, `UsSsnRecognizer` | ✅ |
 
 ## NLP Engine Recognizers
 
@@ -67,7 +67,6 @@ This document tracks parity work between `external/microsoft-presidio` (Python)
 
 ## Next Actions
 
-- Begin porting country-specific recognizers (prioritize high-demand markets) now that the generic suite is complete in C#.
-- Triage country-specific recognizers based on customer demand.
-- Flesh out registry configuration loading (YAML) for parity with Python.
-- Plan for NLP engine parity (spaCy/Stanza/Transformers) or replacements.
+- Continue porting any remaining country-specific recognizers not yet covered (e.g., Australia-specific business identifiers beyond the current scope or additional EU IDs).
+- Prioritize recognizer backlog based on customer demand and add coverage tests alongside each port.
+- Implement .NET equivalents for spaCy/Stanza/Transformers NLP engines or design alternative pipelines that meet parity guarantees.
@@ -38,10 +38,8 @@ public AnalyzerEngine(
 
         if (registry is null)
         {
-            var provider = new RecognizerRegistryProvider(
-                new RecognizerRegistryConfiguration(_supportedLanguages));
-            _registry = provider.CreateRecognizerRegistry();
-            _registry.AddNlpRecognizer(_nlpEngine);
+            var provider = new RecognizerRegistryProvider();
+            _registry = provider.CreateRecognizerRegistry(_nlpEngine, _supportedLanguages);
         }
         else
         {
@@ -54,8 +52,6 @@ public AnalyzerEngine(
             _registry = registry;
         }
 
-        _registry.LoadPredefinedRecognizers(_nlpEngine, _supportedLanguages);
-
         _contextAwareEnhancer = contextAwareEnhancer ?? new LemmaContextAwareEnhancer();
     }
 
 
@@ -0,0 +1,4 @@
+using System.Runtime.CompilerServices;
+
+[assembly: InternalsVisibleTo("ManagedCode.Presidio.Analyzer.Tests")]
+
@@ -0,0 +1,67 @@
+namespace ManagedCode.Presidio.Analyzer;
+
+/// <summary>
+/// Recognizes Australian Business Numbers (ABN) using canonical patterns and checksum validation.
+/// </summary>
+/// <param name="patterns">Patterns to match; the built-in ABN patterns are used when <c>null</c>.</param>
+/// <param name="context">Context words; the built-in ABN context words are used when <c>null</c>.</param>
+/// <param name="supportedLanguage">Language code forwarded to the base recognizer.</param>
+/// <param name="supportedEntity">Entity name forwarded to the base recognizer.</param>
+/// <param name="replacementPairs">
+/// Search/replacement pairs applied to a match before validation; hyphens and spaces are removed when <c>null</c>.
+/// </param>
+public sealed class AuAbnRecognizer(
+    IEnumerable<Pattern>? patterns = null,
+    IEnumerable<string>? context = null,
+    string supportedLanguage = "en",
+    string supportedEntity = "AU_ABN",
+    IEnumerable<(string Search, string Replacement)>? replacementPairs = null) : PatternRecognizer(
+        supportedEntity,
+        patterns ?? DefaultPatterns,
+        context: context ?? DefaultContext,
+        supportedLanguage: supportedLanguage)
+{
+    /// <summary>Number of digits in a sanitized ABN.</summary>
+    private const int DigitCount = 11;
+
+    /// <summary>Checksum modulus; a valid ABN has a weighted sum divisible by this value.</summary>
+    private const int Modulus = 89;
+
+    private static readonly Pattern[] DefaultPatterns =
+    {
+        new("ABN (Medium)", @"\b\d{2}\s\d{3}\s\d{3}\s\d{3}\b", 0.1),
+        new("ABN (Low)", @"\b\d{11}\b", 0.01),
+    };
+
+    private static readonly string[] DefaultContext =
+    {
+        "australian business number",
+        "abn",
+    };
+
+    /// <summary>Per-position weights, one for each of the eleven digits.</summary>
+    private static readonly int[] Weights = { 10, 1, 3, 5, 7, 9, 11, 13, 15, 17, 19 };
+
+    private readonly IReadOnlyList<(string Search, string Replacement)> _replacementPairs =
+        replacementPairs?.ToArray() ?? new[]
+        {
+            ("-", string.Empty),
+            (" ", string.Empty),
+        };
+
+    /// <summary>
+    /// Validates a matched candidate against the ABN weighted checksum.
+    /// </summary>
+    /// <param name="patternText">The raw text matched by one of the patterns.</param>
+    /// <returns>
+    /// <c>true</c> when the sanitized text is exactly eleven ASCII digits whose weighted sum is divisible by 89;
+    /// otherwise <c>false</c>.
+    /// </returns>
+    protected override bool? ValidateResult(string patternText)
+    {
+        if (string.IsNullOrWhiteSpace(patternText))
+        {
+            return false;
+        }
+
+        var sanitized = EntityRecognizer.SanitizeValue(patternText, _replacementPairs);
+        if (sanitized.Length != DigitCount)
+        {
+            return false;
+        }
+
+        Span<int> digits = stackalloc int[DigitCount];
+        for (var i = 0; i < DigitCount; i++)
+        {
+            var ch = sanitized[i];
+
+            // Only ASCII digits are accepted: char.IsDigit also admits other Unicode decimal
+            // digits, for which "ch - '0'" would not produce the digit's numeric value.
+            if (ch is < '0' or > '9')
+            {
+                return false;
+            }
+
+            digits[i] = ch - '0';
+        }
+
+        // The first digit is decremented before weighting; a leading 0 is mapped to 9.
+        // NOTE(review): the 0 -> 9 mapping presumably mirrors the upstream Python recognizer - confirm.
+        digits[0] = digits[0] == 0 ? 9 : digits[0] - 1;
+
+        var sum = 0;
+        for (var i = 0; i < Weights.Length; i++)
+        {
+            sum += digits[i] * Weights[i];
+        }
+
+        return sum % Modulus == 0;
+    }
+}
@@ -0,0 +1,67 @@
+namespace ManagedCode.Presidio.Analyzer;
+
+/// <summary>
+/// Recognizes Australian Company Numbers (ACN) with checksum validation.
+/// </summary>
+/// <param name="patterns">Patterns to match; the built-in ACN patterns are used when <c>null</c>.</param>
+/// <param name="context">Context words; the built-in ACN context words are used when <c>null</c>.</param>
+/// <param name="supportedLanguage">Language code forwarded to the base recognizer.</param>
+/// <param name="supportedEntity">Entity name forwarded to the base recognizer.</param>
+/// <param name="replacementPairs">
+/// Search/replacement pairs applied to a match before validation; hyphens and spaces are removed when <c>null</c>.
+/// </param>
+public sealed class AuAcnRecognizer(
+    IEnumerable<Pattern>? patterns = null,
+    IEnumerable<string>? context = null,
+    string supportedLanguage = "en",
+    string supportedEntity = "AU_ACN",
+    IEnumerable<(string Search, string Replacement)>? replacementPairs = null) : PatternRecognizer(
+        supportedEntity,
+        patterns ?? DefaultPatterns,
+        context: context ?? DefaultContext,
+        supportedLanguage: supportedLanguage)
+{
+    /// <summary>Number of digits in a sanitized ACN (eight weighted digits plus one check digit).</summary>
+    private const int DigitCount = 9;
+
+    private static readonly Pattern[] DefaultPatterns =
+    {
+        new("ACN (Medium)", @"\b\d{3}\s\d{3}\s\d{3}\b", 0.1),
+        new("ACN (Low)", @"\b\d{9}\b", 0.01),
+    };
+
+    private static readonly string[] DefaultContext =
+    {
+        "australian company number",
+        "acn",
+    };
+
+    /// <summary>Weights applied to the first eight digits; the ninth digit is the check digit.</summary>
+    private static readonly int[] Weights = { 8, 7, 6, 5, 4, 3, 2, 1 };
+
+    private readonly IReadOnlyList<(string Search, string Replacement)> _replacementPairs =
+        replacementPairs?.ToArray() ?? new[]
+        {
+            ("-", string.Empty),
+            (" ", string.Empty),
+        };
+
+    /// <summary>
+    /// Validates a matched candidate against the ACN check digit.
+    /// </summary>
+    /// <param name="patternText">The raw text matched by one of the patterns.</param>
+    /// <returns>
+    /// <c>true</c> when the sanitized text is exactly nine ASCII digits and the last digit equals the
+    /// check digit derived from the first eight; otherwise <c>false</c>.
+    /// </returns>
+    protected override bool? ValidateResult(string patternText)
+    {
+        if (string.IsNullOrWhiteSpace(patternText))
+        {
+            return false;
+        }
+
+        var sanitized = EntityRecognizer.SanitizeValue(patternText, _replacementPairs);
+        if (sanitized.Length != DigitCount)
+        {
+            return false;
+        }
+
+        Span<int> digits = stackalloc int[DigitCount];
+        for (var i = 0; i < DigitCount; i++)
+        {
+            var ch = sanitized[i];
+
+            // Only ASCII digits are accepted: char.IsDigit also admits other Unicode decimal
+            // digits, for which "ch - '0'" would not produce the digit's numeric value.
+            if (ch is < '0' or > '9')
+            {
+                return false;
+            }
+
+            digits[i] = ch - '0';
+        }
+
+        var sum = 0;
+        for (var i = 0; i < Weights.Length; i++)
+        {
+            sum += digits[i] * Weights[i];
+        }
+
+        // The check digit is the complement of the remainder to 10; the outer "% 10"
+        // folds a complement of 10 (remainder 0) back to 0.
+        var remainder = sum % 10;
+        var checkDigit = (10 - remainder) % 10;
+        return checkDigit == digits[^1];
+    }
+}
@@ -0,0 +1,65 @@
+namespace ManagedCode.Presidio.Analyzer;
+
+/// <summary>
+/// Recognizes Australian Medicare numbers using checksum validation.
+/// </summary>
+/// <param name="patterns">Patterns to match; the built-in Medicare patterns are used when <c>null</c>.</param>
+/// <param name="context">Context words; the built-in Medicare context words are used when <c>null</c>.</param>
+/// <param name="supportedLanguage">Language code forwarded to the base recognizer.</param>
+/// <param name="supportedEntity">Entity name forwarded to the base recognizer.</param>
+/// <param name="replacementPairs">
+/// Search/replacement pairs applied to a match before validation; hyphens and spaces are removed when <c>null</c>.
+/// </param>
+public sealed class AuMedicareRecognizer(
+    IEnumerable<Pattern>? patterns = null,
+    IEnumerable<string>? context = null,
+    string supportedLanguage = "en",
+    string supportedEntity = "AU_MEDICARE",
+    IEnumerable<(string Search, string Replacement)>? replacementPairs = null) : PatternRecognizer(
+        supportedEntity,
+        patterns ?? DefaultPatterns,
+        context: context ?? DefaultContext,
+        supportedLanguage: supportedLanguage)
+{
+    /// <summary>Number of digits in a sanitized Medicare number.</summary>
+    private const int DigitCount = 10;
+
+    /// <summary>Zero-based position of the check digit within the sanitized number.</summary>
+    private const int CheckDigitIndex = 8;
+
+    // Medicare card numbers start with a digit in the range 2-6, so the default patterns
+    // reject other leading digits up front instead of relying on the checksum alone.
+    private static readonly Pattern[] DefaultPatterns =
+    {
+        new("Medicare (Medium)", @"\b[2-6]\d{3}\s\d{5}\s\d\b", 0.1),
+        new("Medicare (Low)", @"\b[2-6]\d{9}\b", 0.01),
+    };
+
+    private static readonly string[] DefaultContext =
+    {
+        "medicare",
+        "australian medicare",
+    };
+
+    /// <summary>Weights applied to the first eight digits.</summary>
+    private static readonly int[] Weights = { 1, 3, 7, 9, 1, 3, 7, 9 };
+
+    private readonly IReadOnlyList<(string Search, string Replacement)> _replacementPairs =
+        replacementPairs?.ToArray() ?? new[]
+        {
+            ("-", string.Empty),
+            (" ", string.Empty),
+        };
+
+    /// <summary>
+    /// Validates a matched candidate against the Medicare check digit.
+    /// </summary>
+    /// <param name="patternText">The raw text matched by one of the patterns.</param>
+    /// <returns>
+    /// <c>true</c> when the sanitized text is exactly ten ASCII digits and the ninth digit equals the
+    /// weighted sum of the first eight modulo 10; otherwise <c>false</c>.
+    /// </returns>
+    protected override bool? ValidateResult(string patternText)
+    {
+        if (string.IsNullOrWhiteSpace(patternText))
+        {
+            return false;
+        }
+
+        var sanitized = EntityRecognizer.SanitizeValue(patternText, _replacementPairs);
+        if (sanitized.Length != DigitCount)
+        {
+            return false;
+        }
+
+        Span<int> digits = stackalloc int[DigitCount];
+        for (var i = 0; i < DigitCount; i++)
+        {
+            var ch = sanitized[i];
+
+            // Only ASCII digits are accepted: char.IsDigit also admits other Unicode decimal
+            // digits, for which "ch - '0'" would not produce the digit's numeric value.
+            if (ch is < '0' or > '9')
+            {
+                return false;
+            }
+
+            digits[i] = ch - '0';
+        }
+
+        var sum = 0;
+        for (var i = 0; i < Weights.Length; i++)
+        {
+            sum += digits[i] * Weights[i];
+        }
+
+        // The tenth digit takes no part in the checksum; only the ninth is compared.
+        return (sum % 10) == digits[CheckDigitIndex];
+    }
+}
@@ -0,0 +1,65 @@
+namespace ManagedCode.Presidio.Analyzer;
+
+/// <summary>
+/// Recognizes Australian Tax File Numbers (TFN) with checksum validation.
+/// </summary>
+/// <param name="patterns">Patterns to match; the built-in TFN patterns are used when <c>null</c>.</param>
+/// <param name="context">Context words; the built-in TFN context words are used when <c>null</c>.</param>
+/// <param name="supportedLanguage">Language code forwarded to the base recognizer.</param>
+/// <param name="supportedEntity">Entity name forwarded to the base recognizer.</param>
+/// <param name="replacementPairs">
+/// Search/replacement pairs applied to a match before validation; hyphens and spaces are removed when <c>null</c>.
+/// </param>
+public sealed class AuTfnRecognizer(
+    IEnumerable<Pattern>? patterns = null,
+    IEnumerable<string>? context = null,
+    string supportedLanguage = "en",
+    string supportedEntity = "AU_TFN",
+    IEnumerable<(string Search, string Replacement)>? replacementPairs = null) : PatternRecognizer(
+        supportedEntity,
+        patterns ?? DefaultPatterns,
+        context: context ?? DefaultContext,
+        supportedLanguage: supportedLanguage)
+{
+    /// <summary>Number of digits in a sanitized TFN.</summary>
+    private const int DigitCount = 9;
+
+    /// <summary>Checksum modulus; a valid TFN has a weighted sum divisible by this value.</summary>
+    private const int Modulus = 11;
+
+    private static readonly Pattern[] DefaultPatterns =
+    {
+        new("TFN (Medium)", @"\b\d{3}\s\d{3}\s\d{3}\b", 0.1),
+        new("TFN (Low)", @"\b\d{9}\b", 0.01),
+    };
+
+    private static readonly string[] DefaultContext =
+    {
+        "tax file number",
+        "tfn",
+    };
+
+    /// <summary>Per-position weights, one for each of the nine digits.</summary>
+    private static readonly int[] Weights = { 1, 4, 3, 7, 5, 8, 6, 9, 10 };
+
+    private readonly IReadOnlyList<(string Search, string Replacement)> _replacementPairs =
+        replacementPairs?.ToArray() ?? new[]
+        {
+            ("-", string.Empty),
+            (" ", string.Empty),
+        };
+
+    /// <summary>
+    /// Validates a matched candidate against the TFN weighted checksum.
+    /// </summary>
+    /// <param name="patternText">The raw text matched by one of the patterns.</param>
+    /// <returns>
+    /// <c>true</c> when the sanitized text is exactly nine ASCII digits whose weighted sum is divisible by 11;
+    /// otherwise <c>false</c>.
+    /// </returns>
+    protected override bool? ValidateResult(string patternText)
+    {
+        if (string.IsNullOrWhiteSpace(patternText))
+        {
+            return false;
+        }
+
+        var sanitized = EntityRecognizer.SanitizeValue(patternText, _replacementPairs);
+        if (sanitized.Length != DigitCount)
+        {
+            return false;
+        }
+
+        Span<int> digits = stackalloc int[DigitCount];
+        for (var i = 0; i < DigitCount; i++)
+        {
+            var ch = sanitized[i];
+
+            // Only ASCII digits are accepted: char.IsDigit also admits other Unicode decimal
+            // digits, for which "ch - '0'" would not produce the digit's numeric value.
+            if (ch is < '0' or > '9')
+            {
+                return false;
+            }
+
+            digits[i] = ch - '0';
+        }
+
+        var sum = 0;
+        for (var i = 0; i < Weights.Length; i++)
+        {
+            sum += digits[i] * Weights[i];
+        }
+
+        return sum % Modulus == 0;
+    }
+}
Original file line number	Diff line number	Diff line change
`@@ -38,10 +38,8 @@ public AnalyzerEngine(`
`38`	`38`
`39`	`39`	`if (registry is null)`
`40`	`40`	`{`
`41`		`- var provider = new RecognizerRegistryProvider(`
`42`		`- new RecognizerRegistryConfiguration(_supportedLanguages));`
`43`		`- _registry = provider.CreateRecognizerRegistry();`
`44`		`- _registry.AddNlpRecognizer(_nlpEngine);`
	`41`	`+ var provider = new RecognizerRegistryProvider();`
	`42`	`+ _registry = provider.CreateRecognizerRegistry(_nlpEngine, _supportedLanguages);`
`45`	`43`	`}`
`46`	`44`	`else`
`47`	`45`	`{`
`@@ -54,8 +52,6 @@ public AnalyzerEngine(`
`54`	`52`	`_registry = registry;`
`55`	`53`	`}`
`56`	`54`
`57`		`- _registry.LoadPredefinedRecognizers(_nlpEngine, _supportedLanguages);`
`58`		`-`
`59`	`55`	`_contextAwareEnhancer = contextAwareEnhancer ?? new LemmaContextAwareEnhancer();`
`60`	`56`	`}`
`61`	`57`
-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +using System.Runtime.CompilerServices;
++
 +[assembly: InternalsVisibleTo("ManagedCode.Presidio.Analyzer.Tests")]
++