Skip to content

Commit a31e41c

Browse files
committed
feat: Add BatchAnonymizerEngine and DeanonymizeEngine with corresponding tests
1 parent ce9ecbb commit a31e41c

File tree

6 files changed

+614
-0
lines changed

6 files changed

+614
-0
lines changed
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
using System.Collections;
2+
using System.Globalization;
3+
using CoreRecognizerResult = ManagedCode.Presidio.Core.RecognizerResult;
4+
5+
namespace ManagedCode.Presidio.Anonymizer;
6+
7+
/// <summary>
/// Provides batch anonymization helpers mirroring the Python BatchAnonymizerEngine.
/// Lists and dictionary-shaped analyzer output are walked item by item and every
/// text item is anonymized through the wrapped <see cref="AnonymizerEngine"/>.
/// </summary>
/// <param name="anonymizerEngine">
/// Engine used to anonymize each individual text. A new <see cref="AnonymizerEngine"/>
/// is created when this is <see langword="null"/>.
/// </param>
public sealed class BatchAnonymizerEngine(AnonymizerEngine? anonymizerEngine = null)
{
    private readonly AnonymizerEngine _anonymizerEngine = anonymizerEngine ?? new AnonymizerEngine();

    /// <summary>
    /// Anonymizes every scalar item of <paramref name="texts"/>.
    /// </summary>
    /// <param name="texts">
    /// Items to process. Items of type <see cref="string"/>, <see cref="bool"/>, <see cref="int"/>,
    /// <see cref="long"/>, <see cref="float"/>, <see cref="double"/> or <see cref="decimal"/> are
    /// converted to text with the invariant culture and anonymized; any other item
    /// (including <see langword="null"/>) is copied to the output unchanged.
    /// </param>
    /// <param name="recognizerResultsList">
    /// Recognizer results per item, positionally aligned with <paramref name="texts"/>.
    /// A null or empty list means "no findings for any item"; a null entry means
    /// "no findings for that item".
    /// </param>
    /// <param name="operators">Operator configuration passed through to the engine.</param>
    /// <param name="conflictResolution">Conflict resolution strategy passed through to the engine.</param>
    /// <returns>A list with one output element per input element, in the same order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="texts"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="recognizerResultsList"/> is non-empty and its count differs from the count of <paramref name="texts"/>.
    /// </exception>
    public IReadOnlyList<object?> AnonymizeList(
        IReadOnlyList<object?> texts,
        IReadOnlyList<IReadOnlyList<RecognizerResult>> recognizerResultsList,
        IDictionary<string, OperatorConfig>? operators = null,
        ConflictResolutionStrategy conflictResolution = ConflictResolutionStrategy.MergeSimilarOrContained)
    {
        ArgumentNullException.ThrowIfNull(texts);

        var results = NormalizeRecognizerResultsList(recognizerResultsList, texts.Count);
        var output = new List<object?>(texts.Count);

        for (var i = 0; i < texts.Count; i++)
        {
            var current = texts[i];
            if (current is string or bool or int or long or float or double or decimal)
            {
                // Invariant culture keeps the textual form of numbers independent of the host locale.
                var text = Convert.ToString(current, CultureInfo.InvariantCulture) ?? string.Empty;
                var engineResult = _anonymizerEngine.Anonymize(
                    text,
                    ConvertToCoreResults(results[i]),
                    operators,
                    conflictResolution);

                output.Add(engineResult.Text ?? string.Empty);
            }
            else
            {
                // Non-scalar items are passed through untouched.
                output.Add(current);
            }
        }

        return output;
    }

    /// <summary>
    /// Anonymizes dictionary-shaped analyzer output and returns the anonymized values keyed by entry key.
    /// </summary>
    /// <param name="analyzerResults">
    /// Entries to process. The handling depends on the runtime type of each entry's value:
    /// a nested <see cref="IDictionary{TKey,TValue}"/> of string to object is processed recursively
    /// using the entry's recognizer results as nested entries; a string is anonymized directly;
    /// any other enumerable is processed with <see cref="AnonymizeList"/>; everything else is copied as is.
    /// </param>
    /// <param name="operators">Operator configuration passed through to the engine.</param>
    /// <param name="conflictResolution">Conflict resolution strategy passed through to the engine.</param>
    /// <returns>A new dictionary; when several entries share a key, the last one wins.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="analyzerResults"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// An entry is null, or an entry's recognizer results do not have the shape required for its value type.
    /// </exception>
    public IDictionary<string, object?> AnonymizeDict(
        IEnumerable<DictRecognizerResult> analyzerResults,
        IDictionary<string, OperatorConfig>? operators = null,
        ConflictResolutionStrategy conflictResolution = ConflictResolutionStrategy.MergeSimilarOrContained)
    {
        ArgumentNullException.ThrowIfNull(analyzerResults);

        var result = new Dictionary<string, object?>();
        foreach (var entry in analyzerResults)
        {
            if (entry is null)
            {
                // Fail with a descriptive error instead of a NullReferenceException on entry.Value.
                throw new ArgumentException("Analyzer results must not contain null entries.", nameof(analyzerResults));
            }

            switch (entry.Value)
            {
                case IDictionary<string, object?>:
                {
                    var nestedResults = EnsureDictResults(entry.RecognizerResults);
                    result[entry.Key] = AnonymizeDict(nestedResults, operators, conflictResolution);
                    break;
                }

                case string stringValue:
                {
                    var recognizers = EnsureRecognizerResults(entry.RecognizerResults);
                    var engineResult = _anonymizerEngine.Anonymize(
                        stringValue,
                        ConvertToCoreResults(recognizers),
                        operators,
                        conflictResolution);
                    result[entry.Key] = engineResult.Text;
                    break;
                }

                // Strings never reach this case: they are matched by the case above.
                case IEnumerable enumerable:
                {
                    var recognizers = EnsureRecognizerResultsList(entry.RecognizerResults);
                    var values = enumerable.Cast<object?>().ToList();
                    result[entry.Key] = AnonymizeList(values, recognizers, operators, conflictResolution);
                    break;
                }

                default:
                    result[entry.Key] = entry.Value;
                    break;
            }
        }

        return result;
    }

    /// <summary>
    /// Produces one recognizer-result list per text: empty lists when nothing was supplied,
    /// otherwise a copy of the supplied list with null entries replaced by empty lists.
    /// </summary>
    private static List<IReadOnlyList<RecognizerResult>> NormalizeRecognizerResultsList(
        IReadOnlyList<IReadOnlyList<RecognizerResult>>? recognizerResultsList,
        int desiredLength)
    {
        var normalized = new List<IReadOnlyList<RecognizerResult>>(desiredLength);

        if (recognizerResultsList is null || recognizerResultsList.Count == 0)
        {
            for (var i = 0; i < desiredLength; i++)
            {
                normalized.Add(Array.Empty<RecognizerResult>());
            }

            return normalized;
        }

        if (recognizerResultsList.Count != desiredLength)
        {
            throw new ArgumentException("Recognizer results list must align with texts count.", nameof(recognizerResultsList));
        }

        foreach (var entry in recognizerResultsList)
        {
            // A null entry would otherwise surface later as a NullReferenceException.
            normalized.Add(entry ?? Array.Empty<RecognizerResult>());
        }

        return normalized;
    }

    /// <summary>
    /// Interprets loosely typed recognizer results as a list of per-item result lists (used for enumerable values).
    /// </summary>
    private static IReadOnlyList<IReadOnlyList<RecognizerResult>> EnsureRecognizerResultsList(object? recognizerResults)
    {
        return recognizerResults switch
        {
            null => Array.Empty<IReadOnlyList<RecognizerResult>>(),
            IEnumerable<IReadOnlyList<RecognizerResult>> enumerable => enumerable.ToList(),
            _ => throw new ArgumentException("Recognizer results must be a list of recognizer result collections for enumerable values."),
        };
    }

    /// <summary>
    /// Interprets loosely typed recognizer results as a flat list of results (used for string values).
    /// </summary>
    private static IReadOnlyList<RecognizerResult> EnsureRecognizerResults(object? recognizerResults)
    {
        return recognizerResults switch
        {
            null => Array.Empty<RecognizerResult>(),
            IEnumerable<RecognizerResult> enumerable => enumerable.ToList(),
            _ => throw new ArgumentException("Recognizer results must be a list of recognizer results for string values."),
        };
    }

    /// <summary>
    /// Interprets loosely typed recognizer results as nested dictionary entries (used for dictionary values).
    /// </summary>
    private static IEnumerable<DictRecognizerResult> EnsureDictResults(object? recognizerResults)
    {
        return recognizerResults switch
        {
            null => Array.Empty<DictRecognizerResult>(),
            IEnumerable<DictRecognizerResult> enumerable => enumerable,
            _ => throw new ArgumentException("Recognizer results must be an enumerable of DictRecognizerResult for nested dictionaries."),
        };
    }

    /// <summary>
    /// Converts anonymizer-level recognizer results into their core representation via <c>ToCore()</c>.
    /// </summary>
    private static IReadOnlyCollection<CoreRecognizerResult> ConvertToCoreResults(IReadOnlyList<RecognizerResult> results)
    {
        if (results.Count == 0)
        {
            return Array.Empty<CoreRecognizerResult>();
        }

        var converted = new List<CoreRecognizerResult>(results.Count);
        foreach (var result in results)
        {
            converted.Add(result.ToCore());
        }

        return converted;
    }
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
namespace ManagedCode.Presidio.Anonymizer;
2+
3+
/// <summary>
/// Engine that runs deanonymization operators over a text.
/// Counterpart of the Python DeanonymizeEngine.
/// </summary>
public sealed class DeanonymizeEngine : EngineBase
{
    /// <summary>
    /// Applies deanonymize operators to <paramref name="text"/> by delegating to
    /// <c>Operate</c> with <c>OperatorType.Deanonymize</c>.
    /// </summary>
    /// <param name="text">Text to process.</param>
    /// <param name="entities">Operator results forwarded to <c>Operate</c>.</param>
    /// <param name="operators">Operator configuration forwarded to <c>Operate</c>.</param>
    /// <returns>The result produced by <c>Operate</c>.</returns>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public EngineResult Deanonymize(
        string text,
        IReadOnlyCollection<OperatorResult> entities,
        IDictionary<string, OperatorConfig> operators)
    {
        ArgumentNullException.ThrowIfNull(text);
        ArgumentNullException.ThrowIfNull(entities);
        ArgumentNullException.ThrowIfNull(operators);

        var outcome = Operate(text, entities, operators, OperatorType.Deanonymize);
        return outcome;
    }

    /// <summary>
    /// Returns the keys of the deanonymizers currently known to the operators factory, copied into an array.
    /// </summary>
    public IReadOnlyCollection<string> GetDeanonymizers()
    {
        var registered = OperatorsFactory.GetDeanonymizers();
        return registered.Keys.ToArray();
    }

    /// <summary>
    /// Forwards <paramref name="operatorType"/> to the operators factory's <c>AddDeanonymizeOperator</c>.
    /// </summary>
    public void AddDeanonymizer(Type operatorType)
    {
        OperatorsFactory.AddDeanonymizeOperator(operatorType);
    }

    /// <summary>
    /// Forwards <paramref name="operatorType"/> to the operators factory's <c>RemoveDeanonymizeOperator</c>.
    /// </summary>
    public void RemoveDeanonymizer(Type operatorType)
    {
        OperatorsFactory.RemoveDeanonymizeOperator(operatorType);
    }
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
namespace ManagedCode.Presidio.Anonymizer;
2+
3+
/// <summary>
/// Analyzer output for one entry of a dictionary-shaped input.
/// Counterpart of the Python <c>DictRecognizerResult</c> dataclass.
/// </summary>
public sealed class DictRecognizerResult
{
    /// <summary>
    /// Creates an entry for the given key.
    /// </summary>
    /// <param name="key">Key of the entry; must not be null.</param>
    /// <param name="value">Value stored under the key; may be null.</param>
    /// <param name="recognizerResults">Loosely typed recognizer output associated with the value; may be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="key"/> is null.</exception>
    public DictRecognizerResult(string key, object? value, object? recognizerResults)
    {
        Key = key ?? throw new ArgumentNullException(nameof(key));
        Value = value;
        RecognizerResults = recognizerResults;
    }

    /// <summary>Key of the entry in the analyzed dictionary.</summary>
    public string Key { get; }

    /// <summary>Value that was analyzed for this key.</summary>
    public object? Value { get; }

    /// <summary>Recognizer output for <see cref="Value"/>; its expected shape depends on the value's type.</summary>
    public object? RecognizerResults { get; }
}

src/ManagedCode.Presidio.Anonymizer/RecognizerResult.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,4 +83,12 @@ public static RecognizerResult FromCore(ManagedCode.Presidio.Core.RecognizerResu
8383
ArgumentNullException.ThrowIfNull(result);
8484
return new RecognizerResult(result.EntityType, result.Start, result.End, result.Score);
8585
}
86+
87+
/// <summary>
/// Builds the core-library recognizer result for this instance: same entity type and score,
/// with <c>Start</c> and <c>End</c> wrapped in a core <c>TextSpan</c>.
/// </summary>
/// <returns>A new <see cref="ManagedCode.Presidio.Core.RecognizerResult"/>.</returns>
public ManagedCode.Presidio.Core.RecognizerResult ToCore() =>
    new(EntityType, new ManagedCode.Presidio.Core.TextSpan(Start, End), Score);
8694
}

0 commit comments

Comments
 (0)