From ef47838c41f48ad248e94b00e6a6bf0ad5b5bb60 Mon Sep 17 00:00:00 2001 From: Thomas Clegg Date: Thu, 28 May 2026 11:40:38 -0500 Subject: [PATCH 1/7] more benchmarks including cold start --- .../Benchmarks/AsYouTypeFormatterBenchmark.cs | 48 ++++++++++ .../Benchmarks/ColdStartBenchmark.cs | 91 +++++++++++++++++++ .../Benchmarks/ParsingHelpersBenchmark.cs | 58 ++++++++++++ .../Benchmarks/PhoneNumberMatcherBenchmark.cs | 58 ++++++++++++ csharp/PhoneNumbers.PerformanceTest/README.md | 13 +++ csharp/PhoneNumbers/Util.cs | 1 + 6 files changed, 269 insertions(+) create mode 100644 csharp/PhoneNumbers.PerformanceTest/Benchmarks/AsYouTypeFormatterBenchmark.cs create mode 100644 csharp/PhoneNumbers.PerformanceTest/Benchmarks/ColdStartBenchmark.cs create mode 100644 csharp/PhoneNumbers.PerformanceTest/Benchmarks/ParsingHelpersBenchmark.cs create mode 100644 csharp/PhoneNumbers.PerformanceTest/Benchmarks/PhoneNumberMatcherBenchmark.cs diff --git a/csharp/PhoneNumbers.PerformanceTest/Benchmarks/AsYouTypeFormatterBenchmark.cs b/csharp/PhoneNumbers.PerformanceTest/Benchmarks/AsYouTypeFormatterBenchmark.cs new file mode 100644 index 00000000..fd39e917 --- /dev/null +++ b/csharp/PhoneNumbers.PerformanceTest/Benchmarks/AsYouTypeFormatterBenchmark.cs @@ -0,0 +1,48 @@ +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Jobs; + +namespace PhoneNumbers.PerformanceTest.Benchmarks +{ + [MemoryDiagnoser] + [SimpleJob(RuntimeMoniker.Net48)] + [SimpleJob(RuntimeMoniker.Net80)] + [SimpleJob(RuntimeMoniker.Net90)] + public class AsYouTypeFormatterBenchmark + { +#if NETFRAMEWORK + private PhoneNumberUtil _phoneNumberUtil = null; + private PhoneNumberBenchmarkCase[] _phoneNumbers = null; +#else + private PhoneNumberUtil _phoneNumberUtil = null!; + private PhoneNumberBenchmarkCase[] _phoneNumbers = null!; +#endif + + [Params(1000, 10000)] + public int PhoneNumberCount { get; set; } + + [GlobalSetup] + public void Setup() + { + _phoneNumberUtil = PhoneNumberUtil.GetInstance(); + 
_phoneNumbers = PhoneNumberBenchmarkData.Create(_phoneNumberUtil, PhoneNumberCount); + } + + [Benchmark] + public int InputDigitPerKeystroke() + { + var checksum = 0; + + for (var i = 0; i < _phoneNumbers.Length; i++) + { + var phoneNumber = _phoneNumbers[i]; + var formatter = _phoneNumberUtil.GetAsYouTypeFormatter(phoneNumber.DefaultRegion); + + var input = phoneNumber.NumberToParse; + for (var c = 0; c < input.Length; c++) + checksum += formatter.InputDigit(input[c]).Length; + } + + return checksum; + } + } +} diff --git a/csharp/PhoneNumbers.PerformanceTest/Benchmarks/ColdStartBenchmark.cs b/csharp/PhoneNumbers.PerformanceTest/Benchmarks/ColdStartBenchmark.cs new file mode 100644 index 00000000..31731497 --- /dev/null +++ b/csharp/PhoneNumbers.PerformanceTest/Benchmarks/ColdStartBenchmark.cs @@ -0,0 +1,91 @@ +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Engines; +using BenchmarkDotNet.Jobs; + +namespace PhoneNumbers.PerformanceTest.Benchmarks +{ + /// + /// Cold-start measurements. Each invocation builds a fresh <see cref="PhoneNumberUtil"/> so the + /// embedded-resource metadata cache is empty — this is the cost a consumer pays on their first + /// use of the library, before any region metadata has been loaded. + /// + [MemoryDiagnoser] + [SimpleJob(RunStrategy.ColdStart, RuntimeMoniker.Net48, launchCount: 1, warmupCount: 1, iterationCount: 20, invocationCount: 1)] + [SimpleJob(RunStrategy.ColdStart, RuntimeMoniker.Net80, launchCount: 1, warmupCount: 1, iterationCount: 20, invocationCount: 1)] + [SimpleJob(RunStrategy.ColdStart, RuntimeMoniker.Net90, launchCount: 1, warmupCount: 1, iterationCount: 20, invocationCount: 1)] + public class ColdStartBenchmark + { + // _warmInstance (from PhoneNumberUtil.GetInstance()) is only used by Setup to snapshot the + // supported-region list that CreateInstanceAndLoadAllRegions iterates. No benchmark reads it + // afterwards: each benchmark method constructs its own fresh PhoneNumberUtil. 
+#if NETFRAMEWORK + private PhoneNumberUtil _warmInstance = null; + private string[] _supportedRegions = null; +#else + private PhoneNumberUtil _warmInstance = null!; + private string[] _supportedRegions = null!; +#endif + + // Region selected for FirstRegionLookup. Chosen as a small-but-real region so its metadata + // payload size is representative of the average region rather than an outlier like US/CN. + private const string TargetRegion = "CH"; + + [GlobalSetup] + public void Setup() + { + // Obtains PhoneNumberUtil.GetInstance() and snapshots its supported-region list for + // CreateInstanceAndLoadAllRegions. No region is looked up explicitly here, and every + // benchmark builds its own PhoneNumberUtil, so this instance's cache is never measured. + _warmInstance = PhoneNumberUtil.GetInstance(); + _supportedRegions = new string[_warmInstance.GetSupportedRegions().Count]; + _warmInstance.GetSupportedRegions().CopyTo(_supportedRegions); + } + + /// + /// Bare construction: builds the country-code map and runs the constructor. No region + /// metadata is loaded — that all happens lazily on first <see cref="PhoneNumberUtil.GetMetadataForRegion"/>. + /// + [Benchmark] + public PhoneNumberUtil CreateInstance() + { + return new PhoneNumberUtil( + new EmbeddedResourceMetadataLoader(), + CountryCodeToRegionCodeMap.GetCountryCodeToRegionCodeMap()); + } + + /// + /// Construct + force-load every region's metadata. Represents a long-running process that + /// will eventually touch every region — the total cold cost they pay across their lifetime. 
+ /// + [Benchmark] + public int CreateInstanceAndLoadAllRegions() + { + var util = new PhoneNumberUtil( + new EmbeddedResourceMetadataLoader(), + CountryCodeToRegionCodeMap.GetCountryCodeToRegionCodeMap()); + + var checksum = 0; + for (var i = 0; i < _supportedRegions.Length; i++) + { + var meta = util.GetMetadataForRegion(_supportedRegions[i]); + if (meta != null) + checksum++; + } + return checksum; + } + + /// + /// Isolated single-region lazy load. Builds one fresh util per invocation (so construction + /// is included in the measurement) so <see cref="PhoneNumberUtil.GetMetadataForRegion"/> hits the binary + /// loader instead of the in-memory cache. + /// + [Benchmark] + public PhoneMetadata FirstRegionLookup() + { + var util = new PhoneNumberUtil( + new EmbeddedResourceMetadataLoader(), + CountryCodeToRegionCodeMap.GetCountryCodeToRegionCodeMap()); + return util.GetMetadataForRegion(TargetRegion); + } + } +} diff --git a/csharp/PhoneNumbers.PerformanceTest/Benchmarks/ParsingHelpersBenchmark.cs b/csharp/PhoneNumbers.PerformanceTest/Benchmarks/ParsingHelpersBenchmark.cs new file mode 100644 index 00000000..df46f3b4 --- /dev/null +++ b/csharp/PhoneNumbers.PerformanceTest/Benchmarks/ParsingHelpersBenchmark.cs @@ -0,0 +1,58 @@ +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Jobs; + +namespace PhoneNumbers.PerformanceTest.Benchmarks +{ + [MemoryDiagnoser] + [SimpleJob(RuntimeMoniker.Net48)] + [SimpleJob(RuntimeMoniker.Net80)] + [SimpleJob(RuntimeMoniker.Net90)] + public class ParsingHelpersBenchmark + { +#if NETFRAMEWORK + private string[] _inputs = null; + private string[] _inputsWithLeadingJunk = null; +#else + private string[] _inputs = null!; + private string[] _inputsWithLeadingJunk = null!; +#endif + + [Params(1000, 10000)] + public int PhoneNumberCount { get; set; } + + [GlobalSetup] + public void Setup() + { + var phoneNumberUtil = PhoneNumberUtil.GetInstance(); + var cases = PhoneNumberBenchmarkData.Create(phoneNumberUtil, PhoneNumberCount); + + _inputs = new string[cases.Length]; + _inputsWithLeadingJunk = new 
string[cases.Length]; + for (var i = 0; i < cases.Length; i++) + { + _inputs[i] = cases[i].NumberToParse; + // Forces ExtractPossibleNumber to actually slice (the common "clean input" case + // is measured separately by _inputs). + _inputsWithLeadingJunk[i] = "abc " + cases[i].NumberToParse; + } + } + + [Benchmark] + public int ExtractPossibleNumber_CleanInput() + { + var checksum = 0; + for (var i = 0; i < _inputs.Length; i++) + checksum += PhoneNumberUtil.ExtractPossibleNumber(_inputs[i]).Length; + return checksum; + } + + [Benchmark] + public int ExtractPossibleNumber_WithLeadingJunk() + { + var checksum = 0; + for (var i = 0; i < _inputsWithLeadingJunk.Length; i++) + checksum += PhoneNumberUtil.ExtractPossibleNumber(_inputsWithLeadingJunk[i]).Length; + return checksum; + } + } +} diff --git a/csharp/PhoneNumbers.PerformanceTest/Benchmarks/PhoneNumberMatcherBenchmark.cs b/csharp/PhoneNumbers.PerformanceTest/Benchmarks/PhoneNumberMatcherBenchmark.cs new file mode 100644 index 00000000..a0294408 --- /dev/null +++ b/csharp/PhoneNumbers.PerformanceTest/Benchmarks/PhoneNumberMatcherBenchmark.cs @@ -0,0 +1,58 @@ +using System.Text; +using BenchmarkDotNet.Attributes; +using BenchmarkDotNet.Jobs; + +namespace PhoneNumbers.PerformanceTest.Benchmarks +{ + [MemoryDiagnoser] + [SimpleJob(RuntimeMoniker.Net48)] + [SimpleJob(RuntimeMoniker.Net80)] + [SimpleJob(RuntimeMoniker.Net90)] + public class PhoneNumberMatcherBenchmark + { + // Filler text interleaved between embedded numbers so the matcher has to skip non-number + // content. Kept short to keep total input length proportional to PhoneNumberCount. + private const string Filler = " Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
Call "; + +#if NETFRAMEWORK + private PhoneNumberUtil _phoneNumberUtil = null; + private string _defaultRegion = null; + private string _text = null; +#else + private PhoneNumberUtil _phoneNumberUtil = null!; + private string _defaultRegion = null!; + private string _text = null!; +#endif + + [Params(100, 1000)] + public int PhoneNumberCount { get; set; } + + [GlobalSetup] + public void Setup() + { + _phoneNumberUtil = PhoneNumberUtil.GetInstance(); + var cases = PhoneNumberBenchmarkData.Create(_phoneNumberUtil, PhoneNumberCount); + + // FindNumbers takes a single default region, so use the region of the first generated + // case (NOTE: not necessarily the most common region in the set; confirm if that matters). + _defaultRegion = cases[0].DefaultRegion; + + var sb = new StringBuilder(PhoneNumberCount * (Filler.Length + 16)); + for (var i = 0; i < cases.Length; i++) + { + sb.Append(Filler); + sb.Append(cases[i].NumberToParse); + } + _text = sb.ToString(); + } + + [Benchmark] + public int FindNumbers() + { + var checksum = 0; + foreach (var match in _phoneNumberUtil.FindNumbers(_text, _defaultRegion)) + checksum += match.RawString.Length; + return checksum; + } + } +} diff --git a/csharp/PhoneNumbers.PerformanceTest/README.md b/csharp/PhoneNumbers.PerformanceTest/README.md index b9aa99cf..73d03f2c 100644 --- a/csharp/PhoneNumbers.PerformanceTest/README.md +++ b/csharp/PhoneNumbers.PerformanceTest/README.md @@ -24,6 +24,19 @@ dotnet run -c Release --framework net9.0 -- --filter "*PhoneNumberWorkflowBenchm The full benchmark includes the `100000` phone-number data set and may take several minutes, especially when multiple runtime jobs are available on the machine. +Other available benchmarks: + +- `*AsYouTypeFormatterBenchmark*` — per-keystroke cost of `AsYouTypeFormatter.InputDigit` over + a representative set of regional numbers. +- `*PhoneNumberMatcherBenchmark*` — `PhoneNumberUtil.FindNumbers` over a synthetic text body + with phone numbers embedded between filler sentences. 
+- `*ParsingHelpersBenchmark*` — `PhoneNumberUtil.ExtractPossibleNumber` measured separately + for clean inputs (no leading junk) and inputs that force the strip path. +- `*ColdStartBenchmark*` — cost a consumer pays the first time they touch the library: bare + `PhoneNumberUtil` construction, construction plus lazy-load of every region's metadata, + and an isolated first-region lookup. Uses BDN's `RunStrategy.ColdStart` with + `invocationCount: 1` so each measurement is a genuine first-use, not a steady-state loop. + The benchmark data is generated from valid example numbers in the bundled metadata and expanded deterministically to the configured `PhoneNumberCount` values, up to 100,000 inputs. Each benchmark iteration parses, validates, and formats every number in that data set. diff --git a/csharp/PhoneNumbers/Util.cs b/csharp/PhoneNumbers/Util.cs index c4d9c17a..de871929 100644 --- a/csharp/PhoneNumbers/Util.cs +++ b/csharp/PhoneNumbers/Util.cs @@ -21,6 +21,7 @@ [assembly: InternalsVisibleTo("PhoneNumbers.Test")] [assembly: InternalsVisibleTo("PhoneNumbers.MetadataBuilder")] +[assembly: InternalsVisibleTo("PhoneNumbers.PerformanceTest")] namespace PhoneNumbers { From 4e93aad1243c5ad65dd13a5e3c8a6488e55d6a84 Mon Sep 17 00:00:00 2001 From: Thomas Clegg Date: Thu, 28 May 2026 12:17:26 -0500 Subject: [PATCH 2/7] performance tweaks --- .../Benchmarks/PhoneNumberMatcherBenchmark.cs | 14 +++++++++++++- csharp/PhoneNumbers/AsYouTypeFormatter.cs | 8 +++++++- csharp/PhoneNumbers/PhoneNumberMatcher.cs | 16 ++++++++++------ 3 files changed, 30 insertions(+), 8 deletions(-) diff --git a/csharp/PhoneNumbers.PerformanceTest/Benchmarks/PhoneNumberMatcherBenchmark.cs b/csharp/PhoneNumbers.PerformanceTest/Benchmarks/PhoneNumberMatcherBenchmark.cs index a0294408..004aa0dd 100644 --- a/csharp/PhoneNumbers.PerformanceTest/Benchmarks/PhoneNumberMatcherBenchmark.cs +++ b/csharp/PhoneNumbers.PerformanceTest/Benchmarks/PhoneNumberMatcherBenchmark.cs @@ -47,12 +47,24 @@ public void 
Setup() } [Benchmark] - public int FindNumbers() + public int FindNumbers_Valid() { var checksum = 0; foreach (var match in _phoneNumberUtil.FindNumbers(_text, _defaultRegion)) checksum += match.RawString.Length; return checksum; } + + // STRICT_GROUPING exercises AllNumberGroupsRemainGrouped, which the default VALID leniency + // does not. Useful to measure the matcher's group-formatting validation path. + [Benchmark] + public int FindNumbers_StrictGrouping() + { + var checksum = 0; + foreach (var match in _phoneNumberUtil.FindNumbers(_text, _defaultRegion, + PhoneNumberUtil.Leniency.STRICT_GROUPING, long.MaxValue)) + checksum += match.RawString.Length; + return checksum; + } } } diff --git a/csharp/PhoneNumbers/AsYouTypeFormatter.cs b/csharp/PhoneNumbers/AsYouTypeFormatter.cs index 87b6419b..bef08b37 100644 --- a/csharp/PhoneNumbers/AsYouTypeFormatter.cs +++ b/csharp/PhoneNumbers/AsYouTypeFormatter.cs @@ -632,7 +632,13 @@ private bool AttemptToExtractIdd() isCompleteNumber = true; var startOfCountryCallingCode = iddMatcher.Length; nationalNumber.Length = 0; - nationalNumber.Append(accruedInputWithoutFormatting.ToString().Substring(startOfCountryCallingCode)); +#if NETSTANDARD2_0 + for (var k = startOfCountryCallingCode; k < accruedInputWithoutFormatting.Length; k++) + nationalNumber.Append(accruedInputWithoutFormatting[k]); +#else + nationalNumber.Append(accruedInputWithoutFormatting, startOfCountryCallingCode, + accruedInputWithoutFormatting.Length - startOfCountryCallingCode); +#endif prefixBeforeNationalNumber.Length = 0; prefixBeforeNationalNumber.Append(iddMatcher.Value); if (accruedInputWithoutFormatting[0] != PhoneNumberUtil.PLUS_SIGN) diff --git a/csharp/PhoneNumbers/PhoneNumberMatcher.cs b/csharp/PhoneNumbers/PhoneNumberMatcher.cs index 485efb50..bf9f1096 100644 --- a/csharp/PhoneNumbers/PhoneNumberMatcher.cs +++ b/csharp/PhoneNumbers/PhoneNumberMatcher.cs @@ -442,37 +442,41 @@ public static bool AllNumberGroupsRemainGrouped(PhoneNumberUtil util, IList 
formattedNumberGroups) { var fromIndex = 0; + var candidate = normalizedCandidate.ToString(); // Check each group of consecutive digits are not broken into separate groupings in the // normalizedCandidate string. for (var i = 0; i < formattedNumberGroups.Count; i++) { // Fails if the substring of {@code normalizedCandidate} starting from {@code fromIndex} // doesn't contain the consecutive digits in formattedNumberGroups[i]. - fromIndex = normalizedCandidate.ToString().IndexOf(formattedNumberGroups[i], fromIndex, StringComparison.Ordinal); + fromIndex = candidate.IndexOf(formattedNumberGroups[i], fromIndex, StringComparison.Ordinal); if (fromIndex < 0) { return false; } // Moves {@code fromIndex} forward. fromIndex += formattedNumberGroups[i].Length; - if (i == 0 && fromIndex < normalizedCandidate.Length) + if (i == 0 && fromIndex < candidate.Length) { // We are at the position right after the NDC. - if (char.IsDigit(normalizedCandidate[fromIndex])) + if (char.IsDigit(candidate[fromIndex])) { // This means there is no formatting symbol after the NDC. In this case, we only // accept the number if there is no formatting symbol at all in the number, except // for extensions. var nationalSignificantNumber = util.GetNationalSignificantNumber(number); - return normalizedCandidate.ToString().Substring(fromIndex - formattedNumberGroups[i].Length) - .StartsWith(nationalSignificantNumber, StringComparison.Ordinal); + var nsnStart = fromIndex - formattedNumberGroups[i].Length; + if (candidate.Length - nsnStart < nationalSignificantNumber.Length) + return false; + return string.Compare(candidate, nsnStart, nationalSignificantNumber, 0, + nationalSignificantNumber.Length, StringComparison.Ordinal) == 0; } } } // The check here makes sure that we haven't mistakenly already used the extension to // match the last group of the subscriber number. Note the extension cannot have // formatting in-between digits. 
- return normalizedCandidate.ToString().Substring(fromIndex).Contains(number.Extension); + return candidate.IndexOf(number.Extension, fromIndex, StringComparison.Ordinal) >= 0; } public static bool AllNumberGroupsAreExactlyPresent(PhoneNumberUtil util, From 13513e29cf34340780b535531c40e30277f48aed Mon Sep 17 00:00:00 2001 From: Thomas Clegg Date: Thu, 28 May 2026 12:25:01 -0500 Subject: [PATCH 3/7] make aot compatible --- csharp/PhoneNumbers/PhoneNumberUtil.cs | 4 ++++ csharp/PhoneNumbers/PhoneNumbers.csproj | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/csharp/PhoneNumbers/PhoneNumberUtil.cs b/csharp/PhoneNumbers/PhoneNumberUtil.cs index 79ce721a..1b503af3 100644 --- a/csharp/PhoneNumbers/PhoneNumberUtil.cs +++ b/csharp/PhoneNumbers/PhoneNumberUtil.cs @@ -994,7 +994,11 @@ private static bool DescHasData(PhoneNumberDesc desc) private HashSet GetSupportedTypesForMetadata(PhoneMetadata metadata) { var types = new HashSet(); +#if NETSTANDARD2_0 foreach (PhoneNumberType type in Enum.GetValues(typeof(PhoneNumberType))) +#else + foreach (var type in Enum.GetValues()) +#endif { if (type == PhoneNumberType.FIXED_LINE_OR_MOBILE || type == PhoneNumberType.UNKNOWN) { diff --git a/csharp/PhoneNumbers/PhoneNumbers.csproj b/csharp/PhoneNumbers/PhoneNumbers.csproj index eeae05b0..3486b83c 100644 --- a/csharp/PhoneNumbers/PhoneNumbers.csproj +++ b/csharp/PhoneNumbers/PhoneNumbers.csproj @@ -27,6 +27,12 @@ enable + + true From 4e43235d6a1629193516a3e2f2a477f0109a10ea Mon Sep 17 00:00:00 2001 From: Thomas Clegg Date: Thu, 28 May 2026 13:03:01 -0500 Subject: [PATCH 4/7] restore compression --- csharp/PhoneNumbers.MetadataBuilder/Program.cs | 10 +++++++--- csharp/PhoneNumbers/MetadataLoader.cs | 9 ++++++++- csharp/PhoneNumbers/PhoneNumberToTimeZonesMapper.cs | 4 +++- csharp/PhoneNumbers/PrefixFileReader.cs | 4 +++- 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/csharp/PhoneNumbers.MetadataBuilder/Program.cs b/csharp/PhoneNumbers.MetadataBuilder/Program.cs 
index b87a3e26..e544f8c9 100644 --- a/csharp/PhoneNumbers.MetadataBuilder/Program.cs +++ b/csharp/PhoneNumbers.MetadataBuilder/Program.cs @@ -12,6 +12,7 @@ using System.Collections.Generic; using System.Globalization; using System.IO; +using System.IO.Compression; using System.Security.Cryptography; using System.Text; using System.Threading; @@ -138,7 +139,8 @@ private static int BuildGeocoding(string inputDir, string outputDir) var map = ParseAreaCodeText(txtPath); var outPath = Path.Combine(outputDir, $"{lang}.{countryCode}"); using var fs = File.Create(outPath); - BuildPrefixMapFromBin.WriteAreaCodeMap(fs, map); + using var gz = new GZipStream(fs, CompressionLevel.SmallestSize); + BuildPrefixMapFromBin.WriteAreaCodeMap(gz, map); written++; } } @@ -163,7 +165,8 @@ private static int BuildTimezones(string inputFile, string outputFile) var map = ParseTimezoneText(inputFile, splitter: '&'); using var fs = File.Create(outputFile); - BuildPrefixMapFromBin.WriteTimezoneMap(fs, map); + using var gz = new GZipStream(fs, CompressionLevel.SmallestSize); + BuildPrefixMapFromBin.WriteTimezoneMap(gz, map); Console.Out.WriteLine($"PhoneNumbers.MetadataBuilder: wrote {map.Count} timezone entries to {outputFile}"); return 0; } @@ -279,7 +282,8 @@ private static int BuildPerRegion( var key = MakeFileNameKey(metadata, isAlternateFormatsMetadata); var path = Path.Combine(outputDir, $"{filePrefix}_{key}"); using var fs = File.Create(path); - BuildMetadataFromBin.WriteMetadata(fs, metadata); + using var gz = new GZipStream(fs, CompressionLevel.SmallestSize); + BuildMetadataFromBin.WriteMetadata(gz, metadata); written++; } diff --git a/csharp/PhoneNumbers/MetadataLoader.cs b/csharp/PhoneNumbers/MetadataLoader.cs index a49748af..de399147 100644 --- a/csharp/PhoneNumbers/MetadataLoader.cs +++ b/csharp/PhoneNumbers/MetadataLoader.cs @@ -12,6 +12,7 @@ using System; using System.Collections.Generic; using System.IO; +using System.IO.Compression; using System.Reflection; namespace 
PhoneNumbers @@ -108,6 +109,12 @@ public EmbeddedResourceMetadataLoader(Assembly assembly, string resourcePrefix) } public Stream? LoadMetadata(string fileName) - => assembly.GetManifestResourceStream(resourcePrefix + fileName); + { + // The build pipeline gzips every bin before embedding it (see GZipStream wrapping in + // PhoneNumbers.MetadataBuilder). Decompress on the way out so callers see the plain + // bin format they already expect. + var raw = assembly.GetManifestResourceStream(resourcePrefix + fileName); + return raw == null ? null : new GZipStream(raw, CompressionMode.Decompress); + } } } diff --git a/csharp/PhoneNumbers/PhoneNumberToTimeZonesMapper.cs b/csharp/PhoneNumbers/PhoneNumberToTimeZonesMapper.cs index 5305d41b..303af0ce 100644 --- a/csharp/PhoneNumbers/PhoneNumberToTimeZonesMapper.cs +++ b/csharp/PhoneNumbers/PhoneNumberToTimeZonesMapper.cs @@ -2,6 +2,7 @@ using System; using System.Collections.Generic; using System.Collections.Immutable; +using System.IO.Compression; using System.Linq; namespace PhoneNumbers @@ -105,7 +106,8 @@ private static PhoneNumberToTimeZonesMapper Create(string timezoneDataDirectory) var mapFile = names.FirstOrDefault(s => s.EndsWith(TZMAP_BIN_FILENAME, StringComparison.Ordinal)) ?? throw new MissingMetadataException( $"Timezone data resource '{prefix}{TZMAP_BIN_FILENAME}' not found on assembly '{asm.GetName().Name}'."); - using var fp = asm.GetManifestResourceStream(mapFile); + using var raw = asm.GetManifestResourceStream(mapFile); + using var fp = new GZipStream(raw, CompressionMode.Decompress); var prefixMap = BuildPrefixMapFromBin.ReadTimezoneMap(fp); // Rehydrate as IDictionary to match the existing constructor contract. 
IDictionary dict = prefixMap; diff --git a/csharp/PhoneNumbers/PrefixFileReader.cs b/csharp/PhoneNumbers/PrefixFileReader.cs index a4fd3c41..7b983162 100644 --- a/csharp/PhoneNumbers/PrefixFileReader.cs +++ b/csharp/PhoneNumbers/PrefixFileReader.cs @@ -19,6 +19,7 @@ using System.Collections.Concurrent; using System.Collections.Generic; using System.IO; +using System.IO.Compression; using System.Linq; using System.Reflection; @@ -122,9 +123,10 @@ private AreaCodeMap GetPhonePrefixDescriptions(int prefixMapKey, string language private AreaCodeMap LoadAreaCodeMapFromFile(string fileName) { var resName = phonePrefixDataDirectory + fileName; - using var fp = assembly.GetManifestResourceStream(resName) + using var raw = assembly.GetManifestResourceStream(resName) ?? throw new MissingMetadataException( $"Prefix map resource '{resName}' not found on assembly '{assembly.GetName().Name}'."); + using var fp = new GZipStream(raw, CompressionMode.Decompress); var sortedMap = BuildPrefixMapFromBin.ReadAreaCodeMap(fp); var areaCodeMap = new AreaCodeMap(); From cbc6cb4c44ca4c4eb03e726841d1165dfe12f5e8 Mon Sep 17 00:00:00 2001 From: Thomas Clegg Date: Thu, 28 May 2026 14:05:58 -0500 Subject: [PATCH 5/7] dash helper --- csharp/PhoneNumbers/PhoneNumberMatcher.cs | 25 ++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/csharp/PhoneNumbers/PhoneNumberMatcher.cs b/csharp/PhoneNumbers/PhoneNumberMatcher.cs index bf9f1096..59652851 100644 --- a/csharp/PhoneNumbers/PhoneNumberMatcher.cs +++ b/csharp/PhoneNumbers/PhoneNumberMatcher.cs @@ -532,7 +532,30 @@ private static IList GetNationalNumberGroups(PhoneNumberUtil util, Phone } // The country-code will have a '-' following it. 
var startIndex = rfc3966Format.IndexOf('-') + 1; - return rfc3966Format.Substring(startIndex, endIndex - startIndex).Split('-'); + return SplitRangeOnDash(rfc3966Format, startIndex, endIndex); + } + + // Skips Substring(start, len).Split('-')'s intermediate string by counting groups in one + // pass and slicing directly into a right-sized array on the second. + private static string[] SplitRangeOnDash(string source, int startIndex, int endIndex) + { + var groupCount = 1; + for (var i = startIndex; i < endIndex; i++) + if (source[i] == '-') groupCount++; + + var result = new string[groupCount]; + var resultIdx = 0; + var groupStart = startIndex; + for (var i = startIndex; i < endIndex; i++) + { + if (source[i] == '-') + { + result[resultIdx++] = source.Substring(groupStart, i - groupStart); + groupStart = i + 1; + } + } + result[resultIdx] = source.Substring(groupStart, endIndex - groupStart); + return result; } /// From 28c19ca60b98db435a76022752d64905127c9d0b Mon Sep 17 00:00:00 2001 From: Thomas Clegg Date: Thu, 28 May 2026 15:30:17 -0500 Subject: [PATCH 6/7] add codecov settings --- codecov.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 codecov.yml diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 00000000..86f41021 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,14 @@ +coverage: + status: + project: + default: + target: auto + # Allow overall coverage to drop by up to 1% without failing the build. + # Catches gradual rot without flagging tiny noise from refactors. + threshold: 1% + patch: + default: + # Patch coverage on changed lines must be at least 90%. Catches PRs that + # ship large uncovered blocks while tolerating a few hard-to-test lines + # (resource-loader plumbing, AOT-only branches, etc.). 
+ target: 90% From f4f483fa3f9311d857e848d454db073319f7a151 Mon Sep 17 00:00:00 2001 From: Thomas Clegg Date: Thu, 28 May 2026 15:55:38 -0500 Subject: [PATCH 7/7] address comments --- csharp/PhoneNumbers.MetadataBuilder/Program.cs | 9 +++------ csharp/PhoneNumbers/MetadataLoader.cs | 17 ++++++++++++++++- .../PhoneNumberToTimeZonesMapper.cs | 4 +++- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/csharp/PhoneNumbers.MetadataBuilder/Program.cs b/csharp/PhoneNumbers.MetadataBuilder/Program.cs index e544f8c9..e2de3e80 100644 --- a/csharp/PhoneNumbers.MetadataBuilder/Program.cs +++ b/csharp/PhoneNumbers.MetadataBuilder/Program.cs @@ -138,8 +138,7 @@ private static int BuildGeocoding(string inputDir, string outputDir) var countryCode = Path.GetFileNameWithoutExtension(txtPath); var map = ParseAreaCodeText(txtPath); var outPath = Path.Combine(outputDir, $"{lang}.{countryCode}"); - using var fs = File.Create(outPath); - using var gz = new GZipStream(fs, CompressionLevel.SmallestSize); + using var gz = new GZipStream(File.Create(outPath), CompressionLevel.SmallestSize); BuildPrefixMapFromBin.WriteAreaCodeMap(gz, map); written++; } @@ -164,8 +163,7 @@ private static int BuildTimezones(string inputFile, string outputFile) Directory.CreateDirectory(Path.GetDirectoryName(outputFile)!); var map = ParseTimezoneText(inputFile, splitter: '&'); - using var fs = File.Create(outputFile); - using var gz = new GZipStream(fs, CompressionLevel.SmallestSize); + using var gz = new GZipStream(File.Create(outputFile), CompressionLevel.SmallestSize); BuildPrefixMapFromBin.WriteTimezoneMap(gz, map); Console.Out.WriteLine($"PhoneNumbers.MetadataBuilder: wrote {map.Count} timezone entries to {outputFile}"); return 0; @@ -281,8 +279,7 @@ private static int BuildPerRegion( { var key = MakeFileNameKey(metadata, isAlternateFormatsMetadata); var path = Path.Combine(outputDir, $"{filePrefix}_{key}"); - using var fs = File.Create(path); - using var gz = new GZipStream(fs, 
CompressionLevel.SmallestSize); + using var gz = new GZipStream(File.Create(path), CompressionLevel.SmallestSize); BuildMetadataFromBin.WriteMetadata(gz, metadata); written++; } diff --git a/csharp/PhoneNumbers/MetadataLoader.cs b/csharp/PhoneNumbers/MetadataLoader.cs index de399147..7dc2e0e6 100644 --- a/csharp/PhoneNumbers/MetadataLoader.cs +++ b/csharp/PhoneNumbers/MetadataLoader.cs @@ -69,6 +69,20 @@ public InMemoryMetadataLoader(Dictionary data) /// concatenates the configured prefix with the supplied file name and calls /// Assembly.GetManifestResourceStream. /// + /// + /// Resource encoding contract: this loader expects the manifest resource + /// bytes to be gzip-compressed and wraps the returned stream in a + /// <see cref="GZipStream"/> for decompression. The build pipeline in + /// PhoneNumbers.MetadataBuilder applies this compression automatically before embedding. + /// + /// If you point this loader at a custom assembly whose PhoneNumberMetadata_* + /// (or analogously prefixed) resources are uncompressed bin bytes, decoding will fail + /// with an <see cref="InvalidDataException"/> from <see cref="GZipStream"/> on first + /// lazy-load. Options: gzip your resources before embedding, run them through + /// PhoneNumbers.MetadataBuilder which does so by default, or implement + /// <see cref="IMetadataLoader"/> directly and skip this class — the library + /// reads whatever stream the loader returns without further wrapping. + /// public sealed class EmbeddedResourceMetadataLoader : IMetadataLoader { /// @@ -97,7 +111,8 @@ public EmbeddedResourceMetadataLoader(Assembly assembly) : this(assembly, DefaultResourcePrefix) { } /// - /// Constructs a loader with a custom assembly and resource-name prefix. + /// Constructs a loader with a custom assembly and resource-name prefix. The assembly's + /// resources must be gzip-compressed bin files — see the class-level remarks for details. /// /// Assembly to read manifest resources from. 
/// Prefix prepended to every fileName passed to diff --git a/csharp/PhoneNumbers/PhoneNumberToTimeZonesMapper.cs b/csharp/PhoneNumbers/PhoneNumberToTimeZonesMapper.cs index 303af0ce..dd5502f6 100644 --- a/csharp/PhoneNumbers/PhoneNumberToTimeZonesMapper.cs +++ b/csharp/PhoneNumbers/PhoneNumberToTimeZonesMapper.cs @@ -106,7 +106,9 @@ private static PhoneNumberToTimeZonesMapper Create(string timezoneDataDirectory) var mapFile = names.FirstOrDefault(s => s.EndsWith(TZMAP_BIN_FILENAME, StringComparison.Ordinal)) ?? throw new MissingMetadataException( $"Timezone data resource '{prefix}{TZMAP_BIN_FILENAME}' not found on assembly '{asm.GetName().Name}'."); - using var raw = asm.GetManifestResourceStream(mapFile); + using var raw = asm.GetManifestResourceStream(mapFile) + ?? throw new MissingMetadataException( + $"Timezone data resource '{mapFile}' not found on assembly '{asm.GetName().Name}'."); using var fp = new GZipStream(raw, CompressionMode.Decompress); var prefixMap = BuildPrefixMapFromBin.ReadTimezoneMap(fp); // Rehydrate as IDictionary to match the existing constructor contract.