Skip to content

Commit 58a6fb7

Browse files
committed
Compile the pattern for PatternFormatter
The verification that was performed on the pattern was almost a parser/tokenizer in itself. This change leverages that behavior to introduce full pattern compilation. When the PatternFormatter encounters a pattern that it has never seen before, it compiles that pattern into a 3-part CompiledPattern instance that has a search regex, a replacement pattern and instructions for the callback. Upon seeing an already compiled pattern, all the PatternFormatter has to do is go through the motions (one preg_replace_callback) of the existing compiled pattern. Further steps for pattern canonicalization could be taken, such as normalizing equivalent patterns into a single form, so they could share the same cache entry. However, that micro-optimization was too expensive and counter-productive. This change also opens up possibilities for in-file warmup, as CompiledPattern instances are simple objects. A user could pre-compile their hot-path patterns beforehand to share the cache even across different processes.
1 parent abb8a25 commit 58a6fb7

3 files changed

Lines changed: 211 additions & 213 deletions

File tree

src/Internal/CompiledPattern.php

Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
<?php
2+
3+
/*
4+
* SPDX-FileCopyrightText: (c) Respect Project Contributors
5+
* SPDX-License-Identifier: ISC
6+
* SPDX-FileContributor: Alexandre Gomes Gaigalas <alganet@gmail.com>
7+
*/
8+
9+
declare(strict_types=1);
10+
11+
namespace Respect\StringFormatter\Internal;
12+
13+
use Respect\StringFormatter\InvalidFormatterException;
14+
15+
use function array_keys;
16+
use function count;
17+
use function implode;
18+
use function mb_strtolower;
19+
use function mb_strtoupper;
20+
use function mb_substr;
21+
use function preg_match;
22+
use function preg_match_all;
23+
use function sprintf;
24+
use function str_starts_with;
25+
use function strtolower;
26+
use function substr;
27+
28+
use const PREG_OFFSET_CAPTURE;
29+
30+
/**
 * A formatter pattern compiled into three parts: a search regex, a replacement
 * template and per-group callback instructions.
 *
 * Instances are created only through {@see CompiledPattern::compile()} and are
 * memoized per pattern string for the lifetime of the process.
 */
final class CompiledPattern
{
    /** Filter characters of the pattern language and the regex fragment each one accepts. */
    private const array FILTERS = [
        '#' => '.',
        '0' => '[[:digit:]]',
        'A' => '[[:upper:]]',
        'a' => '[[:lower:]]',
        'C' => '[[:alpha:]]',
        'W' => '[[:alnum:]]',
    ];

    /**
     * Escape letters and the transform they select. The lowercase escape applies
     * to the next filter only; its uppercase form stays active until `\E`.
     */
    private const array TRANSFORM_MAP = ['l' => 'lower', 'u' => 'upper', 'i' => 'invert'];

    /** @var array<string, CompiledPattern> Compiled patterns keyed by their source pattern. */
    private static array $compiledPatterns = [];

    /**
     * Regex quantifiers keyed by quantifier token. The compiled form depends only
     * on the token text, never on where the token sits in the pattern.
     *
     * @var array<string, string>
     */
    private static array $compiledQualifiers = [];

    private function __construct(
        private(set) readonly string $pattern,
        private(set) readonly string $search,
        private(set) readonly string $replacement,
        /** @var array<int, array{filter: string, transform: string|null}> */
        private(set) readonly array $instructions,
    ) {
    }

    /**
     * Compiles a pattern, or returns the previously compiled instance for it.
     *
     * @throws InvalidFormatterException When the pattern is empty, cannot be tokenized,
     *                                   ends in a lone backslash, or contains a misplaced
     *                                   or malformed quantifier.
     */
    public static function compile(string $pattern): self
    {
        if (isset(self::$compiledPatterns[$pattern])) {
            return self::$compiledPatterns[$pattern];
        }

        if ($pattern === '') {
            throw new InvalidFormatterException('Pattern cannot be empty');
        }

        $search = '';
        $replacement = '';
        $instructions = [];
        $groupIndex = 1;

        // $transformState persists across filters until reset by \E;
        // $nextTransform is consumed by the next filter only.
        $transformState = null;
        $nextTransform = null;

        // Token kinds, in order of precedence: escape sequence, filter character,
        // quantifier, run of literal text, and any single leftover character.
        $matched = preg_match_all(sprintf(
            '/(?:\\\\.|[%1$s]|(?:\{[^}]*\}|[*+?])|[^\\\%1$s{}+*?]+|.)/u',
            implode('', array_keys(self::FILTERS)),
        ), $pattern, $tokens, PREG_OFFSET_CAPTURE);

        // A failed match (e.g. the pattern is not valid UTF-8) would otherwise be
        // cached as an empty compiled pattern.
        if ($matched === false) {
            throw new InvalidFormatterException('Pattern could not be tokenized; it must be valid UTF-8');
        }

        $tokenList = $tokens[0];
        $count = count($tokenList);

        for ($i = 0; $i < $count; $i++) {
            [$tokenText, $offset] = $tokenList[$i];

            if (str_starts_with($tokenText, '\\')) {
                if ($tokenText === '\\') {
                    throw new InvalidFormatterException('Incomplete escape sequence at end of pattern');
                }

                $char = mb_substr($tokenText, 1);

                // \d captures at most one character and marks it for deletion.
                if ($char === 'd') {
                    $inner = '.';
                    $search .= "((?:.*?$inner){0,1})";
                    $replacement .= "%{$groupIndex}$";
                    $instructions[$groupIndex] = ['filter' => "/$inner/u", 'transform' => 'delete'];
                    $groupIndex++;
                    continue;
                }

                // \E ends a persistent transform.
                if ($char === 'E') {
                    $transformState = null;
                    continue;
                }

                // \l, \u, \i: transform for the next filter only.
                if (isset(self::TRANSFORM_MAP[$char])) {
                    $nextTransform = self::TRANSFORM_MAP[$char];
                    continue;
                }

                // \L, \U, \I: transform that stays active for the following filters.
                $lowerChar = strtolower($char);
                if (isset(self::TRANSFORM_MAP[$lowerChar]) && $char !== $lowerChar) {
                    $transformState = self::TRANSFORM_MAP[$lowerChar];
                    continue;
                }

                // Any other escaped character is emitted literally.
                $replacement .= $char;
                continue;
            }

            if (isset(self::FILTERS[$tokenText])) {
                $filterChar = $tokenText;
                $regexQuantifier = '{0,1}';

                // A quantifier token directly after a filter belongs to that filter.
                if (isset($tokenList[$i + 1]) && preg_match('/^(?:\{[^}]*\}|[*+?])$/u', $tokenList[$i + 1][0])) {
                    $i++;
                    $regexQuantifier = self::compileQualifier($tokenList[$i][0], $tokenList[$i][1]);
                }

                $inner = self::FILTERS[$filterChar];
                $search .= "((?:.*?$inner)$regexQuantifier)";

                $replacement .= "%{$groupIndex}$";
                $instructions[$groupIndex] = [
                    'filter' => "/$inner/u",
                    'transform' => $nextTransform ?? $transformState,
                ];

                $groupIndex++;
                $nextTransform = null;
                continue;
            }

            // A quantifier reached here was not consumed by a preceding filter.
            if (preg_match('/^(?:\{[^}]*\}|[*+?])$/u', $tokenText)) {
                throw new InvalidFormatterException(sprintf(
                    'Quantifier "%s" must follow a filter pattern at position %d',
                    $tokenText[0],
                    $offset,
                ));
            }

            // An opening brace without a matching closing brace.
            if (str_starts_with($tokenText, '{')) {
                throw new InvalidFormatterException(
                    sprintf('Invalid or malformed quantifier at position %d', $offset),
                );
            }

            $replacement .= $tokenText;
        }

        return self::$compiledPatterns[$pattern] = new self(
            $pattern,
            '/^' . $search . '/us',
            $replacement,
            $instructions,
        );
    }

    /**
     * Applies a named transform to a captured value.
     *
     * @param string      $val       The captured text.
     * @param string|null $transform One of 'delete', 'lower', 'upper', 'invert';
     *                               anything else returns the value unchanged.
     */
    public static function transform(string $val, string|null $transform): string
    {
        return match ($transform) {
            'delete' => '',
            'lower' => mb_strtolower($val),
            'upper' => mb_strtoupper($val),
            'invert' => self::invertCase($val),
            default => $val,
        };
    }

    /**
     * Swaps the case of every character: characters that are already lowercase
     * (or caseless) are uppercased, everything else is lowercased.
     *
     * Done per character rather than with a byte-wise XOR of the lower/upper
     * strings, which breaks on titlecase letters and on case mappings that
     * change the byte length.
     */
    private static function invertCase(string $val): string
    {
        $inverted = '';

        foreach (\mb_str_split($val) as $char) {
            $lower = mb_strtolower($char);
            $inverted .= $char === $lower ? mb_strtoupper($char) : $lower;
        }

        return $inverted;
    }

    /**
     * Converts a pattern quantifier token into a regex quantifier with a minimum of zero.
     *
     * `*` and `+` both become `*`; `{n}`, `{n,m}` and `{,m}` become `{0,max}`;
     * `{n,}` becomes `*`.
     *
     * @param string $token  The quantifier token as written in the pattern.
     * @param int    $offset Offset of the token in the pattern, used in the error message only.
     *
     * @throws InvalidFormatterException When the token is not a supported quantifier.
     */
    private static function compileQualifier(string $token, int $offset): string
    {
        if (isset(self::$compiledQualifiers[$token])) {
            return self::$compiledQualifiers[$token];
        }

        if ($token === '*' || $token === '+') {
            return '*';
        }

        // The D modifier stops `$` from matching before a trailing newline, so
        // a token such as "{5\n}" is rejected here instead of slipping through.
        $content = substr($token, 1, -1);
        if (!preg_match('/^(?:\d+(?:,\d*)?|,\d+)$/D', $content)) {
            throw new InvalidFormatterException(sprintf('Invalid or malformed quantifier at position %d', $offset));
        }

        preg_match('/^\{(\d*)(?:,(\d*))?\}$/', $token, $m);
        // Group 2 is absent for "{n}", empty for "{n,}" and the upper bound otherwise.
        $max = $m[2] ?? $m[1];

        return self::$compiledQualifiers[$token] = $max === '' ? '*' : "{0,$max}";
    }
}

0 commit comments

Comments
 (0)