-
Notifications
You must be signed in to change notification settings - Fork 52
Expand file tree
/
Copy pathParser.php
More file actions
487 lines (459 loc) · 23.9 KB
/
Parser.php
File metadata and controls
487 lines (459 loc) · 23.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
<?php
namespace Fhp\Syntax;
use Fhp\Segment\AnonymousSegment;
use Fhp\Segment\BaseDeg;
use Fhp\Segment\BaseSegment;
use Fhp\Segment\ElementDescriptor;
use Fhp\Segment\Segmentkopf;
/**
* Parses the FinTS wire format (aka. syntax) into Messages, Segments, Data Element Groups (DEG) and Data Elements (DE).
*
* @link https://www.hbci-zka.de/dokumente/spezifikation_deutsch/fintsv3/FinTS_3.0_Formals_2017-10-06_final_version.pdf
* Section H.1 "Nachrichtensyntax"
*/
abstract class Parser
{
/** @var string Name of the PHP namespace under which all the segments are stored. */
public const SEGMENT_NAMESPACE = 'Fhp\Segment';
/**
 * The FinTs wire format specifies escaping with a question mark `?` for the syntax characters `+:'?@`. This
 * function splits strings delimited by one of these while honoring escaping and binary blocks marked with a
 * `@<size>@` header within the string.
 *
 * @link: https://www.hbci-zka.de/dokumente/spezifikation_deutsch/fintsv3/FinTS_3.0_Formals_2017-10-06_final_version.pdf
 *   Section H.1.3 "Entwertung"
 *
 * @param string $delimiter The delimiter around which to split. Must not be empty (unless $str is empty too).
 * @param string $str The raw string, usually a response from the server.
 * @param bool $trailingDelimiter If this is true, the delimiter is expected at the very end, and also kept at the
 *     end of each returned substring, i.e. it's considered part of each item instead of a delimiter between items.
 * @return string[] The split substrings. Note that escaped characters inside will still be escaped.
 * @throws \InvalidArgumentException If the delimiter is empty, if the input ends on an escape character, if a
 *     binary block is shorter than its declared length, or if $trailingDelimiter is set but content follows the
 *     last delimiter.
 * @throws \RuntimeException If the regex engine itself fails.
 */
public static function splitEscapedString(string $delimiter, string $str, bool $trailingDelimiter = false): array
{
    $length = strlen($str);
    if ($length === 0) {
        return [];
    }
    if ($delimiter === '') {
        // An empty alternative in the regex below would match the empty string at every offset without ever
        // advancing, i.e. the loop would never terminate.
        throw new \InvalidArgumentException('Delimiter must not be empty.');
    }

    // Since most of the $delimiters used in FinTs are also special characters in regexes, we need to escape.
    $delimiterPattern = preg_quote($delimiter, '/');
    $nextBegin = 0; // Start of the substring that is currently being accumulated.
    $offset = 0; // Position from which the next syntax character is searched.
    $result = [];
    while (true) {
        // Walk to the next syntax character of interest and handle it respectively.
        $ret = preg_match("/\\?|@([0-9]+)@|$delimiterPattern/", $str, $match, PREG_OFFSET_CAPTURE, $offset);
        if ($ret === false) {
            throw new \RuntimeException("preg_match failed on $str");
        }
        if ($ret === 0) { // There is no more syntax character behind $offset.
            if ($trailingDelimiter) {
                // The last character should have been a delimiter, so there should be no content remaining.
                if ($nextBegin !== $length) {
                    throw new \InvalidArgumentException(
                        'Unexpected content after last delimiter: ' . substr($str, $nextBegin));
                }
            } else {
                // Anything behind the last delimiter forms the last substring.
                $result[] = substr($str, $nextBegin);
            }
            break;
        }

        $matchedStr = $match[0][0]; // $match[0] refers to the entire matched string. [0] has the content
        $matchedOffset = (int) $match[0][1]; // and [1] has the offset within $str.
        $matchedLength = strlen($matchedStr);
        if ($matchedStr === '?') {
            // It's an escape character, so we should ignore this character and the next one.
            $offset = $matchedOffset + 2;
            if ($offset > $length) {
                throw new \InvalidArgumentException('Input ends on unescaped escape character.');
            }
        } elseif ($matchedStr[0] === Delimiter::BINARY) {
            // It's a block binary data, which we should skip entirely.
            $binaryLength = $match[1][0]; // $match[1] refers to the first (and only) capture group in the regex.
            if (!is_numeric($binaryLength)) {
                throw new \AssertionError("Invalid binary length $binaryLength");
            }
            // Note: The FinTS specification says that the length of the binary block is given in bytes (not
            // characters) and PHP's string functions like substr() or preg_match() also operate on byte offsets, so
            // this is fine.
            $offset = $matchedOffset + $matchedLength + (int) $binaryLength;
            if ($offset > $length) {
                throw new \InvalidArgumentException(
                    "Incomplete binary block at offset $matchedOffset, declared length $binaryLength, but "
                    . 'only has ' . ($length - $matchedOffset - $matchedLength) . ' bytes left');
            }
        } else {
            // The delimiter was matched, so output one split string and advance past the delimiter. When the
            // delimiter belongs to the item, include all of its bytes (not just one) in the emitted substring.
            $result[] = substr(
                $str, $nextBegin, $matchedOffset - $nextBegin + ($trailingDelimiter ? $matchedLength : 0));
            $nextBegin = $matchedOffset + $matchedLength;
            $offset = $nextBegin;
        }
    }
    return $result;
}
/**
 * Strips the escape character `?` in front of each of the syntax characters `+`, `:`, `'`, `?` and `@`.
 * A question mark that is followed by any other character is left untouched.
 *
 * @param string $str The raw string, usually a response from the server.
 * @return string The string with the escaping removed.
 */
public static function unescape(string $str): string
{
    // Capture the syntax character behind the question mark and keep only that capture.
    $escapedSyntaxCharacter = '/\?([+:\'?@])/';
    $keepCapturedCharacter = '$1';

    return preg_replace($escapedSyntaxCharacter, $keepCapturedCharacter, $str);
}
/**
 * Parses a scalar value aka. "Datenelement" (DE).
 *
 * @link: https://www.hbci-zka.de/dokumente/spezifikation_deutsch/fintsv3/FinTS_3.0_Formals_2017-10-06_final_version.pdf
 *   Section B.4 Datenformate
 *
 * @param string $rawValue The raw value (wire format).
 * @param string $type The PHP type that we need. This should support exactly the values for which
 *     {@link ElementDescriptor::isScalarType()} returns true.
 * @return bool|float|int|string|null The parsed value of type $type, null if the $rawValue was empty.
 * @throws \InvalidArgumentException If $rawValue is not a valid representation of $type.
 * @throws \RuntimeException If $type is not one of the supported scalar types.
 */
public static function parseDataElement(string $rawValue, string $type)
{
    if ($rawValue === '') {
        return null;
    }

    if ($type === 'int' || $type === 'integer') {
        // Only an optionally signed run of digits is an integer. is_numeric() would also let "1.5", "1e3" or
        // " 7" through, which the (int) cast below then silently truncates or reinterprets.
        if (preg_match('/^[+-]?[0-9]+$/D', $rawValue) !== 1) {
            throw new \InvalidArgumentException("Invalid int: $rawValue");
        }
        return (int) $rawValue;
    }

    if ($type === 'float') {
        // The wire format uses a decimal comma, and exactly one of them must be present.
        $normalized = str_replace(',', '.', $rawValue, $numCommas);
        if (!is_numeric($normalized) || $numCommas !== 1) {
            // Report the value as it was received, not the comma-replaced working copy.
            throw new \InvalidArgumentException("Invalid float: $rawValue");
        }
        return (float) $normalized;
    }

    if ($type === 'bool' || $type === 'boolean') {
        // Booleans are transmitted as "J" (true) and "N" (false).
        if ($rawValue === 'J') {
            return true;
        }
        if ($rawValue === 'N') {
            return false;
        }
        throw new \InvalidArgumentException("Invalid bool: $rawValue");
    }

    if ($type === 'string') {
        // Convert ISO-8859-1 (FinTS wire format encoding) to UTF-8 (PHP's encoding)
        return mb_convert_encoding(static::unescape($rawValue), 'UTF-8', 'ISO-8859-1');
    }

    throw new \RuntimeException("Unsupported type $type");
}
/**
 * Parses a binary block consisting of a `@<length>@` header followed by exactly `<length>` bytes of data.
 *
 * @param string $rawValue The raw value (wire format), e.g. "@4@abcd".
 * @return Bin|null The parsed value, or null if $rawValue was empty.
 * @throws \InvalidArgumentException If the header is missing or malformed, or if the declared length does not
 *     match the number of bytes that follow the header.
 */
public static function parseBinaryBlock(string $rawValue): ?Bin
{
    if ($rawValue === '') {
        return null;
    }

    // Search for the closing delimiter of the header from index 1 on, so that the opening one is not found.
    $delimiterPos = strpos($rawValue, Delimiter::BINARY, 1);
    if (
        substr($rawValue, 0, 1) !== Delimiter::BINARY
        || $delimiterPos === false
    ) {
        throw new \InvalidArgumentException("Expected binary block header, got $rawValue");
    }

    $lengthStr = substr($rawValue, 1, $delimiterPos - 1);
    // The length must be a plain run of digits, matching the `@([0-9]+)@` header that splitEscapedString()
    // recognizes. is_numeric() would also accept "1.5", "-1" or " 4" here.
    if (preg_match('/^[0-9]+$/D', $lengthStr) !== 1) {
        throw new \InvalidArgumentException("Invalid binary block length: $lengthStr");
    }
    $length = (int) $lengthStr;

    $result = new Bin(substr($rawValue, $delimiterPos + 1));
    $actualLength = strlen($result->getData());
    if ($actualLength !== $length) {
        throw new \InvalidArgumentException("Expected binary block of length $length, got $actualLength");
    }
    return $result;
}
/**
 * Splits a serialized data element group at the group delimiter and parses all of its elements into a DEG.
 *
 * @param string $rawElements The serialized wire format for a data element group.
 * @param string|BaseDeg $type The type (PHP class name) of the Deg to be parsed, or alternatively the instance to
 *     write to (the same instance will be returned from this function).
 * @param bool $allowEmpty If true, this returns either a valid DEG, or null if *all* the fields were empty.
 * @return BaseDeg|null The parsed value, of type $type, or null if all fields were empty and $allowEmpty is true.
 * @throws \InvalidArgumentException If the input contains more elements than the DEG consumed.
 */
public static function parseDeg(string $rawElements, $type, bool $allowEmpty = false): ?BaseDeg
{
    $splitElements = static::splitEscapedString(Delimiter::GROUP, $rawElements);
    [$deg, $offset] = static::parseDegElements($splitElements, $type, $allowEmpty);

    // Every element must have been consumed, left-overs mean the input does not fit the DEG.
    $numElements = count($splitElements);
    if ($offset >= $numElements) {
        return $deg;
    }
    throw new \InvalidArgumentException(
        "Expected only $offset elements, but got " . $numElements . ': ' . print_r($splitElements, true));
}
/**
 * Parses a series of already split raw elements into a DEG, recursing into nested DEGs.
 *
 * @param string[] $rawElements The serialized wire format for a series of elements (already split). The array is
 *     received by value and is not modified; consumption is tracked exclusively through $offset.
 * @param string|BaseDeg $type The type (PHP class name) of the Deg to be parsed, or alternatively the instance to
 *     write to (the same instance will be returned from this function).
 * @param bool $allowEmpty If true, this returns either a valid DEG, or null if *all* the fields were empty.
 * @param int $offset The position in $rawElements to be read next.
 * @return array (BaseDeg|null, int)
 *     1. The parsed value, which has the given $type or is null in case all the fields were empty and $allowEmpty
 *        is true.
 *     2. The offset at which parsing should continue. The difference between this returned offset and the $offset
 *        that was passed in is the number of elements that this function call consumed.
 * @throws \InvalidArgumentException If a mandatory field is missing or a field fails to parse. For parse
 *     failures, the original exception is available through getPrevious().
 */
private static function parseDegElements(array $rawElements, $type, bool $allowEmpty = false, int $offset = 0): array
{
    /** @var BaseDeg $result */
    $result = is_string($type) ? new $type() : $type;
    $descriptor = $result->getDescriptor();
    $expectedIndex = 0;
    $allEmpty = true;
    $missingFieldError = null; // When $allowEmpty, we need to tolerate errors at first, but maybe throw them later.

    // The iteration order guarantees that $index is strictly monotonically increasing, but there can be gaps.
    foreach ($descriptor->elements as $index => $elementDescriptor) {
        $offset += ($index - $expectedIndex); // Adjust for skipped indices.
        $numRepetitions = $elementDescriptor->repeated === 0 ? 1 : $elementDescriptor->repeated;
        // Advance to the next expected elementDescriptor index. This must be anchored at the current $index (and
        // not just incremented), because otherwise a gap would never be caught up with and would be added to
        // $offset again for every following element.
        $expectedIndex = $index + $numRepetitions;
        $isSingleField = is_string($elementDescriptor->type) // Scalar type / DE
            || $elementDescriptor->type->getName() === Bin::class;

        // Skip optional single elements that are not present. Note that for elements with multiple fields we cannot
        // just skip because here we would only detect whether the first field is empty or not.
        if ($isSingleField && (!array_key_exists($offset, $rawElements) || $rawElements[$offset] === '')) {
            if ($elementDescriptor->optional) {
                ++$offset;
                continue;
            } elseif ($missingFieldError === null) {
                $missingFieldError = new \InvalidArgumentException(
                    "Missing field $descriptor->class.$elementDescriptor->field");
                if (!$allowEmpty) {
                    throw $missingFieldError;
                }
            }
        }

        // Parse element (possibly multiple values recursively).
        try {
            for ($repetition = 0; $repetition < $numRepetitions; ++$repetition) {
                if ($offset >= count($rawElements)) {
                    break; // End of input reached
                }
                if ($isSingleField) {
                    if ($rawElements[$offset] === '' && $repetition >= 1) { // Skip empty repeated entries.
                        ++$offset;
                        continue;
                    }
                    if (is_string($elementDescriptor->type)) {
                        $value = static::parseDataElement($rawElements[$offset], $elementDescriptor->type);
                    } else {
                        $value = static::parseBinaryBlock($rawElements[$offset]);
                    }
                    ++$offset;
                } else { // Nested DEG, will consume a certain number of elements and adjust the $offset accordingly.
                    list($value, $offset) = static::parseDegElements(
                        $rawElements, $elementDescriptor->type->name,
                        $allowEmpty || $elementDescriptor->optional, $offset);
                }
                if ($value !== null) {
                    $allEmpty = false;
                }
                if ($elementDescriptor->repeated === 0) {
                    $result->{$elementDescriptor->field} = $value;
                } elseif ($value !== null) {
                    $result->{$elementDescriptor->field}[] = $value;
                }
            }
        } catch (\InvalidArgumentException $e) {
            // Add the field that was being parsed as context and keep the original exception as the cause.
            throw new \InvalidArgumentException(
                "Failed to parse $descriptor->class::$elementDescriptor->field: $e", 0, $e);
        }
    }

    if ($allEmpty && $allowEmpty) {
        return [null, $offset];
    }
    if ($missingFieldError !== null) {
        // At least one value was present, so the mandatory field that was tolerated earlier is an error after all.
        throw $missingFieldError;
    }
    return [$result, $offset];
}
/**
 * Attempts to build the value that a field takes when nothing was transmitted for it. Optional fields are simply
 * null. A mandatory, non-repeated field whose type is a subclass of {@link BaseDeg} gets a fresh instance, provided
 * that every field inside of it can (recursively) be left empty as well. In all other cases there is no valid empty
 * value, which is signalled by returning false.
 *
 * @param ElementDescriptor $descriptor The descriptor of the field to fill in.
 * @return BaseDeg|false|null A new empty instance of the field's value type, or null if that's a valid empty value
 *     for the field, or false if no empty value is possible, i.e. if there is at least one non-optional field
 *     within.
 * @throws \RuntimeException If the DEG class cannot be instantiated.
 */
private static function tryConstructEmptyValue(ElementDescriptor $descriptor)
{
    if ($descriptor->optional) {
        return null; // No need to fill optional fields.
    }
    if ($descriptor->repeated !== 0) {
        return false; // Cannot fill a repeated field that requires at least one entry.
    }
    $isDegField = $descriptor->type instanceof \ReflectionClass
        && $descriptor->type->isSubclassOf(BaseDeg::class);
    if (!$isDegField) {
        return false; // Cannot create empty value for non-DEG field.
    }

    try {
        /** @var BaseDeg $emptyDeg */
        $emptyDeg = $descriptor->type->newInstance();
    } catch (\ReflectionException $e) {
        throw new \RuntimeException("Failed to create $descriptor->type", 0, $e);
    }

    // The DEG can only stay empty if each of its own fields has an empty value, too.
    foreach ($emptyDeg->getDescriptor()->elements as $nestedDescriptor) {
        $nestedValue = static::tryConstructEmptyValue($nestedDescriptor);
        if ($nestedValue === false) {
            return false;
        }
        $emptyDeg->{$nestedDescriptor->field} = $nestedValue;
    }
    return $emptyDeg;
}
/**
 * Parses a single raw segment into an instance of the given segment type and verifies that the segment header
 * matches the type's identifier and version.
 *
 * @param string $rawSegment The serialized wire format for a single segment (segment delimiter must be present at
 *     the end). This should be ISO-8859-1-encoded.
 * @param string|BaseSegment $type The type (PHP class name) of the segment to be parsed, or alternatively the
 *     instance to write to (the same instance will be returned from this function).
 * @return BaseSegment The parsed segment of type $type.
 * @throws \InvalidArgumentException If the segment has too many elements, lacks a mandatory field, or its header
 *     does not match the identifier or version of $type.
 */
public static function parseSegment(string $rawSegment, $type): BaseSegment
{
    /** @var BaseSegment $result */
    $result = is_string($type) ? new $type() : $type;
    $rawElements = static::splitIntoSegmentElements($rawSegment);
    $descriptor = $result->getDescriptor();
    if (array_key_last($rawElements) > $descriptor->maxIndex) {
        throw new \InvalidArgumentException("Too many elements for $descriptor->class: $rawSegment");
    }

    // The iteration order guarantees that $index is strictly monotonically increasing, but there can be gaps.
    foreach ($descriptor->elements as $index => $elementDescriptor) {
        if (!array_key_exists($index, $rawElements) || $rawElements[$index] === '') {
            $emptyValue = static::tryConstructEmptyValue($elementDescriptor);
            if ($emptyValue === false) {
                throw new \InvalidArgumentException("Missing field $descriptor->class.$elementDescriptor->field");
            }
            $result->{$elementDescriptor->field} = $emptyValue;
            continue;
        }

        // Note: The handling of empty values may be incorrect here, parseSegmentElement() can return null.
        if ($elementDescriptor->repeated === 0) {
            $result->{$elementDescriptor->field} =
                static::parseSegmentElement($rawElements[$index], $elementDescriptor);
        } else {
            for ($repetition = 0; $repetition < $elementDescriptor->repeated; ++$repetition) {
                if ($index + $repetition >= count($rawElements)) {
                    break; // End of input reached.
                }
                if ($rawElements[$index + $repetition] !== '') { // Skip empty entries.
                    $result->{$elementDescriptor->field}[$repetition] =
                        static::parseSegmentElement($rawElements[$index + $repetition], $elementDescriptor);
                }
            }
        }
    }

    // The curly braces in the messages below are required: simple interpolation stops after the first property
    // access, so without them the whole Segmentkopf would be converted to a string, followed by a literal
    // "->segmentkennung" or "->segmentversion".
    if ($result->segmentkopf->segmentkennung !== $descriptor->kennung) {
        throw new \InvalidArgumentException(
            "Invalid segment type {$result->segmentkopf->segmentkennung} for $descriptor->class");
    }
    if ($result->segmentkopf->segmentversion !== $descriptor->version) {
        throw new \InvalidArgumentException(
            "Invalid version {$result->segmentkopf->segmentversion} for $descriptor->class");
    }
    return $result;
}
/**
 * Parses a segment without knowledge of its structure. The first element becomes the segment header, every other
 * element is kept as null (if empty), as the raw string (if it has no group delimiter inside) or as an array of
 * its raw sub-elements.
 *
 * @param string $rawSegment The serialized wire format for a single segment (segment delimiter must be present at
 *     the end).
 * @return AnonymousSegment The segment parsed as an anonymous segment.
 */
public static function parseAnonymousSegment(string $rawSegment): AnonymousSegment
{
    $rawElements = static::splitIntoSegmentElements($rawSegment);
    // The header is taken off the front, everything that remains is the segment's payload.
    $segmentkopf = Segmentkopf::parse(array_shift($rawElements));

    $elements = [];
    foreach ($rawElements as $rawElement) {
        if (strlen($rawElement) === 0) {
            $elements[] = null;
            continue;
        }
        $subElements = static::splitEscapedString(Delimiter::GROUP, $rawElement);
        // Assume that a single sub-element is a plain value rather than a group or a repetition.
        $elements[] = count($subElements) > 1 ? $subElements : $rawElement;
    }
    return new AnonymousSegment($segmentkopf, $elements);
}
/**
 * Removes the segment delimiter from the end of a raw segment and splits the rest at the element delimiter.
 *
 * @param string $rawSegment The serialized wire format for a single segment incl delimiter at the end.
 * @return string[] The segment split into raw elements, never empty.
 * @throws \InvalidArgumentException If the segment delimiter is missing or nothing is left in front of it.
 */
private static function splitIntoSegmentElements(string $rawSegment): array
{
    $lastCharacter = substr($rawSegment, -1);
    if ($lastCharacter !== Delimiter::SEGMENT) {
        throw new \InvalidArgumentException("Raw segment does not end with delimiter: $rawSegment");
    }

    // Strip segment delimiter at the end.
    $rawSegment = substr($rawSegment, 0, -1);
    $rawElements = static::splitEscapedString(Delimiter::ELEMENT, $rawSegment);
    if ($rawElements === []) {
        throw new \InvalidArgumentException("Invalid segment: $rawSegment");
    }
    return $rawElements;
}
/**
 * Dispatches a raw segment element to the parser that matches the type given by its descriptor.
 *
 * @param string $rawElement The raw content (unparsed wire format) of an element, which can either be a single
 *     Data Element (DE) or a group (DEG), as determined by the descriptor.
 * @param ElementDescriptor $descriptor The descriptor that describes the expected format of the element.
 * @return BaseDeg|Bin|bool|float|int|string|null The parsed value, or null if it was empty.
 */
private static function parseSegmentElement(string $rawElement, ElementDescriptor $descriptor)
{
    $elementType = $descriptor->type;

    // Scalar value / DE: the type is given by its name.
    if (is_string($elementType)) {
        return static::parseDataElement($rawElement, $elementType);
    }

    // Otherwise the type is a class, which is either the binary container or a DEG.
    return $elementType->getName() === Bin::class
        ? static::parseBinaryBlock($rawElement)
        : static::parseDeg($rawElement, $elementType->name, $descriptor->optional);
}
/**
 * Reads the segment header to find out which segment class is responsible and parses the segment with it. If no
 * matching class exists, the segment is parsed as an {@link AnonymousSegment} instead.
 *
 * @param string $rawSegment The serialized wire format for a single segment (segment delimiter must be present at
 *     the end). This should be ISO-8859-1-encoded.
 * @return BaseSegment The parsed segment, possibly an {@link AnonymousSegment}.
 * @throws \InvalidArgumentException If the segment delimiter at the end is missing.
 */
public static function detectAndParseSegment(string $rawSegment): BaseSegment
{
    if (substr($rawSegment, -1) !== Delimiter::SEGMENT) {
        throw new \InvalidArgumentException("Raw segment does not end with delimiter: $rawSegment");
    }

    // The header reaches up to the first element delimiter. Without any, let's assume it's an empty segment, i.e.
    // all of it (except for the SEGMENT delimiter at the end) is the header.
    $headerLength = strpos($rawSegment, Delimiter::ELEMENT);
    if ($headerLength === false) {
        $headerLength = strlen($rawSegment) - 1;
    }
    $segmentkopf = Segmentkopf::parse(substr($rawSegment, 0, $headerLength));
    $kennung = $segmentkopf->segmentkennung;
    $className = $kennung . 'v' . $segmentkopf->segmentversion;

    $subNamespaces = [
        // The default class name is Fhp\Segment\HABCD\HABCDvN.
        $kennung,
        // Alternatively, allow Geschäftsvorfall segments (HKXYZ, HIXYZ and HIXYZS) to live in an abbreviated
        // namespace, i.e. like Fhp\Segment\XYZ\HKXYZSvN
        substr($kennung, 2, 3),
    ];
    foreach ($subNamespaces as $subNamespace) {
        $segmentType = static::SEGMENT_NAMESPACE . '\\' . $subNamespace . '\\' . $className;
        if (class_exists($segmentType)) {
            return static::parseSegment($rawSegment, $segmentType);
        }
    }

    // If the segment type is not implemented, fall back to an anonymous segment.
    return static::parseAnonymousSegment($rawSegment);
}
/**
 * Splits a series of concatenated segments (each one ending with the segment delimiter) and parses them one by
 * one, in order.
 *
 * @param string $rawSegments Concatenated segments in wire format.
 * @return BaseSegment[] The parsed segments.
 */
public static function parseSegments(string $rawSegments): array
{
    if ($rawSegments === '') {
        return [];
    }

    $segments = [];
    // The delimiter stays attached to each raw segment, because the segment parsers expect it at the end.
    foreach (static::splitEscapedString(Delimiter::SEGMENT, $rawSegments, true) as $rawSegment) {
        $segments[] = static::detectAndParseSegment($rawSegment);
    }
    return $segments;
}
}