Skip to content

Commit 473f0d0

Browse files
committed
Add a new HTMLType
1 parent b59fdb7 commit 473f0d0

14 files changed

Lines changed: 377 additions & 32 deletions

File tree

src/core/etl/src/Flow/ETL/Row/EntryFactory.php

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,16 @@ enum_entry,
2020
uuid_entry,
2121
xml_element_entry,
2222
xml_entry};
23-
use function Flow\Types\DSL\{type_date, type_datetime, type_json, type_optional, type_string, type_time, type_uuid, type_xml, type_xml_element};
23+
use function Flow\Types\DSL\{type_date,
24+
type_datetime,
25+
type_html,
26+
type_json,
27+
type_optional,
28+
type_string,
29+
type_time,
30+
type_uuid,
31+
type_xml,
32+
type_xml_element};
2433
use Flow\ETL\Exception\{InvalidArgumentException,
2534
RuntimeException,
2635
SchemaDefinitionNotFoundException};
@@ -41,6 +50,7 @@ enum_entry,
4150
UuidType,
4251
XMLElementType,
4352
XMLType};
53+
use Flow\Types\Type\Logical\HTMLType;
4454
use Flow\Types\Type\Native\{
4555
ArrayType,
4656
BooleanType,
@@ -90,14 +100,12 @@ public function create(string $entryName, mixed $value, Schema|Definition|null $
90100

91101
if ($stringChecker->isJson()) {
92102
$valueType = type_json();
93-
}
94-
95-
if ($stringChecker->isUuid()) {
103+
} elseif ($stringChecker->isUuid()) {
96104
$valueType = type_uuid();
97-
}
98-
99-
if ($stringChecker->isXML()) {
105+
} elseif ($stringChecker->isXML()) {
100106
$valueType = type_xml();
107+
} elseif ($stringChecker->isHTML()) {
108+
$valueType = type_html();
101109
}
102110
}
103111

@@ -222,6 +230,11 @@ public function createAs(string $entryName, mixed $value, Definition|Type $defin
222230
}
223231
}
224232

233+
if ($type instanceof HTMLType) {
234+
// TODO https://github.com/flow-php/flow/issues/1914
235+
return xml_entry($entryName, type_optional($type)->cast($value), $metadata);
236+
}
237+
225238
if ($type instanceof XMLType) {
226239
return xml_entry($entryName, type_optional($type)->cast($value), $metadata);
227240
}

src/core/etl/tests/Flow/ETL/Tests/Unit/Function/CastTest.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ public static function cast_provider() : array
4545
'xml_to_array' => [$xml, 'array', ['root' => ['foo' => ['@attributes' => ['baz' => 'buz'], '@value' => 'bar']]]],
4646
'string_to_xml' => [$xmlString, 'xml', $xml],
4747
'xml_to_string' => [$xml, 'string', '<root><foo baz="buz">bar</foo></root>'],
48+
'full_xml_to_string' => [$fullXMLString, 'string', '<root><foo baz="buz">bar</foo></root>'],
4849
'datetime' => [new \DateTimeImmutable('2023-01-01 00:00:00 UTC'), 'string', '2023-01-01T00:00:00+00:00'],
4950
'datetime_to_date' => [new \DateTimeImmutable('2023-01-01 00:01:00 UTC'), 'date', new \DateTimeImmutable('2023-01-01T00:00:00+00:00')],
5051
'uuid' => [Uuid::fromString('a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11'), 'string', 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11'],

src/lib/types/src/Flow/Types/DSL/functions.php

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
UuidType,
2626
XMLElementType,
2727
XMLType};
28+
use Flow\Types\Type\Logical\HTMLType;
2829
use Flow\Types\Type\Native\{ArrayType,
2930
BooleanType,
3031
CallableType,
@@ -425,6 +426,15 @@ function type_literal(bool|float|int|string $value) : LiteralType
425426
return new LiteralType($value);
426427
}
427428

429+
/**
 * DSL shortcut that builds the logical HTML type.
 *
 * @return Type<\DOMDocument> a new HTMLType instance
 */
#[DocumentationDSL(module: Module::TYPES, type: DSLType::TYPE)]
function type_html() : Type
{
    return new HTMLType();
}
437+
428438
/**
429439
* @template T
430440
*

src/lib/types/src/Flow/Types/Type/AutoCaster.php

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@
99
type_date,
1010
type_datetime,
1111
type_float,
12+
type_html,
1213
type_integer,
1314
type_json,
14-
type_uuid};
15+
type_uuid,
16+
type_xml};
1517
use Flow\Types\Type\Native\String\StringTypeChecker;
1618

1719
final readonly class AutoCaster
@@ -95,6 +97,14 @@ private function castToString(string $value) : mixed
9597
return type_datetime()->cast($value);
9698
}
9799

100+
if ($typeChecker->isXML()) {
101+
return type_xml()->cast($value);
102+
}
103+
104+
if ($typeChecker->isHTML()) {
105+
return type_html()->cast($value);
106+
}
107+
98108
return $value;
99109
}
100110
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\Types\Type\Logical;
6+
7+
use function Flow\Types\DSL\type_string;
8+
use Flow\Types\Exception\{CastingException, InvalidTypeException};
9+
use Flow\Types\Type;
10+
11+
/**
 * Logical type for HTML documents represented as \DOMDocument instances.
 *
 * Strings (and values that can be cast to string) are parsed with
 * DOMDocument::loadHTML() using LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD.
 *
 * @implements Type<\DOMDocument>
 */
final readonly class HTMLType implements Type
{
    /**
     * Returns $value unchanged when it is a valid HTML document.
     *
     * @throws InvalidTypeException when $value does not pass isValid()
     */
    public function assert(mixed $value) : \DOMDocument
    {
        if ($this->isValid($value)) {
            return $value;
        }

        throw InvalidTypeException::value($value, $this);
    }

    /**
     * Converts $value into an HTML \DOMDocument.
     *
     * Valid documents are returned as-is, strings are parsed directly and any
     * other value is first cast to string and then parsed.
     *
     * @throws CastingException when $value cannot be turned into a string or the string cannot be parsed as HTML
     */
    public function cast(mixed $value) : \DOMDocument
    {
        if ($this->isValid($value)) {
            return $value;
        }

        if (\is_string($value)) {
            return $this->htmlDocument($value);
        }

        try {
            $stringValue = type_string()->cast($value);

            return $this->htmlDocument($stringValue);
        } catch (CastingException $e) {
            // Re-wrap so the reported value/type are the original input and this type.
            throw new CastingException($value, $this, $e);
        }
    }

    /**
     * A value is valid when it is a \DOMDocument whose saveHTML() output can be parsed as HTML again.
     */
    public function isValid(mixed $value) : bool
    {
        if ($value instanceof \DOMDocument) {
            return $this->isHtml((string) $value->saveHTML());
        }

        return false;
    }

    public function normalize() : array
    {
        return [
            'type' => 'html',
        ];
    }

    public function toString() : string
    {
        return 'html';
    }

    /**
     * Parses $value and returns the document that actually holds the parsed markup.
     *
     * @throws CastingException when $value is empty or cannot be parsed as HTML
     */
    private function htmlDocument(string $value) : \DOMDocument
    {
        $doc = new \DOMDocument();

        // Parse into the very document that is returned; validating against a
        // throw-away instance would hand an empty document back to the caller.
        if (!$this->loadInto($doc, $value)) {
            throw new CastingException($value, $this);
        }

        return $doc;
    }

    /**
     * Tells whether $value can be parsed as HTML, using a scratch document.
     */
    private function isHtml(string $value) : bool
    {
        return $this->loadInto(new \DOMDocument(), $value);
    }

    /**
     * Loads $value into $document as HTML and reports success without emitting libxml warnings.
     */
    private function loadInto(\DOMDocument $document, string $value) : bool
    {
        // DOMDocument::loadHTML() throws a ValueError for an empty source on PHP 8;
        // report it as "not HTML" so callers get the type's own exceptions.
        if ($value === '') {
            return false;
        }

        // Collect libxml errors internally instead of raising warnings, then put
        // the caller's error mode back exactly as it was.
        $previousMode = \libxml_use_internal_errors(true);

        try {
            return (bool) $document->loadHTML($value, \LIBXML_HTML_NOIMPLIED | \LIBXML_HTML_NODEFDTD);
        } finally {
            \libxml_clear_errors();
            \libxml_use_internal_errors($previousMode);
        }
    }
}

src/lib/types/src/Flow/Types/Type/Logical/NonEmptyStringType.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ public function cast(mixed $value) : string
5050
}
5151

5252
if ($value instanceof \DOMDocument) {
53-
return $this->assert($value->saveXML($value->documentElement) ?: '');
53+
return $this->assert(($value->saveXML($value->documentElement) ?: $value->saveHtml($value->documentElement)) ?: '');
5454
}
5555

5656
if ($value instanceof \DOMElement) {

src/lib/types/src/Flow/Types/Type/Logical/XMLType.php

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
namespace Flow\Types\Type\Logical;
66

7-
use function Flow\Types\DSL\{type_string, type_xml};
7+
use function Flow\Types\DSL\type_string;
88
use Flow\Types\Exception\{CastingException, InvalidTypeException};
99
use Flow\Types\Type;
1010

@@ -28,11 +28,11 @@ public function cast(mixed $value) : \DOMDocument
2828
return $value;
2929
}
3030

31-
if (\is_string($value)) {
32-
$doc = new \DOMDocument();
31+
$doc = new \DOMDocument();
3332

34-
if (!@$doc->loadXML($value)) {
35-
throw new CastingException($value, type_xml());
33+
if (\is_string($value)) {
34+
if (!$this->isXml($doc, $value)) {
35+
throw new CastingException($value, $this);
3636
}
3737

3838
return $doc;
@@ -41,9 +41,7 @@ public function cast(mixed $value) : \DOMDocument
4141
try {
4242
$stringValue = type_string()->cast($value);
4343

44-
$doc = new \DOMDocument();
45-
46-
if (!@$doc->loadXML((string) $stringValue)) {
44+
if (!$this->isXml($doc, $stringValue)) {
4745
throw new CastingException($stringValue, $this);
4846
}
4947

@@ -56,7 +54,7 @@ public function cast(mixed $value) : \DOMDocument
5654
public function isValid(mixed $value) : bool
5755
{
5856
if ($value instanceof \DOMDocument) {
59-
return true;
57+
return $this->isXml($value, (string) $value->saveXML());
6058
}
6159

6260
return false;
@@ -73,4 +71,13 @@ public function toString() : string
7371
{
7472
return 'xml';
7573
}
74+
75+
/**
 * Loads $value into $document as XML and reports whether that succeeded.
 *
 * The parser is only tried when $value matches the pre-check pattern below;
 * parser warnings are suppressed. On success $document holds the parsed content.
 *
 * NOTE(review): the pre-check requires a tag starting with "<xml" and a closing
 * "</xml>", so XML with any other root element is rejected — confirm this is intended.
 */
private function isXml(\DOMDocument $document, string $value) : bool
{
    $passesPreCheck = (bool) \preg_match('/<xml(.+?)>(.+?)<\/xml>/', $value);

    return $passesPreCheck && @$document->loadXML($value);
}
7683
}

src/lib/types/src/Flow/Types/Type/Native/String/StringTypeChecker.php

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,35 @@ public function isFloat() : bool
108108
return \is_numeric($this->string) && \str_contains($this->string, '.');
109109
}
110110

111+
/**
 * Checks whether the wrapped string looks like HTML and is accepted by DOMDocument::loadHTML().
 *
 * The string must be non-empty, start with "<" and contain at least one
 * "<tag>content</tag>"-shaped fragment (the pattern's "." does not cross
 * newlines) before the parser is tried.
 */
public function isHTML() : bool
{
    if ($this->string === '' || '<' !== $this->string[0]) {
        return false;
    }

    if (\preg_match('/<(.+?)>(.+?)<\/(.+?)>/', $this->string) !== 1) {
        return false;
    }

    // Remember the caller's libxml error mode so it can be restored afterwards;
    // forcing it to false would silently disable internal error collection
    // for code that had enabled it before calling this method.
    $previousMode = \libxml_use_internal_errors(true);

    try {
        $doc = new \DOMDocument();

        return (bool) $doc->loadHTML($this->string, \LIBXML_HTML_NOIMPLIED | \LIBXML_HTML_NODEFDTD);
    } catch (\Exception) {
        return false;
    } finally {
        \libxml_clear_errors(); // Drop the errors collected while probing the string
        \libxml_use_internal_errors($previousMode); // Restore the caller's error handling mode
    }
}
139+
111140
public function isInteger() : bool
112141
{
113142
if ($this->string === '') {
@@ -177,21 +206,19 @@ public function isXML() : bool
177206
return false;
178207
}
179208

180-
if (\preg_match('/<(.+?)>(.+?)<\/(.+?)>/', $this->string) === 1) {
209+
if (\preg_match('/<xml(.+?)>(.+?)<\/xml>/', $this->string) === 1) {
181210
try {
182211
\libxml_use_internal_errors(true);
183212

184213
$doc = new \DOMDocument();
185-
$result = $doc->loadXML($this->string);
186-
\libxml_clear_errors(); // Clear any errors if needed
187-
\libxml_use_internal_errors(false); // Restore standard error handling
214+
$result = @$doc->loadXML($this->string);
188215

189216
return (bool) $result;
190217
} catch (\Exception) {
218+
return false;
219+
} finally {
191220
\libxml_clear_errors(); // Clear any errors if needed
192221
\libxml_use_internal_errors(false); // Restore standard error handling
193-
194-
return false;
195222
}
196223
}
197224

src/lib/types/src/Flow/Types/Type/TypeDetector.php

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,24 @@
44

55
namespace Flow\Types\Type;
66

7-
use function Flow\Types\DSL\{type_array, type_boolean, type_date, type_datetime, type_enum, type_float, type_instance_of, type_integer, type_json, type_map, type_null, type_string, type_time, type_uuid, type_xml, type_xml_element, types};
7+
use function Flow\Types\DSL\{type_array,
8+
type_boolean,
9+
type_date,
10+
type_datetime,
11+
type_enum,
12+
type_float,
13+
type_html,
14+
type_instance_of,
15+
type_integer,
16+
type_json,
17+
type_map,
18+
type_null,
19+
type_string,
20+
type_time,
21+
type_uuid,
22+
type_xml,
23+
type_xml_element,
24+
types};
825
use Flow\Types\Exception\InvalidArgumentException;
926
use Flow\Types\Type;
1027
use Flow\Types\Type\Logical\{ListType, StructureType};
@@ -46,8 +63,8 @@ public function detectType(mixed $value) : Type
4663
}
4764

4865
$detector = new ArrayContentDetector(
49-
$keyTypes = types(...\array_map($this->detectType(...), \array_keys($value)))->deduplicate(),
50-
$valueTypes = types(...\array_map($this->detectType(...), \array_values($value)))->deduplicate(),
66+
types(...\array_map($this->detectType(...), \array_keys($value)))->deduplicate(),
67+
types(...\array_map($this->detectType(...), \array_values($value)))->deduplicate(),
5168
\array_is_list($value)
5269
);
5370

@@ -102,6 +119,10 @@ public function detectType(mixed $value) : Type
102119
return type_xml_element();
103120
}
104121

122+
if (type_html()->isValid($value)) {
123+
return type_html();
124+
}
125+
105126
return type_instance_of($value::class);
106127
}
107128

src/lib/types/src/Flow/Types/Type/TypeFactory.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
type_date,
1111
type_datetime,
1212
type_float,
13+
type_html,
1314
type_integer,
1415
type_json,
1516
type_mixed,
@@ -79,6 +80,7 @@ public static function fromArray(array $data) : Type
7980
'scalar' => type_scalar(),
8081
'mixed' => type_mixed(),
8182
'numeric-string' => type_numeric_string(),
83+
'html' => type_html(),
8284
default => throw new InvalidArgumentException("Unknown type '" . (\is_string($data['type']) ? $data['type'] : \gettype($data['type'])) . "'"),
8385
};
8486
}

0 commit comments

Comments
 (0)