Skip to content

Commit 80fdd27

Browse files
committed
Adjust DOM-related functions to work with HTML
1 parent 256c87c commit 80fdd27

11 files changed

Lines changed: 173 additions & 34 deletions

File tree

src/core/etl/src/Flow/ETL/DSL/functions.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ function html_entry(string $name, HTMLDocument|string|null $value, ?Metadata $me
656656
* @return Entry<?HTMLElement>
657657
*/
658658
#[DocumentationDSL(module: Module::CORE, type: DSLType::ENTRY)]
659-
function html_element_entry(string $name, HTMLElement|string|null $value, ?Metadata $metadata = null) : Entry // @phpstan-ignore class.notFound,class.notFound
659+
function html_element_entry(string $name, HTMLElement|string|null $value, ?Metadata $metadata = null) : Entry
660660
{
661661
return new HTMLElementEntry($name, $value, $metadata);
662662
}
@@ -2006,7 +2006,7 @@ function html_schema(string $name, bool $nullable = false, ?Metadata $metadata =
20062006
* @return Definition<HTMLElement>
20072007
*/
20082008
#[DocumentationDSL(module: Module::CORE, type: DSLType::SCHEMA)]
2009-
function html_element_schema(string $name, bool $nullable = false, ?Metadata $metadata = null) : Definition // @phpstan-ignore class.notFound
2009+
function html_element_schema(string $name, bool $nullable = false, ?Metadata $metadata = null) : Definition
20102010
{
20112011
return Definition::html_element($name, $nullable, $metadata);
20122012
}

src/core/etl/src/Flow/ETL/Function/DOMElementAttributeValue.php

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,33 @@
55
namespace Flow\ETL\Function;
66

77
use function Flow\Types\DSL\{type_instance_of, type_list};
8+
use Dom\HTMLElement;
89
use Flow\ETL\Row;
910

1011
final class DOMElementAttributeValue extends ScalarFunctionChain
1112
{
1213
public function __construct(
13-
private readonly ScalarFunction|\DOMNode $domElement,
14+
private readonly ScalarFunction|\DOMNode|HTMLElement $domElement,
1415
private readonly ScalarFunction|string $attribute,
1516
) {
1617
}
1718

1819
public function eval(Row $row) : ?string
1920
{
20-
$node = (new Parameter($this->domElement))->as($row, type_instance_of(\DOMNode::class), type_list(type_instance_of(\DOMNode::class)));
21+
$types = [
22+
type_instance_of(\DOMNode::class),
23+
type_list(type_instance_of(\DOMNode::class)),
24+
];
25+
26+
if (\class_exists('\Dom\HTMLElement')) {
27+
$types[] = type_instance_of(HTMLElement::class);
28+
$types[] = type_list(type_instance_of(HTMLElement::class));
29+
}
30+
31+
$node = (new Parameter($this->domElement))->as(
32+
$row,
33+
...$types
34+
);
2135

2236
if ($node instanceof \DOMDocument) {
2337
$node = $node->documentElement;
@@ -33,13 +47,11 @@ public function eval(Row $row) : ?string
3347
return null;
3448
}
3549

36-
if (!$node instanceof \DOMNode || !$node->hasAttributes()) {
50+
if ((!$node instanceof \DOMNode && !$node instanceof HTMLElement) || !$node->hasAttributes()) {
3751
return null;
3852
}
3953

40-
$attributes = $node->attributes;
41-
42-
if (!$namedItem = $attributes->getNamedItem($attributeName)) {
54+
if (!$namedItem = $node->attributes->getNamedItem($attributeName)) {
4355
return null;
4456
}
4557

src/core/etl/src/Flow/ETL/Function/DOMElementAttributesCount.php

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,27 @@
44

55
namespace Flow\ETL\Function;
66

7+
use function Flow\Types\DSL\type_instance_of;
8+
use Dom\HTMLElement;
79
use Flow\ETL\Row;
810

911
final class DOMElementAttributesCount extends ScalarFunctionChain
1012
{
11-
public function __construct(private readonly ScalarFunction|\DOMNode $domElement)
13+
public function __construct(private readonly ScalarFunction|\DOMNode|HTMLElement $domElement)
1214
{
1315
}
1416

1517
public function eval(Row $row) : ?int
1618
{
17-
$domElement = (new Parameter($this->domElement))->asInstanceOf($row, \DOMElement::class);
19+
$types = [
20+
type_instance_of(\DOMElement::class),
21+
];
22+
23+
if (\class_exists('\Dom\HTMLElement')) {
24+
$types[] = type_instance_of(HTMLElement::class);
25+
}
26+
27+
$domElement = (new Parameter($this->domElement))->as($row, ...$types);
1828

1929
if ($domElement === null) {
2030
return null;

src/core/etl/src/Flow/ETL/Function/DOMElementValue.php

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,31 @@
55
namespace Flow\ETL\Function;
66

77
use function Flow\Types\DSL\{type_instance_of, type_list};
8+
use Dom\HTMLElement;
89
use Flow\ETL\Row;
910

1011
final class DOMElementValue extends ScalarFunctionChain
1112
{
12-
public function __construct(private readonly ScalarFunction|\DOMNode $node)
13+
public function __construct(private readonly ScalarFunction|\DOMNode|HTMLElement $node)
1314
{
1415
}
1516

1617
public function eval(Row $row) : mixed
1718
{
18-
$node = (new Parameter($this->node))->as($row, type_instance_of(\DOMNode::class), type_list(type_instance_of(\DOMNode::class)));
19+
$types = [
20+
type_instance_of(\DOMNode::class),
21+
type_list(type_instance_of(\DOMNode::class)),
22+
];
23+
24+
if (\class_exists('\Dom\HTMLElement')) {
25+
$types[] = type_instance_of(HTMLElement::class);
26+
$types[] = type_list(type_instance_of(HTMLElement::class));
27+
}
28+
29+
$node = (new Parameter($this->node))->as(
30+
$row,
31+
...$types
32+
);
1933

2034
if (\is_array($node) && \count($node)) {
2135
$node = \reset($node);
@@ -25,10 +39,14 @@ public function eval(Row $row) : mixed
2539
$node = $node->documentElement;
2640
}
2741

28-
if (!$node instanceof \DOMElement) {
29-
return null;
42+
if ($node instanceof \DOMElement) {
43+
return $node->nodeValue;
44+
}
45+
46+
if ($node instanceof HTMLElement) {
47+
return $node->textContent;
3048
}
3149

32-
return $node->nodeValue;
50+
return null;
3351
}
3452
}

src/core/etl/src/Flow/ETL/Function/Parameter.php

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,15 @@ public function __construct(mixed $function)
2525
/**
2626
* @template T
2727
*
28-
* @param Type<T> $type
2928
* @param Type<T> ...$types
3029
*
3130
* @return null|T
3231
*/
33-
public function as(Row $row, Type $type, Type ...$types) : mixed
32+
public function as(Row $row, Type ...$types) : mixed
3433
{
3534
$value = $this->eval($row);
3635

37-
foreach (\array_merge([$type], $types) as $nextType) {
36+
foreach ($types as $nextType) {
3837
if ($nextType->isValid($value)) {
3938
return $value;
4039
}

src/core/etl/src/Flow/ETL/Row/EntryFactory.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ public function createAs(string $entryName, mixed $value, Definition|Type $defin
123123
XMLType::class => xml_entry($entryName, null, $metadata),
124124
XMLElementType::class => xml_element_entry($entryName, null, $metadata),
125125
HTMLType::class => html_entry($entryName, null, $metadata),
126+
HTMLElementType::class => html_element_entry($entryName, null, $metadata),
126127
default => throw new InvalidArgumentException("Can't convert value into type \"{$type->toString()}\""),
127128
};
128129
}

src/core/etl/src/Flow/ETL/Schema/Definition.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ public static function html(string|Reference $entry, bool $nullable = false, ?Me
152152
/**
153153
* @return Definition<HTMLElement>
154154
*/
155-
public static function html_element(string|Reference $entry, bool $nullable = false, ?Metadata $metadata = null) : self // @phpstan-ignore class.notFound
155+
public static function html_element(string|Reference $entry, bool $nullable = false, ?Metadata $metadata = null) : self
156156
{
157157
return new self($entry, type_html_element(), $nullable, $metadata);
158158
}

src/core/etl/tests/Flow/ETL/Tests/Unit/Function/DOMElementAttributeValueTest.php

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,40 @@
55
namespace Flow\ETL\Tests\Unit\Function;
66

77
use function Flow\ETL\DSL\{config, flow_context, ref, row};
8-
use Flow\ETL\Tests\FlowTestCase;
8+
use Dom\{HTMLDocument, HTMLElement};
9+
use PHPUnit\Framework\Attributes\RequiresPhp;
10+
use PHPUnit\Framework\TestCase;
911

10-
final class DOMElementAttributeValueTest extends FlowTestCase
12+
final class DOMElementAttributeValueTest extends TestCase
1113
{
12-
public function test_extracting_attribute_from_dom_element_entry() : void
14+
#[RequiresPhp('>= 8.4')]
15+
public function test_html_extracting_attribute_from_dom_element_entry() : void
16+
{
17+
$element = HTMLDocument::createFromString('<span id="foobar">foobar</span>', \LIBXML_HTML_NOIMPLIED | \LIBXML_NOERROR);
18+
19+
self::assertInstanceOf(HTMLElement::class, $element->documentElement);
20+
self::assertEquals(
21+
'foobar',
22+
ref('value')->domElementAttributeValue('id')->eval(
23+
row(flow_context(config())->entryFactory()->create('value', $element->documentElement))
24+
)
25+
);
26+
}
27+
28+
#[RequiresPhp('>= 8.4')]
29+
public function test_html_extracting_non_existing_attribute_from_dom_element_entry() : void
30+
{
31+
$element = HTMLDocument::createFromString('<span>foobar</span>', \LIBXML_HTML_NOIMPLIED | \LIBXML_NOERROR);
32+
33+
self::assertInstanceOf(HTMLElement::class, $element->documentElement);
34+
self::assertNull(
35+
ref('value')->domElementAttributeValue('id')->eval(
36+
row(flow_context(config())->entryFactory()->create('value', $element->documentElement))
37+
)
38+
);
39+
}
40+
41+
public function test_xml_extracting_attribute_from_dom_element_entry() : void
1342
{
1443
$xml = new \DOMDocument();
1544
$xml->loadXML('<root><foo baz="buz">bar</foo></root>');
@@ -23,7 +52,7 @@ public function test_extracting_attribute_from_dom_element_entry() : void
2352
);
2453
}
2554

26-
public function test_extracting_non_existing_attribute_from_dom_element_entry() : void
55+
public function test_xml_extracting_non_existing_attribute_from_dom_element_entry() : void
2756
{
2857
$xml = new \DOMDocument();
2958
$xml->loadXML('<root><foo baz="buz">bar</foo></root>');

src/core/etl/tests/Flow/ETL/Tests/Unit/Function/DOMElementAttributesCountTest.php

Lines changed: 52 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,45 +5,89 @@
55
namespace Flow\ETL\Tests\Unit\Function;
66

77
use function Flow\ETL\DSL\{config, flow_context, ref, row};
8-
use Flow\ETL\Tests\FlowTestCase;
8+
use Dom\{HTMLDocument, HTMLElement};
9+
use PHPUnit\Framework\Attributes\RequiresPhp;
10+
use PHPUnit\Framework\TestCase;
911

10-
final class DOMElementAttributesCountTest extends FlowTestCase
12+
final class DOMElementAttributesCountTest extends TestCase
1113
{
12-
public function test_attributes_count_on_element_with_multiple_attributes() : void
14+
#[RequiresPhp('>= 8.4')]
15+
public function test_html_attributes_count_on_element_with_multiple_attributes() : void
16+
{
17+
$element = HTMLDocument::createFromString('<span data-attr="1" data-foo="2" data-bar="3">foobar</span>', \LIBXML_HTML_NOIMPLIED | \LIBXML_NOERROR);
18+
19+
self::assertInstanceOf(HTMLElement::class, $element->documentElement);
20+
self::assertSame(
21+
3,
22+
ref('value')->domElementAttributesCount()->eval(
23+
row(flow_context(config())->entryFactory()->create('value', $element->documentElement))
24+
)
25+
);
26+
}
27+
28+
#[RequiresPhp('>= 8.4')]
29+
public function test_html_attributes_count_on_element_with_one_attribute() : void
30+
{
31+
$element = HTMLDocument::createFromString('<span data-attr="1">foobar</span>', \LIBXML_HTML_NOIMPLIED | \LIBXML_NOERROR);
32+
33+
self::assertInstanceOf(HTMLElement::class, $element->documentElement);
34+
self::assertSame(
35+
1,
36+
ref('value')->domElementAttributesCount()->eval(
37+
row(flow_context(config())->entryFactory()->create('value', $element->documentElement))
38+
)
39+
);
40+
}
41+
42+
#[RequiresPhp('>= 8.4')]
43+
public function test_html_attributes_count_on_element_with_zero_attributes() : void
44+
{
45+
$element = HTMLDocument::createFromString('<span>foobar</span>', \LIBXML_HTML_NOIMPLIED | \LIBXML_NOERROR);
46+
47+
self::assertInstanceOf(HTMLElement::class, $element->documentElement);
48+
self::assertSame(
49+
0,
50+
ref('value')->domElementAttributesCount()->eval(
51+
row(flow_context(config())->entryFactory()->create('value', $element->documentElement))
52+
)
53+
);
54+
}
55+
56+
public function test_xml_attributes_count_on_element_with_multiple_attributes() : void
1357
{
1458
$xml = new \DOMDocument();
1559
$xml->loadXML('<root><foo atr-01="1" atr-02="2" atr-03="3">bar</foo></root>');
1660

1761
self::assertInstanceOf(\DOMElement::class, $xml->documentElement);
18-
self::assertEquals(
62+
self::assertSame(
1963
3,
2064
ref('value')->domElementAttributesCount()->eval(
2165
row(flow_context(config())->entryFactory()->create('value', $xml->documentElement->firstChild))
2266
)
2367
);
2468
}
2569

26-
public function test_attributes_count_on_element_with_one_attribute() : void
70+
public function test_xml_attributes_count_on_element_with_one_attribute() : void
2771
{
2872
$xml = new \DOMDocument();
2973
$xml->loadXML('<root><foo baz="buz">bar</foo></root>');
3074

3175
self::assertInstanceOf(\DOMElement::class, $xml->documentElement);
32-
self::assertEquals(
76+
self::assertSame(
3377
1,
3478
ref('value')->domElementAttributesCount()->eval(
3579
row(flow_context(config())->entryFactory()->create('value', $xml->documentElement->firstChild))
3680
)
3781
);
3882
}
3983

40-
public function test_attributes_count_on_element_with_zero_attributes() : void
84+
public function test_xml_attributes_count_on_element_with_zero_attributes() : void
4185
{
4286
$xml = new \DOMDocument();
4387
$xml->loadXML('<root><foo>bar</foo></root>');
4488

4589
self::assertInstanceOf(\DOMElement::class, $xml->documentElement);
46-
self::assertEquals(
90+
self::assertSame(
4791
0,
4892
ref('value')->domElementAttributesCount()->eval(
4993
row(flow_context(config())->entryFactory()->create('value', $xml->documentElement->firstChild))

src/core/etl/tests/Flow/ETL/Tests/Unit/Function/DOMElementValueTest.php

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,37 @@
55
namespace Flow\ETL\Tests\Unit\Function;
66

77
use function Flow\ETL\DSL\{config, flow_context, ref, row};
8+
use Dom\{HTMLDocument, HTMLElement};
89
use Flow\ETL\Tests\FlowTestCase;
10+
use PHPUnit\Framework\Attributes\RequiresPhp;
911

1012
final class DOMElementValueTest extends FlowTestCase
1113
{
12-
public function test_getting_element_value_with_children() : void
14+
#[RequiresPhp('>= 8.4')]
15+
public function test_html_getting_element_value_with_children() : void
16+
{
17+
$element = HTMLDocument::createFromString('<p><span>foobar</span></p>', \LIBXML_HTML_NOIMPLIED | \LIBXML_NOERROR);
18+
19+
self::assertInstanceOf(HTMLElement::class, $element->documentElement);
20+
self::assertEquals(
21+
'foobar',
22+
ref('value')->domElementValue()->eval(row(flow_context(config())->entryFactory()->create('value', $element->documentElement)))
23+
);
24+
}
25+
26+
#[RequiresPhp('>= 8.4')]
27+
public function test_html_getting_simple_element_value() : void
28+
{
29+
$element = HTMLDocument::createFromString('<span>bar</span>', \LIBXML_HTML_NOIMPLIED | \LIBXML_NOERROR);
30+
31+
self::assertInstanceOf(HTMLElement::class, $element->documentElement);
32+
self::assertEquals(
33+
'bar',
34+
ref('value')->domElementValue()->eval(row(flow_context(config())->entryFactory()->create('value', $element->documentElement)))
35+
);
36+
}
37+
38+
public function test_xml_getting_element_value_with_children() : void
1339
{
1440
$xml = new \DOMDocument();
1541
$xml->loadXML('<root><foo><bar>baz</bar></foo></root>');
@@ -21,7 +47,7 @@ public function test_getting_element_value_with_children() : void
2147
);
2248
}
2349

24-
public function test_getting_simple_element_value() : void
50+
public function test_xml_getting_simple_element_value() : void
2551
{
2652
$xml = new \DOMDocument();
2753
$xml->loadXML('<root><foo>bar</foo></root>');

0 commit comments

Comments
 (0)