Skip to content

Commit 4a4b74c

Browse files
committed
Adjust DOM-related functions to work with HTML
1 parent 2e68412 commit 4a4b74c

11 files changed

Lines changed: 151 additions & 34 deletions

File tree

src/core/etl/src/Flow/ETL/DSL/functions.php

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -656,7 +656,7 @@ function html_entry(string $name, HTMLDocument|string|null $value, ?Metadata $me
656656
* @return Entry<?HTMLElement>
657657
*/
658658
#[DocumentationDSL(module: Module::CORE, type: DSLType::ENTRY)]
659-
function html_element_entry(string $name, HTMLElement|string|null $value, ?Metadata $metadata = null) : Entry // @phpstan-ignore class.notFound,class.notFound
659+
function html_element_entry(string $name, HTMLElement|string|null $value, ?Metadata $metadata = null) : Entry
660660
{
661661
return new HTMLElementEntry($name, $value, $metadata);
662662
}
@@ -2006,7 +2006,7 @@ function html_schema(string $name, bool $nullable = false, ?Metadata $metadata =
20062006
* @return Definition<HTMLElement>
20072007
*/
20082008
#[DocumentationDSL(module: Module::CORE, type: DSLType::SCHEMA)]
2009-
function html_element_schema(string $name, bool $nullable = false, ?Metadata $metadata = null) : Definition // @phpstan-ignore class.notFound
2009+
function html_element_schema(string $name, bool $nullable = false, ?Metadata $metadata = null) : Definition
20102010
{
20112011
return Definition::html_element($name, $nullable, $metadata);
20122012
}

src/core/etl/src/Flow/ETL/Function/DOMElementAttributeValue.php

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,26 @@
55
namespace Flow\ETL\Function;
66

77
use function Flow\Types\DSL\{type_instance_of, type_list};
8+
use Dom\HTMLElement;
89
use Flow\ETL\Row;
910

1011
final class DOMElementAttributeValue extends ScalarFunctionChain
1112
{
1213
public function __construct(
13-
private readonly ScalarFunction|\DOMNode $domElement,
14+
private readonly ScalarFunction|\DOMNode|HTMLElement $domElement,
1415
private readonly ScalarFunction|string $attribute,
1516
) {
1617
}
1718

1819
public function eval(Row $row) : ?string
1920
{
20-
$node = (new Parameter($this->domElement))->as($row, type_instance_of(\DOMNode::class), type_list(type_instance_of(\DOMNode::class)));
21+
$node = (new Parameter($this->domElement))->as(
22+
$row,
23+
type_instance_of(\DOMNode::class),
24+
type_list(type_instance_of(\DOMNode::class)),
25+
type_instance_of(HTMLElement::class),
26+
type_list(type_instance_of(HTMLElement::class))
27+
);
2128

2229
if ($node instanceof \DOMDocument) {
2330
$node = $node->documentElement;
@@ -33,13 +40,11 @@ public function eval(Row $row) : ?string
3340
return null;
3441
}
3542

36-
if (!$node instanceof \DOMNode || !$node->hasAttributes()) {
43+
if ((!$node instanceof \DOMNode && !$node instanceof HTMLElement) || !$node->hasAttributes()) {
3744
return null;
3845
}
3946

40-
$attributes = $node->attributes;
41-
42-
if (!$namedItem = $attributes->getNamedItem($attributeName)) {
47+
if (!$namedItem = $node->attributes->getNamedItem($attributeName)) {
4348
return null;
4449
}
4550

src/core/etl/src/Flow/ETL/Function/DOMElementAttributesCount.php

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,19 @@
44

55
namespace Flow\ETL\Function;
66

7+
use function Flow\Types\DSL\type_instance_of;
8+
use Dom\HTMlElement;
79
use Flow\ETL\Row;
810

911
final class DOMElementAttributesCount extends ScalarFunctionChain
1012
{
11-
public function __construct(private readonly ScalarFunction|\DOMNode $domElement)
13+
public function __construct(private readonly ScalarFunction|\DOMNode|HTMlElement $domElement)
1214
{
1315
}
1416

1517
public function eval(Row $row) : ?int
1618
{
17-
$domElement = (new Parameter($this->domElement))->asInstanceOf($row, \DOMElement::class);
19+
$domElement = (new Parameter($this->domElement))->as($row, type_instance_of(\DOMElement::class), type_instance_of(HTMlElement::class));
1820

1921
if ($domElement === null) {
2022
return null;

src/core/etl/src/Flow/ETL/Function/DOMElementValue.php

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,24 @@
55
namespace Flow\ETL\Function;
66

77
use function Flow\Types\DSL\{type_instance_of, type_list};
8+
use Dom\HTMLElement;
89
use Flow\ETL\Row;
910

1011
final class DOMElementValue extends ScalarFunctionChain
1112
{
12-
public function __construct(private readonly ScalarFunction|\DOMNode $node)
13+
public function __construct(private readonly ScalarFunction|\DOMNode|HTMLElement $node)
1314
{
1415
}
1516

1617
public function eval(Row $row) : mixed
1718
{
18-
$node = (new Parameter($this->node))->as($row, type_instance_of(\DOMNode::class), type_list(type_instance_of(\DOMNode::class)));
19+
$node = (new Parameter($this->node))->as(
20+
$row,
21+
type_instance_of(\DOMNode::class),
22+
type_list(type_instance_of(\DOMNode::class)),
23+
type_instance_of(HTMLElement::class),
24+
type_list(type_instance_of(HTMLElement::class))
25+
);
1926

2027
if (\is_array($node) && \count($node)) {
2128
$node = \reset($node);
@@ -25,10 +32,14 @@ public function eval(Row $row) : mixed
2532
$node = $node->documentElement;
2633
}
2734

28-
if (!$node instanceof \DOMElement) {
29-
return null;
35+
if ($node instanceof \DOMNode) {
36+
return $node->nodeValue;
3037
}
3138

32-
return $node->nodeValue;
39+
if ($node instanceof HTMLElement) {
40+
return $node->textContent;
41+
}
42+
43+
return null;
3344
}
3445
}

src/core/etl/src/Flow/ETL/Function/Parameter.php

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,15 @@ public function __construct(mixed $function)
2525
/**
2626
* @template T
2727
*
28-
* @param Type<T> $type
2928
* @param Type<T> ...$types
3029
*
3130
* @return null|T
3231
*/
33-
public function as(Row $row, Type $type, Type ...$types) : mixed
32+
public function as(Row $row, Type ...$types) : mixed
3433
{
3534
$value = $this->eval($row);
3635

37-
foreach (\array_merge([$type], $types) as $nextType) {
36+
foreach ($types as $nextType) {
3837
if ($nextType->isValid($value)) {
3938
return $value;
4039
}

src/core/etl/src/Flow/ETL/Row/EntryFactory.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ public function createAs(string $entryName, mixed $value, Definition|Type $defin
123123
XMLType::class => xml_entry($entryName, null, $metadata),
124124
XMLElementType::class => xml_element_entry($entryName, null, $metadata),
125125
HTMLType::class => html_entry($entryName, null, $metadata),
126+
HTMLElementType::class => html_element_entry($entryName, null, $metadata),
126127
default => throw new InvalidArgumentException("Can't convert value into type \"{$type->toString()}\""),
127128
};
128129
}

src/core/etl/src/Flow/ETL/Schema/Definition.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ public static function html(string|Reference $entry, bool $nullable = false, ?Me
152152
/**
153153
* @return Definition<HTMLElement>
154154
*/
155-
public static function html_element(string|Reference $entry, bool $nullable = false, ?Metadata $metadata = null) : self // @phpstan-ignore class.notFound
155+
public static function html_element(string|Reference $entry, bool $nullable = false, ?Metadata $metadata = null) : self
156156
{
157157
return new self($entry, type_html_element(), $nullable, $metadata);
158158
}

src/core/etl/tests/Flow/ETL/Tests/Unit/Function/DOMElementAttributeValueTest.php

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,40 @@
55
namespace Flow\ETL\Tests\Unit\Function;
66

77
use function Flow\ETL\DSL\{config, flow_context, ref, row};
8-
use Flow\ETL\Tests\FlowTestCase;
8+
use Dom\{HTMLDocument, HTMLElement};
9+
use PHPUnit\Framework\Attributes\RequiresPhp;
10+
use PHPUnit\Framework\TestCase;
911

10-
final class DOMElementAttributeValueTest extends FlowTestCase
12+
final class DOMElementAttributeValueTest extends TestCase
1113
{
12-
public function test_extracting_attribute_from_dom_element_entry() : void
14+
#[RequiresPhp('>= 8.4')]
15+
public function test_html_extracting_attribute_from_dom_element_entry() : void
16+
{
17+
$element = HTMLDocument::createFromString('<span id="foobar">foobar</span>', \LIBXML_HTML_NOIMPLIED | \LIBXML_NOERROR);
18+
19+
self::assertInstanceOf(HTMLElement::class, $element->documentElement);
20+
self::assertEquals(
21+
'foobar',
22+
ref('value')->domElementAttributeValue('id')->eval(
23+
row(flow_context(config())->entryFactory()->create('value', $element->documentElement))
24+
)
25+
);
26+
}
27+
28+
#[RequiresPhp('>= 8.4')]
29+
public function test_html_extracting_non_existing_attribute_from_dom_element_entry() : void
30+
{
31+
$element = HTMLDocument::createFromString('<span">foobar</span>', \LIBXML_HTML_NOIMPLIED | \LIBXML_NOERROR);
32+
33+
self::assertInstanceOf(HTMLElement::class, $element->documentElement);
34+
self::assertNull(
35+
ref('value')->domElementAttributeValue('id')->eval(
36+
row(flow_context(config())->entryFactory()->create('value', $element->documentElement))
37+
)
38+
);
39+
}
40+
41+
public function test_xml_extracting_attribute_from_dom_element_entry() : void
1342
{
1443
$xml = new \DOMDocument();
1544
$xml->loadXML('<root><foo baz="buz">bar</foo></root>');
@@ -23,7 +52,7 @@ public function test_extracting_attribute_from_dom_element_entry() : void
2352
);
2453
}
2554

26-
public function test_extracting_non_existing_attribute_from_dom_element_entry() : void
55+
public function test_xml_extracting_non_existing_attribute_from_dom_element_entry() : void
2756
{
2857
$xml = new \DOMDocument();
2958
$xml->loadXML('<root><foo baz="buz">bar</foo></root>');

src/core/etl/tests/Flow/ETL/Tests/Unit/Function/DOMElementAttributesCountTest.php

Lines changed: 52 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,45 +5,89 @@
55
namespace Flow\ETL\Tests\Unit\Function;
66

77
use function Flow\ETL\DSL\{config, flow_context, ref, row};
8-
use Flow\ETL\Tests\FlowTestCase;
8+
use Dom\{HTMLDocument, HTMLElement};
9+
use PHPUnit\Framework\Attributes\RequiresPhp;
10+
use PHPUnit\Framework\TestCase;
911

10-
final class DOMElementAttributesCountTest extends FlowTestCase
12+
final class DOMElementAttributesCountTest extends TestCase
1113
{
12-
public function test_attributes_count_on_element_with_multiple_attributes() : void
14+
#[RequiresPhp('>= 8.4')]
15+
public function test_html_attributes_count_on_element_with_multiple_attributes() : void
16+
{
17+
$element = HTMLDocument::createFromString('<span data-attr="1" data-foo="2" data-bar="3">foobar</span>', \LIBXML_HTML_NOIMPLIED | \LIBXML_NOERROR);
18+
19+
self::assertInstanceOf(HTMLElement::class, $element->documentElement);
20+
self::assertSame(
21+
3,
22+
ref('value')->domElementAttributesCount()->eval(
23+
row(flow_context(config())->entryFactory()->create('value', $element->documentElement))
24+
)
25+
);
26+
}
27+
28+
#[RequiresPhp('>= 8.4')]
29+
public function test_html_attributes_count_on_element_with_one_attribute() : void
30+
{
31+
$element = HTMLDocument::createFromString('<span data-attr="1"">foobar</span>', \LIBXML_HTML_NOIMPLIED | \LIBXML_NOERROR);
32+
33+
self::assertInstanceOf(HTMLElement::class, $element->documentElement);
34+
self::assertSame(
35+
1,
36+
ref('value')->domElementAttributesCount()->eval(
37+
row(flow_context(config())->entryFactory()->create('value', $element->documentElement))
38+
)
39+
);
40+
}
41+
42+
#[RequiresPhp('>= 8.4')]
43+
public function test_html_attributes_count_on_element_with_zero_attributes() : void
44+
{
45+
$element = HTMLDocument::createFromString('<span>foobar</span>', \LIBXML_HTML_NOIMPLIED | \LIBXML_NOERROR);
46+
47+
self::assertInstanceOf(HTMLElement::class, $element->documentElement);
48+
self::assertSame(
49+
0,
50+
ref('value')->domElementAttributesCount()->eval(
51+
row(flow_context(config())->entryFactory()->create('value', $element->documentElement))
52+
)
53+
);
54+
}
55+
56+
public function test_xml_attributes_count_on_element_with_multiple_attributes() : void
1357
{
1458
$xml = new \DOMDocument();
1559
$xml->loadXML('<root><foo atr-01="1" atr-02="2" atr-03="3">bar</foo></root>');
1660

1761
self::assertInstanceOf(\DOMElement::class, $xml->documentElement);
18-
self::assertEquals(
62+
self::assertSame(
1963
3,
2064
ref('value')->domElementAttributesCount()->eval(
2165
row(flow_context(config())->entryFactory()->create('value', $xml->documentElement->firstChild))
2266
)
2367
);
2468
}
2569

26-
public function test_attributes_count_on_element_with_one_attribute() : void
70+
public function test_xml_attributes_count_on_element_with_one_attribute() : void
2771
{
2872
$xml = new \DOMDocument();
2973
$xml->loadXML('<root><foo baz="buz">bar</foo></root>');
3074

3175
self::assertInstanceOf(\DOMElement::class, $xml->documentElement);
32-
self::assertEquals(
76+
self::assertSame(
3377
1,
3478
ref('value')->domElementAttributesCount()->eval(
3579
row(flow_context(config())->entryFactory()->create('value', $xml->documentElement->firstChild))
3680
)
3781
);
3882
}
3983

40-
public function test_attributes_count_on_element_with_zero_attributes() : void
84+
public function test_xml_attributes_count_on_element_with_zero_attributes() : void
4185
{
4286
$xml = new \DOMDocument();
4387
$xml->loadXML('<root><foo>bar</foo></root>');
4488

4589
self::assertInstanceOf(\DOMElement::class, $xml->documentElement);
46-
self::assertEquals(
90+
self::assertSame(
4791
0,
4892
ref('value')->domElementAttributesCount()->eval(
4993
row(flow_context(config())->entryFactory()->create('value', $xml->documentElement->firstChild))

src/core/etl/tests/Flow/ETL/Tests/Unit/Function/DOMElementValueTest.php

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,37 @@
55
namespace Flow\ETL\Tests\Unit\Function;
66

77
use function Flow\ETL\DSL\{config, flow_context, ref, row};
8+
use Dom\{HTMLDocument, HTMLElement};
89
use Flow\ETL\Tests\FlowTestCase;
10+
use PHPUnit\Framework\Attributes\RequiresPhp;
911

1012
final class DOMElementValueTest extends FlowTestCase
1113
{
12-
public function test_getting_element_value_with_children() : void
14+
#[RequiresPhp('>= 8.4')]
15+
public function test_html_getting_element_value_with_children() : void
16+
{
17+
$element = HTMLDocument::createFromString('<p><span>foobar</span></p>', \LIBXML_HTML_NOIMPLIED | \LIBXML_NOERROR);
18+
19+
self::assertInstanceOf(HTMLElement::class, $element->documentElement);
20+
self::assertEquals(
21+
'foobar',
22+
ref('value')->domElementValue()->eval(row(flow_context(config())->entryFactory()->create('value', $element->documentElement)))
23+
);
24+
}
25+
26+
#[RequiresPhp('>= 8.4')]
27+
public function test_html_getting_simple_element_value() : void
28+
{
29+
$element = HTMLDocument::createFromString('<span>bar</span>', \LIBXML_HTML_NOIMPLIED | \LIBXML_NOERROR);
30+
31+
self::assertInstanceOf(HTMLElement::class, $element->documentElement);
32+
self::assertEquals(
33+
'bar',
34+
ref('value')->domElementValue()->eval(row(flow_context(config())->entryFactory()->create('value', $element->documentElement)))
35+
);
36+
}
37+
38+
public function test_xml_getting_element_value_with_children() : void
1339
{
1440
$xml = new \DOMDocument();
1541
$xml->loadXML('<root><foo><bar>baz</bar></foo></root>');
@@ -21,7 +47,7 @@ public function test_getting_element_value_with_children() : void
2147
);
2248
}
2349

24-
public function test_getting_simple_element_value() : void
50+
public function test_xml_getting_simple_element_value() : void
2551
{
2652
$xml = new \DOMDocument();
2753
$xml->loadXML('<root><foo>bar</foo></root>');

0 commit comments

Comments
 (0)