Skip to content

Commit 6de9723

Browse files
committed
Add HTMLQuerySelectorAll & HTMLQuerySelector scalar functions
1 parent 17ac6b1 commit 6de9723

20 files changed

Lines changed: 238 additions & 413 deletions

File tree

phpstan.neon

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,15 @@ parameters:
6060
excludePaths:
6161
- src/cli/src/Flow/CLI/Command/*
6262
- src/core/etl/src/Flow/ETL/Formatter/ASCII/ASCIITable.php
63+
- src/core/etl/src/Flow/ETL/Function/HTMLQuerySelectorAll.php
64+
- src/core/etl/src/Flow/ETL/Function/HTMLQuerySelector.php
65+
- src/core/etl/src/Flow/ETL/Row/Entry/HTMLEntry.php
6366
- src/core/etl/src/Flow/ETL/Sort/ExternalSort/RowsMinHeap.php
67+
- src/core/etl/tests/Flow/ETL/Tests/Integration/Function/HTMLQuerySelectorAllTest.php
68+
- src/core/etl/tests/Flow/ETL/Tests/Integration/Function/HTMLQuerySelectorTest.php
69+
- src/core/etl/tests/Flow/ETL/Tests/Unit/Function/HTMLQuerySelectorAllTest.php
70+
- src/core/etl/tests/Flow/ETL/Tests/Unit/Function/HTMLQuerySelectorTest.php
71+
- src/core/etl/tests/Flow/ETL/Tests/Unit/Row/Entry/HTMLEntryTest.php
6472
- src/adapter/etl-adapter-avro/*
6573
- src/adapter/etl-adapter-elasticsearch/src/Flow/ETL/Adapter/Elasticsearch/ElasticsearchPHP/SearchResults.php
6674
- src/adapter/etl-adapter-elasticsearch/src/Flow/ETL/Adapter/Elasticsearch/ElasticsearchPHP/SearchParams.php
@@ -70,6 +78,7 @@ parameters:
7078
- src/lib/parquet/src/Flow/Parquet/ThriftModel/*
7179
- src/lib/parquet/src/Flow/Parquet/BinaryReader/*
7280
- src/lib/parquet/src/Flow/Parquet/Dremel/ColumnData/DefinitionConverter.php
81+
- src/lib/types/src/Flow/Types/Type/Logical/HTMLType.php
7382

7483
tmpDir: var/phpstan/cache
7584

src/core/etl/src/Flow/ETL/DSL/functions.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
type_xml_element as type_xml_element_new,
3838
types as types_new
3939
};
40+
use Dom\HTMLDocument;
4041
use Flow\Calculator\Rounding;
4142
use Flow\ETL\{Analyze,
4243
Attribute\DocumentationDSL,
@@ -205,7 +206,6 @@
205206
UnionType
206207
};
207208
use Flow\Types\Type\Types;
208-
use Flow\Types\Value\HTMLDocument;
209209
use UnitEnum;
210210

211211
/**
@@ -632,7 +632,7 @@ function xml_element_entry(string $name, \DOMElement|string|null $value, ?Metada
632632
* @return Entry<?HTMLDocument>
633633
*/
634634
#[DocumentationDSL(module: Module::CORE, type: DSLType::ENTRY)]
635-
function html_entry(string $name, HTMLDocument|string|null $value, ?Metadata $metadata = null) : Entry
635+
function html_entry(string $name, HTMLDocument|string|null $value, ?Metadata $metadata = null) : Entry // @phpstan-ignore class.notFound,class.notFound
636636
{
637637
return new HTMLEntry($name, $value, $metadata);
638638
}
@@ -1973,7 +1973,7 @@ function json_schema(string $name, bool $nullable = false, ?Metadata $metadata =
19731973
* @return Definition<HTMLDocument>
19741974
*/
19751975
#[DocumentationDSL(module: Module::CORE, type: DSLType::SCHEMA)]
1976-
function html_schema(string $name, bool $nullable = false, ?Metadata $metadata = null) : Definition
1976+
function html_schema(string $name, bool $nullable = false, ?Metadata $metadata = null) : Definition // @phpstan-ignore class.notFound
19771977
{
19781978
return Definition::html($name, $nullable, $metadata);
19791979
}

src/core/etl/src/Flow/ETL/Function/HTMLQuerySelector.php

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,32 +4,30 @@
44

55
namespace Flow\ETL\Function;
66

7-
use function Flow\Types\DSL\type_html;
8-
use Dom\HTMLDocument;
7+
use Dom\{Element, HTMLDocument};
98
use Flow\ETL\Row;
109

1110
final class HTMLQuerySelector extends ScalarFunctionChain
1211
{
1312
public function __construct(
1413
private readonly mixed $value,
15-
private readonly ScalarFunction|string $path,
14+
private readonly ScalarFunction|string $selector,
1615
) {
1716
}
1817

19-
public function eval(Row $row) : mixed
18+
public function eval(Row $row) : ?Element
2019
{
21-
$value = (new Parameter($this->value))->as($row, type_html());
22-
$path = (new Parameter($this->path))->asString($row);
20+
$value = (new Parameter($this->value))->asInstanceOf($row, HTMLDocument::class);
21+
$selector = (new Parameter($this->selector))->asString($row);
2322

24-
if (null === $value || null === $path) {
23+
if (null === $value || null === $selector) {
2524
return null;
2625
}
2726

28-
if (!\class_exists('\Dom\HTMLDocument')) {
29-
throw new \RuntimeException('This function requires \Dom\HTMLDocument extension available in PHP 8.4+.');
27+
if (\PHP_VERSION_ID < 80400) {
28+
throw new \RuntimeException('This function requires \Dom\Element extension available in PHP 8.4+.');
3029
}
3130

32-
return HTMLDocument::createFromString($value->toString())
33-
->querySelector($path);
31+
return $value->querySelector($selector);
3432
}
3533
}

src/core/etl/src/Flow/ETL/Function/HTMLQuerySelectorAll.php

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,33 +4,34 @@
44

55
namespace Flow\ETL\Function;
66

7-
use function Flow\Types\DSL\type_html;
87
use DOM\{Element, HTMLDocument};
98
use Flow\ETL\Row;
109

1110
final class HTMLQuerySelectorAll extends ScalarFunctionChain
1211
{
1312
public function __construct(
1413
private readonly mixed $value,
15-
private readonly ScalarFunction|string $path,
14+
private readonly ScalarFunction|string $selector,
1615
) {
1716
}
1817

19-
public function eval(Row $row) : mixed
18+
/**
19+
* @return null|array<Element>
20+
*/
21+
public function eval(Row $row) : ?array
2022
{
21-
$value = (new Parameter($this->value))->as($row, type_html());
22-
$path = (new Parameter($this->path))->asString($row);
23+
$value = (new Parameter($this->value))->asInstanceOf($row, HTMLDocument::class);
24+
$selector = (new Parameter($this->selector))->asString($row);
2325

24-
if (null === $value || null === $path) {
26+
if (null === $value || null === $selector) {
2527
return null;
2628
}
2729

28-
if (!\class_exists('\Dom\HTMLDocument')) {
30+
if (\PHP_VERSION_ID < 80400) {
2931
throw new \RuntimeException('This function requires \Dom\HTMLDocument extension available in PHP 8.4+.');
3032
}
3133

32-
/* @phpstan-ignore-next-line */
33-
$result = HTMLDocument::createFromString($value->toString())->querySelectorAll($path);
34+
$result = $value->querySelectorAll($selector);
3435

3536
if (0 === $result->count()) {
3637
return null;
@@ -39,7 +40,6 @@ public function eval(Row $row) : mixed
3940
$nodes = [];
4041

4142
foreach ($result as $node) {
42-
/* @phpstan-ignore-next-line */
4343
if (!$node instanceof Element) {
4444
continue;
4545
}

src/core/etl/src/Flow/ETL/Row/Entry/HTMLEntry.php

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55
namespace Flow\ETL\Row\Entry;
66

77
use function Flow\Types\DSL\{type_equals, type_html, type_optional};
8+
use Dom\HTMLDocument;
89
use Flow\ETL\Row\{Entry, Reference};
910
use Flow\ETL\Schema\{Definition, Metadata};
1011
use Flow\Types\Type;
11-
use Flow\Types\Value\HTMLDocument;
1212

1313
/**
1414
* @implements Entry<?HTMLDocument>
@@ -32,7 +32,7 @@ public function __construct(
3232
?Metadata $metadata = null,
3333
) {
3434
if (\is_string($value)) {
35-
$this->value = HTMLDocument::fromString($value);
35+
$this->value = HTMLDocument::createFromString($value);
3636
} else {
3737
$this->value = $value;
3838
}
@@ -75,7 +75,7 @@ public function isEqual(Entry $entry) : bool
7575
return false;
7676
}
7777

78-
return $entry->value()?->toString() === $this->value?->toString();
78+
return $entry->value()?->saveHtml() === $this->value?->saveHtml();
7979
}
8080

8181
public function map(callable $mapper) : self
@@ -99,7 +99,7 @@ public function toString() : string
9999
return '';
100100
}
101101

102-
return $this->value->toString();
102+
return $this->value->saveHtml();
103103
}
104104

105105
public function type() : Type

src/core/etl/src/Flow/ETL/Schema/Definition.php

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,13 @@
2828
type_xml,
2929
type_xml_element,
3030
types};
31+
use Dom\HTMLDocument;
3132
use Flow\ETL\Exception\{InvalidArgumentException, RuntimeException};
3233
use Flow\ETL\Row\{Entry, EntryReference, Reference};
3334
use Flow\Types\Type;
3435
use Flow\Types\Type\Logical\{ListType, MapType, OptionalType, StructureType};
3536
use Flow\Types\Type\{Native\FloatType, Native\IntegerType, Native\UnionType, TypeFactory};
36-
use Flow\Types\Value\{HTMLDocument, Uuid};
37+
use Flow\Types\Value\Uuid;
3738

3839
/**
3940
* @template-covariant T
@@ -142,7 +143,7 @@ public static function fromArray(array $definition) : self
142143
/**
143144
* @return Definition<HTMLDocument>
144145
*/
145-
public static function html(string|Reference $entry, bool $nullable = false, ?Metadata $metadata = null) : self
146+
public static function html(string|Reference $entry, bool $nullable = false, ?Metadata $metadata = null) : self // @phpstan-ignore class.notFound
146147
{
147148
return new self($entry, type_html(), $nullable, $metadata);
148149
}

src/core/etl/tests/Flow/ETL/Tests/Integration/Function/HTMLQuerySelectorAllTest.php

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@
55
namespace Flow\ETL\Tests\Integration\Function;
66

77
use function Flow\ETL\DSL\{df, from_rows, html_entry, ref, row, rows};
8-
use Dom\Element;
9-
use Flow\Types\Value\HTMLDocument;
8+
use Dom\{Element, HTMLDocument};
109
use PHPUnit\Framework\Attributes\RequiresPhp;
1110
use PHPUnit\Framework\TestCase;
1211

@@ -16,7 +15,7 @@ final class HTMLQuerySelectorAllTest extends TestCase
1615

1716
protected function setUp() : void
1817
{
19-
$this->html = new HTMLDocument('<!DOCTYPE html><html><head></head><body><div><span>foobar</span></div></body></html>');
18+
$this->html = HTMLDocument::createFromString('<!DOCTYPE html><html lang="en"><head></head><body><div><span>foobar</span></div></body></html>');
2019
}
2120

2221
#[RequiresPhp('<= 8.4')]

src/core/etl/tests/Flow/ETL/Tests/Integration/Function/HTMLQuerySelectorTest.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
namespace Flow\ETL\Tests\Integration\Function;
66

77
use function Flow\ETL\DSL\{df, from_rows, html_entry, ref, row, rows};
8-
use Flow\Types\Value\HTMLDocument;
8+
use Dom\HTMLDocument;
99
use PHPUnit\Framework\Attributes\RequiresPhp;
1010
use PHPUnit\Framework\TestCase;
1111

@@ -15,7 +15,7 @@ final class HTMLQuerySelectorTest extends TestCase
1515

1616
protected function setUp() : void
1717
{
18-
$this->html = new HTMLDocument('<!DOCTYPE html><html><head></head><body><div><span>foobar</span></div></body></html>');
18+
$this->html = HTMLDocument::createFromString('<!DOCTYPE html><html lang="en"><head></head><body><div><span>foobar</span></div></body></html>');
1919
}
2020

2121
#[RequiresPhp('<= 8.4')]

src/core/etl/tests/Flow/ETL/Tests/Unit/Function/HTMLQuerySelectorAllTest.php

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@
55
namespace Flow\ETL\Tests\Unit\Function;
66

77
use function Flow\ETL\DSL\{config, flow_context, ref, row};
8-
use Dom\Element;
9-
use Flow\Types\Value\HTMLDocument;
8+
use Dom\{Element, HTMLDocument};
109
use PHPUnit\Framework\Attributes\RequiresPhp;
1110
use PHPUnit\Framework\TestCase;
1211

@@ -16,10 +15,9 @@ final class HTMLQuerySelectorAllTest extends TestCase
1615

1716
protected function setUp() : void
1817
{
19-
$this->html = new HTMLDocument('<!DOCTYPE html><html><head></head><body><div><span>foobar</span></div></body></html>');
18+
$this->html = HTMLDocument::createFromString('<!DOCTYPE html><html><head></head><body><div><span>foobar</span></div></body></html>');
2019
}
2120

22-
#[RequiresPhp('>= 8.4')]
2321
public function test_getting_elements_for_given_path() : void
2422
{
2523
/** @var array<mixed> $result */

src/core/etl/tests/Flow/ETL/Tests/Unit/Function/HTMLQuerySelectorTest.php

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@
55
namespace Flow\ETL\Tests\Unit\Function;
66

77
use function Flow\ETL\DSL\{config, flow_context, ref, row};
8-
use Dom\Element;
9-
use Flow\Types\Value\HTMLDocument;
8+
use Dom\{Element, HTMLDocument};
109
use PHPUnit\Framework\Attributes\RequiresPhp;
1110
use PHPUnit\Framework\TestCase;
1211

@@ -16,7 +15,7 @@ final class HTMLQuerySelectorTest extends TestCase
1615

1716
protected function setUp() : void
1817
{
19-
$this->html = new HTMLDocument('<!DOCTYPE html><html><head></head><body><div><span>foobar</span></div></body></html>');
18+
$this->html = HTMLDocument::createFromString('<!DOCTYPE html><html><head></head><body><div><span>foobar</span></div></body></html>');
2019
}
2120

2221
#[RequiresPhp('>= 8.4')]

0 commit comments

Comments
 (0)