Skip to content

Commit 2007f0d

Browse files
committed
Add HTMLQuerySelectorAll & HTMLQuerySelector scalar functions
1 parent d9b8581 commit 2007f0d

5 files changed

Lines changed: 196 additions & 0 deletions

File tree

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\ETL\Function;
6+
7+
use function Flow\Types\DSL\type_html;
8+
use Dom\HTMLDocument;
9+
use Flow\ETL\Row;
10+
11+
/**
 * Scalar function that parses an HTML value and runs querySelector() on it with the given path.
 *
 * Evaluates to null when the value or the path resolves to null, or when the
 * \Dom\HTMLDocument class does not exist in the running PHP; otherwise it
 * evaluates to whatever querySelector() returns for the path.
 */
final class HTMLQuerySelector extends ScalarFunctionChain
{
    /**
     * @param mixed $value source of the HTML value, resolved per row as type_html()
     * @param ScalarFunction|string $path selector, given literally or resolved per row to a string
     */
    public function __construct(
        private readonly mixed $value,
        private readonly ScalarFunction|string $path,
    ) {
    }

    public function eval(Row $row) : mixed
    {
        // Both parameters are resolved up front, before any guard is evaluated.
        $html = (new Parameter($this->value))->as($row, type_html());
        $selector = (new Parameter($this->path))->asString($row);

        if (null === $html) {
            return null;
        }

        if (null === $selector) {
            return null;
        }

        // Without the \Dom\HTMLDocument class there is nothing to query with.
        if (!\class_exists('\Dom\HTMLDocument')) {
            return null;
        }

        $document = HTMLDocument::createFromString($html->toString());

        return $document->querySelector($selector);
    }
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\ETL\Function;
6+
7+
use function Flow\Types\DSL\type_html;
8+
use DOM\{Element, HTMLDocument};
9+
use Flow\ETL\Row;
10+
11+
/**
 * Scalar function that parses an HTML value and collects every element returned by
 * querySelectorAll() for the given path.
 *
 * Evaluates to null when the value or the path resolves to null, when the
 * \Dom\HTMLDocument class does not exist in the running PHP, or when no element
 * is collected; otherwise it evaluates to a list of the matched elements.
 */
final class HTMLQuerySelectorAll extends ScalarFunctionChain
{
    /**
     * @param mixed $value source of the HTML value, resolved per row as type_html()
     * @param ScalarFunction|string $path selector, given literally or resolved per row to a string
     */
    public function __construct(
        private readonly mixed $value,
        private readonly ScalarFunction|string $path,
    ) {
    }

    /**
     * @return mixed null, or a non-empty list of Element instances
     */
    public function eval(Row $row) : mixed
    {
        $value = (new Parameter($this->value))->as($row, type_html());
        $path = (new Parameter($this->path))->asString($row);

        if (null === $value || null === $path) {
            return null;
        }

        // Without the \Dom\HTMLDocument class there is nothing to query with.
        if (!\class_exists('\Dom\HTMLDocument')) {
            return null;
        }

        $nodes = [];

        /* @phpstan-ignore-next-line */
        foreach (HTMLDocument::createFromString($value->toString())->querySelectorAll($path) as $node) {
            /* @phpstan-ignore-next-line */
            if ($node instanceof Element) {
                $nodes[] = $node;
            }
        }

        // Emptiness is decided after filtering, so "nothing collected" is always
        // reported as null and never as an empty array.
        return [] === $nodes ? null : $nodes;
    }
}

src/core/etl/src/Flow/ETL/Function/ScalarFunctionChain.php

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,16 @@ public function hash(Algorithm $algorithm = new NativePHPHash()) : Hash
311311
return new Hash($this, $algorithm);
312312
}
313313

314+
/**
 * Creates an HTMLQuerySelector function that uses this function as its value and $path as its path.
 */
public function htmlQuerySelector(ScalarFunction|string $path) : HTMLQuerySelector
{
    $function = new HTMLQuerySelector($this, $path);

    return $function;
}
318+
319+
/**
 * Creates an HTMLQuerySelectorAll function that uses this function as its value and $path as its path.
 */
public function htmlQuerySelectorAll(ScalarFunction|string $path) : HTMLQuerySelectorAll
{
    $function = new HTMLQuerySelectorAll($this, $path);

    return $function;
}
323+
314324
/**
315325
* Returns the index of given $needle in string.
316326
*/
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\ETL\Tests\Unit\Function;
6+
7+
use function Flow\ETL\DSL\{config, flow_context, ref, row};
8+
use Dom\Element;
9+
use Flow\Types\Value\HTMLDocument;
10+
use PHPUnit\Framework\Attributes\RequiresPhp;
11+
use PHPUnit\Framework\TestCase;
12+
13+
/**
 * Unit tests for the htmlQuerySelectorAll() scalar function.
 *
 * Each test is gated on the PHP version with RequiresPhp: on PHP >= 8.4 a real
 * query is expected, on older versions the function is expected to evaluate to null.
 */
final class HTMLQuerySelectorAllTest extends TestCase
{
    private HTMLDocument $html;

    protected function setUp() : void
    {
        $this->html = new HTMLDocument('<!DOCTYPE html><html><head></head><body><div><span>foobar</span></div></body></html>');
    }

    #[RequiresPhp('>= 8.4')]
    public function test_getting_elements_for_given_path() : void
    {
        /** @var array<mixed> $result */
        $result = $this->querySelectorAll('body div span');

        self::assertCount(1, $result);

        /* @phpstan-ignore-next-line */
        self::assertInstanceOf(Element::class, $result[0]);
    }

    #[RequiresPhp('< 8.4')]
    public function test_getting_null_for_older_versions() : void
    {
        self::assertNull($this->querySelectorAll('body div p'));
    }

    #[RequiresPhp('>= 8.4')]
    public function test_getting_null_when_nothing_found() : void
    {
        // The fixture document contains no <p>, so the selector matches nothing.
        self::assertNull($this->querySelectorAll('body div p'));
    }

    /**
     * Evaluates htmlQuerySelectorAll($path) against a row whose "value" entry holds the fixture document.
     */
    private function querySelectorAll(string $path) : mixed
    {
        return ref('value')
            ->htmlQuerySelectorAll($path)
            ->eval(row(flow_context(config())->entryFactory()->create('value', $this->html)));
    }
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\ETL\Tests\Unit\Function;
6+
7+
use function Flow\ETL\DSL\{config, flow_context, ref, row};
8+
use Dom\Element;
9+
use Flow\Types\Value\HTMLDocument;
10+
use PHPUnit\Framework\Attributes\RequiresPhp;
11+
use PHPUnit\Framework\TestCase;
12+
13+
/**
 * Unit tests for the htmlQuerySelector() scalar function.
 *
 * Each test is gated on the PHP version with RequiresPhp: on PHP >= 8.4 a real
 * query is expected, on older versions the function is expected to evaluate to null.
 */
final class HTMLQuerySelectorTest extends TestCase
{
    private HTMLDocument $html;

    protected function setUp() : void
    {
        $this->html = new HTMLDocument('<!DOCTYPE html><html><head></head><body><div><span>foobar</span></div></body></html>');
    }

    #[RequiresPhp('>= 8.4')]
    public function test_getting_elements_for_given_path() : void
    {
        $result = $this->querySelector('body div span');

        /* @phpstan-ignore-next-line */
        self::assertInstanceOf(Element::class, $result);
    }

    #[RequiresPhp('< 8.4')]
    public function test_getting_null_for_older_versions() : void
    {
        self::assertNull($this->querySelector('body div p'));
    }

    #[RequiresPhp('>= 8.4')]
    public function test_getting_null_when_nothing_found() : void
    {
        // The fixture document contains no <p>, so the selector matches nothing.
        self::assertNull($this->querySelector('body div p'));
    }

    /**
     * Evaluates htmlQuerySelector($path) against a row whose "value" entry holds the fixture document.
     */
    private function querySelector(string $path) : mixed
    {
        return ref('value')
            ->htmlQuerySelector($path)
            ->eval(row(flow_context(config())->entryFactory()->create('value', $this->html)));
    }
}

0 commit comments

Comments
 (0)