Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion phpstan.neon
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ parameters:

ignoreErrors:
-
message: '#Dom\\(HTMLDocument|Element)#i'
message: '#Dom\\(HTMLDocument|HTMLElement|Element)#i'
Comment thread
norberttech marked this conversation as resolved.
identifier: class.notFound

includes:
Expand Down
2 changes: 2 additions & 0 deletions rector.tests.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
use Flow\ETL\FlowContext;
use Flow\Types\Type\Logical\DateTimeType;
use Flow\Types\Type\Logical\DateType;
use Flow\Types\Type\Logical\HTMLElementType;
use Flow\Types\Type\Logical\HTMLType;
use Flow\Types\Type\Logical\JsonType;
use Flow\Types\Type\Logical\ListType;
Expand Down Expand Up @@ -178,6 +179,7 @@
new NewObjectToFunction(XMLElementType::class, 'Flow\ETL\DSL\type_xml_element'),
new NewObjectToFunction(XMLType::class, 'Flow\ETL\DSL\type_xml'),
new NewObjectToFunction(HTMLType::class, 'Flow\ETL\DSL\type_html'),
new NewObjectToFunction(HTMLElementType::class, 'Flow\ETL\DSL\type_html_element'),

// Extractors
new NewObjectToFunction(CacheExtractor::class, 'from_cache'),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,43 +6,14 @@

use function Flow\Types\DSL\type_string;
use Doctrine\DBAL\Schema\{Column, Index, Table};
use Doctrine\DBAL\Types\{DateImmutableType, DateTimeImmutableType, GuidType, TimeImmutableType};
use Doctrine\DBAL\Types\Type as DbalType;
use Flow\ETL\Exception\InvalidArgumentException;
use Flow\ETL\Schema;
use Flow\ETL\Schema\{Definition, Metadata};
use Flow\Types\Type;
use Flow\Types\Type\Logical\{DateTimeType,
DateType,
JsonType,
ListType,
MapType,
StructureType,
TimeType,
UuidType,
XMLElementType,
XMLType};
use Flow\Types\Type\Native\{BooleanType, FloatType, IntegerType, StringType};

final readonly class SchemaConverter
{
public const DEFAULT_TYPES = [
Comment thread
stloyd marked this conversation as resolved.
StringType::class => \Doctrine\DBAL\Types\StringType::class,
IntegerType::class => \Doctrine\DBAL\Types\IntegerType::class,
FloatType::class => \Doctrine\DBAL\Types\FloatType::class,
BooleanType::class => \Doctrine\DBAL\Types\BooleanType::class,
DateType::class => DateImmutableType::class,
TimeType::class => TimeImmutableType::class,
DateTimeType::class => DateTimeImmutableType::class,
UuidType::class => GuidType::class,
JsonType::class => \Doctrine\DBAL\Types\JsonType::class,
XMLType::class => \Doctrine\DBAL\Types\StringType::class,
XMLElementType::class => \Doctrine\DBAL\Types\StringType::class,
ListType::class => \Doctrine\DBAL\Types\JsonType::class,
MapType::class => \Doctrine\DBAL\Types\JsonType::class,
StructureType::class => \Doctrine\DBAL\Types\JsonType::class,
];

private TypesMap $typesMap;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
use Flow\Types\Type as FlowType;
use Flow\Types\Type\Logical\{DateTimeType,
DateType,
HTMLElementType,
HTMLType,
JsonType,
ListType,
MapType,
Expand Down Expand Up @@ -63,6 +65,8 @@ final class TypesMap
JsonType::class => \Doctrine\DBAL\Types\JsonType::class,
XMLType::class => \Doctrine\DBAL\Types\StringType::class,
XMLElementType::class => \Doctrine\DBAL\Types\StringType::class,
HTMLType::class => \Doctrine\DBAL\Types\StringType::class,
HTMLElementType::class => \Doctrine\DBAL\Types\StringType::class,
ListType::class => \Doctrine\DBAL\Types\JsonType::class,
MapType::class => \Doctrine\DBAL\Types\JsonType::class,
StructureType::class => \Doctrine\DBAL\Types\JsonType::class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,18 @@
use Doctrine\DBAL\Types\DateTimeTzType;
use Flow\ETL\Adapter\Doctrine\TypesMap;
use Flow\ETL\Exception\InvalidArgumentException;
use Flow\Types\Type\Logical\{DateTimeType, DateType, JsonType, ListType, MapType, StructureType, TimeType, UuidType, XMLElementType, XMLType};
use Flow\Types\Type\Logical\{DateTimeType,
DateType,
HTMLElementType,
HTMLType,
JsonType,
ListType,
MapType,
StructureType,
TimeType,
UuidType,
XMLElementType,
XMLType};
use Flow\Types\Type\Native\{BooleanType, FloatType, IntegerType, StringType};
use PHPUnit\Framework\TestCase;

Expand Down Expand Up @@ -175,6 +186,8 @@ public function test_default_flow_types_constant_mapping() : void
JsonType::class => DbalJsonType::class,
XMLType::class => \Doctrine\DBAL\Types\StringType::class,
XMLElementType::class => \Doctrine\DBAL\Types\StringType::class,
HTMLType::class => \Doctrine\DBAL\Types\StringType::class,
HTMLElementType::class => \Doctrine\DBAL\Types\StringType::class,
ListType::class => DbalJsonType::class,
MapType::class => DbalJsonType::class,
StructureType::class => DbalJsonType::class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
use Flow\Types\Type;
use Flow\Types\Type\Logical\{DateTimeType,
DateType,
HTMLType,
JsonType,
ListType,
MapType,
Expand All @@ -34,6 +35,7 @@
UuidType,
XMLElementType,
XMLType};
use Flow\Types\Type\Logical\HTMLElementType;
use Flow\Types\Type\Native\{BooleanType, FloatType, IntegerType, StringType};

final class SchemaConverter
Expand Down Expand Up @@ -77,6 +79,10 @@ private function flowToParquet(string $name, Type $type, bool $nullable) : Colum
return FlatColumn::float($name, $nullable ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED);
case IntegerType::class:
return FlatColumn::int64($name, $nullable ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED);
case HTMLType::class:
case HTMLElementType::class:
case XMLElementType::class:
case XMLType::class:
case StringType::class:
return FlatColumn::string($name, $nullable ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED);
case BooleanType::class:
Expand All @@ -91,9 +97,6 @@ private function flowToParquet(string $name, Type $type, bool $nullable) : Colum
return FlatColumn::uuid($name, $nullable ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED);
case JsonType::class:
return FlatColumn::json($name, $nullable ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED);
case XMLType::class:
case XMLElementType::class:
return FlatColumn::string($name, $nullable ? ParquetSchema\Repetition::OPTIONAL : ParquetSchema\Repetition::REQUIRED);
case ListType::class:
$elementType = $type->element();
$elementOptional = $elementType instanceof OptionalType;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,18 @@
use Flow\ETL\Schema;
use Flow\ETL\Schema\{Definition, Metadata};
use Flow\Types\Type;
use Flow\Types\Type\Logical\{DateTimeType, DateType, JsonType, ListType, MapType, StructureType, TimeType, UuidType, XMLElementType, XMLType};
use Flow\Types\Type\Logical\{DateTimeType,
DateType,
HTMLElementType,
HTMLType,
JsonType,
ListType,
MapType,
StructureType,
TimeType,
UuidType,
XMLElementType,
XMLType};
use Flow\Types\Type\Native\{ArrayType, BooleanType, EnumType, FloatType, IntegerType, StringType};

/**
Expand Down Expand Up @@ -423,13 +434,15 @@ private function convertTypeToOpenAPI(Type $type) : array
BooleanType::class => ['type' => 'boolean'],
IntegerType::class => ['type' => 'integer'],
FloatType::class => ['type' => 'number'],
StringType::class => ['type' => 'string'],
StringType::class,
HTMLType::class,
HTMLElementType::class => ['type' => 'string'],
DateType::class => ['type' => 'string', 'format' => 'date'],
DateTimeType::class => ['type' => 'string', 'format' => 'date-time'],
TimeType::class => ['type' => 'string', 'format' => 'time'],
UuidType::class => ['type' => 'string', 'format' => 'uuid'],
JsonType::class => ['type' => 'string', 'format' => 'json'],
XMLType::class => ['type' => 'string', 'format' => 'xml'],
XMLType::class,
XMLElementType::class => ['type' => 'string', 'format' => 'xml'],
EnumType::class => $this->convertEnumToOpenAPI($type),
ArrayType::class => $this->convertArrayToOpenAPI($type),
Expand Down
37 changes: 35 additions & 2 deletions src/core/etl/src/Flow/ETL/DSL/functions.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
type_xml_element as type_xml_element_new,
types as types_new
};
use Dom\HTMLDocument;
use Dom\{HTMLDocument, HTMLElement};
use Flow\Calculator\Rounding;
use Flow\ETL\{Analyze,
Attribute\DocumentationDSL,
Expand Down Expand Up @@ -165,7 +165,22 @@
use Flow\ETL\Retry\DelayFactory\{Fixed, Fixed\FixedMilliseconds};
use Flow\ETL\Retry\RetryStrategy\{AnyThrowable, OnExceptionTypes};
use Flow\ETL\Row\{Entries, EntryFactory, SortOrder};
use Flow\ETL\Row\Entry\{BooleanEntry, DateEntry, DateTimeEntry, EnumEntry, FloatEntry, IntegerEntry, JsonEntry, ListEntry, MapEntry, StringEntry, StructureEntry, TimeEntry, UuidEntry, XMLElementEntry, XMLEntry};
use Flow\ETL\Row\Entry\{BooleanEntry,
DateEntry,
DateTimeEntry,
EnumEntry,
FloatEntry,
HTMLElementEntry,
IntegerEntry,
JsonEntry,
ListEntry,
MapEntry,
StringEntry,
StructureEntry,
TimeEntry,
UuidEntry,
XMLElementEntry,
XMLEntry};
use Flow\ETL\Row\Entry\HTMLEntry;
use Flow\ETL\Row\{Entry, EntryReference, Reference, References};
use Flow\ETL\Row\Formatter\ASCIISchemaFormatter;
Expand Down Expand Up @@ -637,6 +652,15 @@ function html_entry(string $name, HTMLDocument|string|null $value, ?Metadata $me
return new HTMLEntry($name, $value, $metadata);
}

/**
 * Builds an HTMLElementEntry from the given name, value and optional metadata.
 *
 * @return Entry<?HTMLElement>
 */
#[DocumentationDSL(module: Module::CORE, type: DSLType::ENTRY)]
function html_element_entry(string $name, HTMLElement|string|null $value, ?Metadata $metadata = null) : Entry
{
    $entry = new HTMLElementEntry($name, $value, $metadata);

    return $entry;
}

/**
* @param Entry<mixed> ...$entries
*/
Expand Down Expand Up @@ -1978,6 +2002,15 @@ function html_schema(string $name, bool $nullable = false, ?Metadata $metadata =
return Definition::html($name, $nullable, $metadata);
}

/**
 * Builds a schema definition for an HTML element column by delegating to
 * Definition::html_element() with the given name, nullability and metadata.
 *
 * @return Definition<HTMLElement>
 */
#[DocumentationDSL(module: Module::CORE, type: DSLType::SCHEMA)]
function html_element_schema(string $name, bool $nullable = false, ?Metadata $metadata = null) : Definition
{
    $definition = Definition::html_element($name, $nullable, $metadata);

    return $definition;
}

/**
* @return Definition<\DOMDocument>
*/
Expand Down
24 changes: 18 additions & 6 deletions src/core/etl/src/Flow/ETL/Function/DOMElementAttributeValue.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,33 @@
namespace Flow\ETL\Function;

use function Flow\Types\DSL\{type_instance_of, type_list};
use Dom\HTMLElement;
use Flow\ETL\Row;

final class DOMElementAttributeValue extends ScalarFunctionChain
{
public function __construct(
private readonly ScalarFunction|\DOMNode $domElement,
private readonly ScalarFunction|\DOMNode|HTMLElement $domElement,
private readonly ScalarFunction|string $attribute,
) {
}

public function eval(Row $row) : ?string
{
$node = (new Parameter($this->domElement))->as($row, type_instance_of(\DOMNode::class), type_list(type_instance_of(\DOMNode::class)));
$types = [
type_instance_of(\DOMNode::class),
type_list(type_instance_of(\DOMNode::class)),
];

if (\class_exists('\Dom\HTMLElement')) {
$types[] = type_instance_of(HTMLElement::class);
$types[] = type_list(type_instance_of(HTMLElement::class));
}

$node = (new Parameter($this->domElement))->as(
$row,
...$types
);

if ($node instanceof \DOMDocument) {
$node = $node->documentElement;
Expand All @@ -33,13 +47,11 @@ public function eval(Row $row) : ?string
return null;
}

if (!$node instanceof \DOMNode || !$node->hasAttributes()) {
if ((!$node instanceof \DOMNode && !$node instanceof HTMLElement) || !$node->hasAttributes()) {
return null;
}

$attributes = $node->attributes;

if (!$namedItem = $attributes->getNamedItem($attributeName)) {
if (!$namedItem = $node->attributes->getNamedItem($attributeName)) {
return null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,27 @@

namespace Flow\ETL\Function;

use function Flow\Types\DSL\type_instance_of;
use Dom\HTMLElement;
use Flow\ETL\Row;

final class DOMElementAttributesCount extends ScalarFunctionChain
{
public function __construct(private readonly ScalarFunction|\DOMNode $domElement)
/**
 * @param HTMLElement|\DOMNode|ScalarFunction $domElement DOM element, or a scalar function that is
 *                                                        resolved against the row in eval() to obtain it
 */
public function __construct(private readonly ScalarFunction|\DOMNode|HTMLElement $domElement)
{
}

public function eval(Row $row) : ?int
{
$domElement = (new Parameter($this->domElement))->asInstanceOf($row, \DOMElement::class);
$types = [
type_instance_of(\DOMElement::class),
];

if (\class_exists('\Dom\HTMLElement')) {
$types[] = type_instance_of(HTMLElement::class);
}

$domElement = (new Parameter($this->domElement))->as($row, ...$types);

if ($domElement === null) {
return null;
Expand Down
28 changes: 23 additions & 5 deletions src/core/etl/src/Flow/ETL/Function/DOMElementValue.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,31 @@
namespace Flow\ETL\Function;

use function Flow\Types\DSL\{type_instance_of, type_list};
use Dom\HTMLElement;
use Flow\ETL\Row;

final class DOMElementValue extends ScalarFunctionChain
{
public function __construct(private readonly ScalarFunction|\DOMNode $node)
public function __construct(private readonly ScalarFunction|\DOMNode|HTMLElement $node)
{
}

public function eval(Row $row) : mixed
{
$node = (new Parameter($this->node))->as($row, type_instance_of(\DOMNode::class), type_list(type_instance_of(\DOMNode::class)));
$types = [
type_instance_of(\DOMNode::class),
type_list(type_instance_of(\DOMNode::class)),
];

if (\class_exists('\Dom\HTMLElement')) {
$types[] = type_instance_of(HTMLElement::class);
$types[] = type_list(type_instance_of(HTMLElement::class));
}

$node = (new Parameter($this->node))->as(
$row,
...$types
);

if (\is_array($node) && \count($node)) {
$node = \reset($node);
Expand All @@ -25,10 +39,14 @@ public function eval(Row $row) : mixed
$node = $node->documentElement;
}

if (!$node instanceof \DOMElement) {
return null;
if ($node instanceof \DOMElement) {
return $node->nodeValue;
}

if ($node instanceof HTMLElement) {
return $node->textContent;
}

return $node->nodeValue;
return null;
}
}
Loading