Skip to content

Commit 8df3e3c

Browse files
authored
Fixed parquet normalizer for nullable entries (#1677)
1 parent 303a7f8 commit 8df3e3c

10 files changed

Lines changed: 172 additions & 36 deletions

File tree

src/adapter/etl-adapter-csv/tests/Flow/ETL/Adapter/CSV/Tests/Unit/EntryNormalizerTest.php

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,15 +22,23 @@ final class EntryNormalizerTest extends FlowTestCase
2222
{
2323
/**
 * Supplies named datasets of the form [entry, expected value].
 *
 * Each nullable case is listed directly after its non-null counterpart so the
 * pairs are easy to scan and compare.
 * NOTE(review): the second element is presumably the value the CSV entry
 * normalizer is expected to return for the entry - confirm against the
 * test method that consumes this provider.
 *
 * @return \Generator<string, array{0: mixed, 1: mixed}>
 */
public static function entries_provider() : \Generator
{
    yield 'string' => [str_entry('string', 'value'), 'value'];
    yield 'string_nullable' => [str_entry('string', null), null];

    yield 'int' => [int_entry('integer', 1), 1];
    yield 'int_nullable' => [int_entry('integer', null), null];

    yield 'float' => [float_entry('float', 1.1), 1.1];
    yield 'float_nullable' => [float_entry('float', null), null];

    yield 'bool' => [bool_entry('bool', true), 'true'];
    yield 'bool_nullable' => [bool_entry('bool', null), null];

    yield 'null' => [null_entry('null'), null];

    yield 'date' => [date_entry('date', new \DateTimeImmutable('2023-10-01 12:02:01')), '2023-10-01'];
    yield 'date_nullable' => [date_entry('date', null), null];

    yield 'datetime' => [datetime_entry('datetime', new \DateTimeImmutable('2023-10-01 12:02:01')), '2023-10-01T12:02:01+00:00'];
    yield 'datetime_nullable' => [datetime_entry('datetime', null), null];

    yield 'time' => [time_entry('time', new \DateInterval('PT1H')), 3600000000];
    yield 'time_nullable' => [time_entry('time', null), null];

    yield 'uuid' => [uuid_entry('uuid', 'f47ac10b-58cc-4372-a567-0e02b2c3d479'), 'f47ac10b-58cc-4372-a567-0e02b2c3d479'];
    yield 'uuid_nullable' => [uuid_entry('uuid', null), null];
}
3543

3644
/**

src/adapter/etl-adapter-json/tests/Flow/ETL/Adapter/JSON/Tests/Unit/EntryNormalizerTest.php

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,14 +23,22 @@ final class EntryNormalizerTest extends FlowTestCase
2323
/**
 * Named datasets, each shaped as [entry, expected value].
 *
 * The keys of the array below become the dataset names; `yield from`
 * forwards them unchanged and in the order written.
 *
 * @return \Generator<string, array{0: mixed, 1: mixed}>
 */
public static function entries_provider() : \Generator
{
    yield from [
        'string' => [str_entry('string', 'value'), 'value'],
        'string_nullable' => [str_entry('string', null), null],
        'int' => [int_entry('integer', 1), 1],
        'int_nullable' => [int_entry('integer', null), null],
        'float' => [float_entry('float', 1.1), 1.1],
        'float_nullable' => [float_entry('float', null), null],
        'bool' => [bool_entry('bool', true), 'true'],
        'bool_nullable' => [bool_entry('bool', null), null],
        'null' => [null_entry('null'), null],
        'date' => [
            date_entry('date', new \DateTimeImmutable('2023-10-01 12:02:01')),
            '2023-10-01',
        ],
        'date_nullable' => [date_entry('date', null), null],
        'datetime' => [
            datetime_entry('datetime', new \DateTimeImmutable('2023-10-01 12:02:01')),
            '2023-10-01T12:02:01+00:00',
        ],
        'datetime_nullable' => [datetime_entry('datetime', null), null],
        'time' => [time_entry('time', new \DateInterval('PT1H')), 3600000000],
        'time_nullable' => [time_entry('time', null), null],
        'uuid' => [
            uuid_entry('uuid', 'f47ac10b-58cc-4372-a567-0e02b2c3d479'),
            'f47ac10b-58cc-4372-a567-0e02b2c3d479',
        ],
        'uuid_nullable' => [uuid_entry('uuid', null), null],
    ];
}
3543

3644
/**

src/adapter/etl-adapter-parquet/src/Flow/ETL/Adapter/Parquet/RowsNormalizer.php

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,12 @@ public function normalize(Rows $rows, Schema $schema) : array
2525
$columns = [];
2626

2727
foreach ($row->entries() as $entry) {
28+
if ($schema->get($entry->ref())->isNullable() && $entry->value() === null) {
29+
$columns[$entry->name()] = null;
30+
31+
continue;
32+
}
33+
2834
$columns[$entry->name()] = match ($entry::class) {
2935
UuidEntry::class => type_string()->cast($entry->value()),
3036
XMLEntry::class => type_string()->cast($entry->value()),

src/adapter/etl-adapter-parquet/tests/Flow/ETL/Adapter/Parquet/Tests/Unit/RowsNormalizerTest.php

Lines changed: 105 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,21 +4,121 @@
44

55
namespace Flow\ETL\Adapter\Parquet\Tests\Unit;
66

7-
use function Flow\ETL\DSL\{row, rows, schema, str_entry, str_schema};
7+
use function Flow\ETL\DSL\{bool_entry,
8+
bool_schema,
9+
datetime_entry,
10+
datetime_schema,
11+
enum_entry,
12+
enum_schema,
13+
float_entry,
14+
float_schema,
15+
int_entry,
16+
int_schema,
17+
json_entry,
18+
json_schema,
19+
list_entry,
20+
list_schema,
21+
map_entry,
22+
map_schema,
23+
null_entry,
24+
row,
25+
rows,
26+
schema,
27+
string_schema,
28+
struct_entry,
29+
structure_schema,
30+
uuid_entry,
31+
uuid_schema,
32+
xml_entry,
33+
xml_schema};
34+
use function Flow\Types\DSL\{type_datetime, type_float, type_integer, type_list, type_map, type_string, type_structure};
835
use Flow\ETL\Adapter\Parquet\RowsNormalizer;
36+
use Flow\ETL\Tests\Fixtures\Enum\BackedStringEnum;
937
use Flow\ETL\Tests\FlowTestCase;
1038

1139
final class RowsNormalizerTest extends FlowTestCase
1240
{
13-
public function test_normalization_nullable_entry() : void
41+
public function test_normalization_nullable_entries() : void
1442
{
15-
$rows = rows(row(str_entry('id', null)));
16-
$schema = schema(str_schema('id'));
43+
$rows = rows(
44+
row(
45+
int_entry('int', null),
46+
float_entry('float', null),
47+
bool_entry('bool', null),
48+
datetime_entry('datetime', null),
49+
null_entry('null'),
50+
uuid_entry('uuid', null),
51+
json_entry('json', null),
52+
list_entry('list', null, type_list(type_integer())),
53+
list_entry('list_of_datetimes', null, type_list(type_datetime())),
54+
map_entry(
55+
'map',
56+
null,
57+
type_map(type_integer(), type_string())
58+
),
59+
struct_entry(
60+
'struct',
61+
null,
62+
type_structure([
63+
'street' => type_string(),
64+
'city' => type_string(),
65+
'zip' => type_string(),
66+
'country' => type_string(),
67+
'location' => type_structure([
68+
'lat' => type_float(),
69+
'lon' => type_float(),
70+
]),
71+
]),
72+
),
73+
enum_entry('enum', null),
74+
xml_entry('xml', null),
75+
)
76+
);
77+
$schema = schema(
78+
int_schema('int', true),
79+
float_schema('float', true),
80+
bool_schema('bool', true),
81+
datetime_schema('datetime', true),
82+
string_schema('null', nullable: true),
83+
uuid_schema('uuid', true),
84+
json_schema('json', true),
85+
list_schema('list', type_list(type_integer()), true),
86+
list_schema('list_of_datetimes', type_list(type_datetime()), true),
87+
map_schema('map', type_map(type_integer(), type_string()), true),
88+
structure_schema(
89+
'struct',
90+
type_structure([
91+
'street' => type_string(),
92+
'city' => type_string(),
93+
'zip' => type_string(),
94+
'country' => type_string(),
95+
'location' => type_structure([
96+
'lat' => type_float(),
97+
'lon' => type_float(),
98+
]),
99+
]),
100+
true
101+
),
102+
enum_schema('enum', BackedStringEnum::class, true),
103+
xml_schema('xml', true),
104+
);
17105

18106
self::assertEquals(
19107
[
20108
[
21-
'id' => '',
109+
'int' => null,
110+
'float' => null,
111+
'bool' => null,
112+
'datetime' => null,
113+
'null' => null,
114+
'uuid' => null,
115+
'json' => null,
116+
'list' => null,
117+
'list_of_datetimes' => null,
118+
'map' => null,
119+
'struct' => null,
120+
'enum' => null,
121+
'xml' => null,
22122
],
23123
],
24124
(new RowsNormalizer())->normalize($rows, $schema)

src/lib/types/src/Flow/Types/Type/Native/EnumType.php

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
*/
2121
public function __construct(public string $class)
{
    // \UnitEnum and \BackedEnum are interfaces, not enums, so enum_exists()
    // reports false for them; they are accepted explicitly.
    // The check reads the $class parameter throughout instead of mixing it
    // with the promoted $this->class property.
    $isEnumInterface = $class === \UnitEnum::class || $class === \BackedEnum::class;

    // Anything else has to be the name of an existing enum.
    if (!$isEnumInterface && !\enum_exists($class)) {
        throw new InvalidArgumentException("Enum {$class} not found");
    }
}

src/lib/types/tests/Flow/Types/Tests/Unit/Type/Native/EnumTypeTest.php

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@ public static function invalid_assert_data_provider() : \Generator
2222
/**
 * Datasets of the form [enum case, class name given to the enum type].
 * NOTE(review): presumably each pair is expected to pass the type's
 * assertion - confirm against the test that consumes this provider.
 *
 * @return \Generator<int, array{0: \UnitEnum, 1: string}>
 */
public static function successful_assert_data_provider() : \Generator
{
    yield from [
        // a case checked against its own enum class
        [SomeEnum::A, SomeEnum::class],
        // all enums are \UnitEnum
        [SomeEnum::B, \UnitEnum::class],
        // SomeEnum is string backed enum
        [SomeEnum::B, \BackedEnum::class],
    ];
}
2628

2729
public function test_casting_integer_to_enum() : void

tools/infection/composer.lock

Lines changed: 6 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tools/phpstan/composer.lock

Lines changed: 5 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tools/phpunit/composer.lock

Lines changed: 26 additions & 14 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)