Skip to content

Commit 584f491

Browse files
authored
Fixed massive performance bottleneck in parquet library (#1735)
* Fixed massive performance bottleneck in parquet library
* Added missing schema_to_ascii DSL method
1 parent 825dc40 commit 584f491

5 files changed

Lines changed: 29 additions & 8 deletions

File tree

shell.nix

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ in
1212
{
1313
pkgs ? lockedPkgs,
1414
php-version ? 8.2,
15-
with-pcov ? true,
15+
with-blackfire ? false,
1616
with-xdebug ? false,
17-
with-blackfire ? false
17+
with-pcov ? !with-blackfire,
1818
}:
1919

2020
let

src/core/etl/src/Flow/ETL/DSL/functions.php

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,8 @@
161161
use Flow\ETL\Row\Entry\{BooleanEntry, DateEntry, DateTimeEntry, EnumEntry, FloatEntry, IntegerEntry, JsonEntry, ListEntry, MapEntry, StringEntry, StructureEntry, TimeEntry, UuidEntry, XMLElementEntry, XMLEntry};
162162
use Flow\ETL\Row\{Entry, EntryReference, Reference, References};
163163
use Flow\ETL\Row\Formatter\ASCIISchemaFormatter;
164-
use Flow\ETL\Schema\{Definition};
165-
use Flow\ETL\Schema\Formatter\JsonSchemaFormatter;
164+
use Flow\ETL\Schema\{Definition, Formatter\PHPFormatter\TypeFormatter, Formatter\PHPFormatter\ValueFormatter};
165+
use Flow\ETL\Schema\Formatter\{JsonSchemaFormatter, PHPSchemaFormatter};
166166
use Flow\ETL\Schema\Metadata;
167167
use Flow\ETL\Schema\Validator\{EvolvingValidator, SelectiveValidator, StrictValidator};
168168
use Flow\ETL\Transformer\OrderEntries\{CombinedComparator, Comparator, NameComparator, Order, TypeComparator, TypePriorities};
@@ -1673,6 +1673,24 @@ function schema_to_json(Schema $schema, bool $pretty = false) : string
16731673
return (new JsonSchemaFormatter($pretty))->format($schema);
16741674
}
16751675

1676+
/**
1677+
* @param Schema $schema
1678+
*/
1679+
#[DocumentationDSL(module: Module::CORE, type: DSLType::HELPER)]
1680+
function schema_to_php(Schema $schema, ValueFormatter $valueFormatter = new ValueFormatter(), TypeFormatter $typeFormatter = new TypeFormatter()) : string
1681+
{
1682+
return (new PHPSchemaFormatter($valueFormatter, $typeFormatter))->format($schema);
1683+
}
1684+
1685+
/**
1686+
* @param Schema $schema
1687+
*/
1688+
#[DocumentationDSL(module: Module::CORE, type: DSLType::HELPER)]
1689+
function schema_to_ascii(Schema $schema, ?SchemaFormatter $formatter = null) : string
1690+
{
1691+
return ($formatter ?? new ASCIISchemaFormatter())->format($schema);
1692+
}
1693+
16761694
/**
16771695
* @param Schema $expected
16781696
* @param Schema $given
@@ -1981,6 +1999,8 @@ function get_type(mixed $value) : Type
19811999

19822000
/**
19832001
* @param Schema $schema
2002+
*
2003+
* @deprecated Please use schema_to_ascii($schema) instead
19842004
*/
19852005
#[DocumentationDSL(module: Module::CORE, type: DSLType::SCHEMA)]
19862006
function print_schema(Schema $schema, ?SchemaFormatter $formatter = null) : string

src/core/etl/src/Flow/ETL/DataFrame.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -757,6 +757,7 @@ public function run(?callable $callback = null, bool|Analyze $analyze = false) :
757757
$analyze = $analyze === true ? analyze() : $analyze;
758758

759759
if ($analyze) {
760+
gc_collect_cycles();
760761
$memory = new Consumption();
761762
$startedAt = $this->context->config->clock()->now();
762763
$startTime = HighResolutionTime::now();

src/lib/parquet/src/Flow/Parquet/ParquetFile/RowGroupBuilder/ColumnData/FlatColumnValues.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,9 @@ public function merge(self $flatData) : self
9292
throw new RuntimeException('Cannot merge different column, attempt to merge: ' . $this->column->flatPath() . ' with ' . $flatData->column->flatPath());
9393
}
9494

95-
$this->repetitionLevels = [...$this->repetitionLevels, ...$flatData->repetitionLevels];
96-
$this->definitionLevels = [...$this->definitionLevels, ...$flatData->definitionLevels];
97-
$this->values = [...$this->values, ...$flatData->values];
95+
array_push($this->repetitionLevels, ...$flatData->repetitionLevels);
96+
array_push($this->definitionLevels, ...$flatData->definitionLevels);
97+
array_push($this->values, ...$flatData->values);
9898

9999
return $this;
100100
}

web/landing/resources/dsl.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)