Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 29 additions & 10 deletions src/core/etl/src/Flow/ETL/DataFrame.php
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,13 @@
OrderEntriesTransformer,
OrderEntries\Comparator,
OrderEntries\TypeComparator,
RenameAllCaseTransformer,
RenameEachTransformer,
RenameEntryTransformer,
RenameStrReplaceAllEntriesTransformer,
ScalarFunctionFilterTransformer,
ScalarFunctionTransformer,
SelectEntriesTransformer,
StyleConverter\RenameStrategy,
UntilTransformer,
WindowFunctionTransformer};
use Flow\Filesystem\Path\Filter;
Expand Down Expand Up @@ -84,7 +85,8 @@ public function autoCast() : self
* Merge/Split Rows yielded by Extractor into batches of given size.
* For example, when Extractor is yielding one row at a time, this method will merge them into batches of given size
* before passing them to the next pipeline element.
* Similarly when Extractor is yielding batches of rows, this method will split them into smaller batches of given size.
* Similarly when Extractor is yielding batches of rows, this method will split them into smaller batches of given
* size.
*
* In order to merge all Rows into a single batch use DataFrame::collect() method or set size to -1 or 0.
*
Expand Down Expand Up @@ -210,7 +212,8 @@ public function crossJoin(self $dataFrame, string $prefix = '') : self

/**
* @param int $limit maximum number of rows to display
* @param bool|int $truncate false or if set to 0 columns are not truncated, otherwise default truncate to 20 characters
* @param bool|int $truncate false or if set to 0 columns are not truncated, otherwise default truncate to 20
* characters
* @param Formatter $formatter
*
* @trigger
Expand Down Expand Up @@ -258,7 +261,8 @@ public function dropDuplicates(string|Reference ...$entries) : self
}

/**
* Drop all partitions from Rows, additionally when $dropPartitionColumns is set to true, partition columns are also removed.
* Drop all partitions from Rows, additionally when $dropPartitionColumns is set to true, partition columns are
* also removed.
*
* @lazy
*/
Expand Down Expand Up @@ -636,10 +640,12 @@ public function renameAll(string $search, string $replace) : self

/**
* @lazy
*
* @deprecated use DataFrame::renameEach() with a selected RenameStrategy
*/
public function renameAllLowerCase() : self
{
    // Delegate to renameEach() only: it registers the single transformer needed.
    // Also adding a RenameAllCaseTransformer here would queue the same rename twice.
    return $this->renameEach(RenameStrategy::LOWER);
}
Expand All @@ -658,30 +664,43 @@ public function renameAllStyle(StringStyles|string $style) : self

/**
* @lazy
*
* @deprecated use DataFrame::renameEach() with a selected RenameStrategy
*/
public function renameAllUpperCase() : self
{
    // Delegate to renameEach() only: it registers the single transformer needed.
    // Also adding a RenameAllCaseTransformer here would queue the same rename twice.
    return $this->renameEach(RenameStrategy::UPPER);
}

/**
* @lazy
*
* @deprecated use DataFrame::renameEach() with a selected RenameStrategy
*/
public function renameAllUpperCaseFirst() : self
{
    // Delegate to renameEach() only: it registers the single transformer needed.
    // Also adding a RenameAllCaseTransformer here would queue the same rename twice.
    return $this->renameEach(RenameStrategy::UCFIRST);
}

/**
* @lazy
*
* @deprecated use DataFrame::renameEach() with a selected RenameStrategy
*/
public function renameAllUpperCaseWord() : self
{
    // Delegate to renameEach() only: it registers the single transformer needed.
    // Also adding a RenameAllCaseTransformer here would queue the same rename twice.
    return $this->renameEach(RenameStrategy::UCWORDS);
}

/**
 * Queue a transformer that renames every entry using the given strategy.
 *
 * @lazy
 */
public function renameEach(RenameStrategy $strategy) : self
{
    $transformer = new RenameEachTransformer($strategy);
    $this->pipeline->add($transformer);

    return $this;
}
Expand Down Expand Up @@ -825,8 +844,8 @@ public function transform(Transformer|Transformation|Transformations|WithEntry $
}

/**
* The difference between filter and until is that filter will keep filtering rows until extractors finish yielding rows.
* Until will send a STOP signal to the Extractor when the condition is not met.
* The difference between filter and until is that filter will keep filtering rows until extractors finish yielding
* rows. Until will send a STOP signal to the Extractor when the condition is not met.
*
* @lazy
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,16 @@
use Flow\ETL\Function\ScalarFunction\ExpandResults;
use Flow\ETL\{Loader, Pipeline, Transformer};
use Flow\ETL\Pipeline\{BatchingPipeline, CollectingPipeline, LinkedPipeline, SynchronousPipeline, VoidPipeline};
use Flow\ETL\Transformer\{CallbackRowTransformer, DropEntriesTransformer, EntryNameStyleConverterTransformer, LimitTransformer, RenameAllCaseTransformer, RenameEntryTransformer, RenameStrReplaceAllEntriesTransformer, ScalarFunctionTransformer, SelectEntriesTransformer};
use Flow\ETL\Transformer\{CallbackRowTransformer,
DropEntriesTransformer,
EntryNameStyleConverterTransformer,
LimitTransformer,
RenameAllCaseTransformer,
RenameEachTransformer,
RenameEntryTransformer,
RenameStrReplaceAllEntriesTransformer,
ScalarFunctionTransformer,
SelectEntriesTransformer};

final class LimitOptimization implements Optimization
{
Expand All @@ -27,6 +36,7 @@ final class LimitOptimization implements Optimization
SelectEntriesTransformer::class,
DropEntriesTransformer::class,
RenameAllCaseTransformer::class,
RenameEachTransformer::class,
RenameEntryTransformer::class,
RenameStrReplaceAllEntriesTransformer::class,
LimitTransformer::class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,73 +4,40 @@

namespace Flow\ETL\Transformer;

use Flow\ETL\{FlowContext, Row, Rows, Transformer};
use Flow\ETL\{FlowContext, Rows, Transformer, Transformer\StyleConverter\RenameStrategy};

/**
 * Backward-compatibility wrapper mapping the legacy boolean flags onto a RenameEachTransformer.
 *
 * When more than one flag is enabled, the flag declared last in the constructor wins
 * (ucwords, then ucfirst, then lower, then upper). When no flag is enabled, rows are
 * returned untouched.
 *
 * @deprecated use RenameEachTransformer with a selected RenameStrategy
 */
final class RenameAllCaseTransformer implements Transformer
{
    /**
     * Delegate performing the actual rename; null when no flag was enabled.
     */
    private readonly ?RenameEachTransformer $transformer;

    public function __construct(
        bool $upper = false,
        bool $lower = false,
        bool $ucfirst = false,
        bool $ucwords = false,
    ) {
        // Arms are ordered so that the last enabled flag (in parameter order) takes precedence.
        $strategy = match (true) {
            $ucwords => RenameStrategy::UCWORDS,
            $ucfirst => RenameStrategy::UCFIRST,
            $lower => RenameStrategy::LOWER,
            $upper => RenameStrategy::UPPER,
            default => null,
        };

        // Always assign the property so transform() never reads an uninitialized typed property.
        $this->transformer = null === $strategy ? null : new RenameEachTransformer($strategy);
    }

    public function transform(Rows $rows, FlowContext $context) : Rows
    {
        // No flag enabled: nothing to rename, pass the rows through.
        if (null === $this->transformer) {
            return $rows;
        }

        return $this->transformer->transform($rows, $context);
    }
}
74 changes: 74 additions & 0 deletions src/core/etl/src/Flow/ETL/Transformer/RenameEachTransformer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Transformer;

use Flow\ETL\{FlowContext, Row, Rows, Transformer, Transformer\StyleConverter\RenameStrategy};

/**
 * Renames every entry of each Row according to the configured RenameStrategy.
 */
final readonly class RenameEachTransformer implements Transformer
{
    public function __construct(
        private RenameStrategy $strategy,
    ) {
    }

    public function transform(Rows $rows, FlowContext $context) : Rows
    {
        return $rows->map(function (Row $row) : Row {
            foreach ($row->entries()->all() as $entry) {
                $name = $entry->name();

                // rename() is called for every entry, including when the computed name equals the current one.
                $row = $row->rename($name, match ($this->strategy) {
                    RenameStrategy::LOWER => \mb_strtolower($name),
                    RenameStrategy::UPPER => \mb_strtoupper($name),
                    RenameStrategy::UCFIRST => $this->ucFirst($name),
                    RenameStrategy::UCWORDS => $this->ucWords($name),
                    RenameStrategy::TRANSLITERATE => $this->transliterate($name),
                });
            }

            return $row;
        });
    }

    /**
     * Transliterate to lower-case ASCII using ext-intl, leaving the name untouched when that is not possible.
     */
    private function transliterate(string $string) : string
    {
        // ext-intl is optional: without it the name is returned unchanged.
        if (!\function_exists('transliterator_transliterate')) {
            return $string;
        }

        $result = \transliterator_transliterate('Any-Latin; Latin-ASCII; Lower()', $string);

        // transliterator_transliterate() returns false on failure; casting that to string
        // would rename the entry to an empty name, so keep the original name instead.
        return false === $result ? $string : $result;
    }

    /**
     * Upper-case the first character of the string, multibyte aware.
     */
    private function ucFirst(string $string) : string
    {
        // Available from PHP 8.4+
        if (\function_exists('mb_ucfirst')) {
            return \mb_ucfirst($string);
        }

        $encoding = \mb_internal_encoding();

        return \mb_strtoupper(\mb_substr($string, 0, 1, $encoding), $encoding) . \mb_substr($string, 1, null, $encoding);
    }

    /**
     * Upper-case the first character of the string and every character that follows a single space.
     */
    private function ucWords(string $string) : string
    {
        $result = '';
        // Start as if preceded by a space so the very first character is upper-cased.
        $previousCharacter = ' ';

        $encoding = \mb_internal_encoding();

        for ($i = 0, $length = \mb_strlen($string, $encoding); $i < $length; $i++) {
            $currentCharacter = \mb_substr($string, $i, 1, $encoding);

            if (' ' === $previousCharacter) {
                $currentCharacter = \mb_strtoupper($currentCharacter, $encoding);
            }

            $result .= $currentCharacter;
            $previousCharacter = $currentCharacter;
        }

        return $result;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<?php

declare(strict_types=1);

namespace Flow\ETL\Transformer\StyleConverter;

/**
 * Entry-name rewriting strategies accepted by RenameEachTransformer.
 */
enum RenameStrategy : string
{
    /** Lower-case the whole entry name. */
    case LOWER = 'LOWER';

    /** Transliterate the entry name to lower-case ASCII when ext-intl is available. */
    case TRANSLITERATE = 'TRANSLITERATE';

    /** Upper-case the first character of the entry name. */
    case UCFIRST = 'UCFIRST';

    /** Upper-case the first character of every space-separated word in the entry name. */
    case UCWORDS = 'UCWORDS';

    /** Upper-case the whole entry name. */
    case UPPER = 'UPPER';
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

use function Flow\ETL\DSL\{bool_entry, df, from_rows, int_entry, json_entry, ref, str_entry};
use function Flow\ETL\DSL\{row, rows};
use Flow\ETL\{Function\StyleConverter\StringStyles};
use Flow\ETL\{Function\StyleConverter\StringStyles, Transformer\StyleConverter\RenameStrategy};
use Flow\ETL\Tests\FlowIntegrationTestCase;

final class RenameTest extends FlowIntegrationTestCase
Expand Down Expand Up @@ -98,6 +98,24 @@ public function test_rename_all_to_snake_case() : void
);
}

public function test_rename_all_transliterate() : void
{
    // RenameStrategy::TRANSLITERATE leaves names untouched without ext-intl,
    // so the expectation below can only hold when the extension is loaded.
    if (!\function_exists('transliterator_transliterate')) {
        self::markTestSkipped('The intl extension is required to transliterate entry names.');
    }

    $rows = rows(row(int_entry('ÓSMY', 8)), row(int_entry('DZIEWIĄTY', 9)));

    $ds = df()
        ->read(from_rows($rows))
        ->renameEach(RenameStrategy::TRANSLITERATE)
        ->getEachAsArray();

    self::assertEquals(
        [
            ['osmy' => 8],
            ['dziewiaty' => 9],
        ],
        \iterator_to_array($ds)
    );
}

Comment thread
stloyd marked this conversation as resolved.
public function test_rename_all_upper_case() : void
{
$rows = rows(row(int_entry('id', 1), str_entry('name', 'name'), bool_entry('active', true)), row(int_entry('id', 2), str_entry('name', 'name'), bool_entry('active', false)));
Expand Down