Skip to content

Commit 31cc1c6

Browse files
committed
Add a new DataFrame::renameEach()
1 parent 4d82a90 commit 31cc1c6

6 files changed

Lines changed: 150 additions & 12 deletions

File tree

src/core/etl/src/Flow/ETL/DataFrame.php

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,13 @@
4141
OrderEntriesTransformer,
4242
OrderEntries\Comparator,
4343
OrderEntries\TypeComparator,
44-
RenameAllCaseTransformer,
44+
RenameEachTransformer,
4545
RenameEntryTransformer,
4646
RenameStrReplaceAllEntriesTransformer,
4747
ScalarFunctionFilterTransformer,
4848
ScalarFunctionTransformer,
4949
SelectEntriesTransformer,
50+
StyleConverter\RenameStrategy,
5051
UntilTransformer,
5152
WindowFunctionTransformer};
5253
use Flow\Filesystem\Path\Filter;
@@ -84,7 +85,8 @@ public function autoCast() : self
8485
* Merge/Split Rows yielded by Extractor into batches of given size.
8586
* For example, when Extractor is yielding one row at time, this method will merge them into batches of given size
8687
* before passing them to the next pipeline element.
87-
* Similarly when Extractor is yielding batches of rows, this method will split them into smaller batches of given size.
88+
* Similarly when Extractor is yielding batches of rows, this method will split them into smaller batches of given
89+
* size.
8890
*
8991
* In order to merge all Rows into a single batch use DataFrame::collect() method or set size to -1 or 0.
9092
*
@@ -210,7 +212,8 @@ public function crossJoin(self $dataFrame, string $prefix = '') : self
210212

211213
/**
212214
* @param int $limit maximum number of rows to display
213-
* @param bool|int $truncate false or if set to 0 columns are not truncated, otherwise default truncate to 20 characters
215+
* @param bool|int $truncate false or if set to 0 columns are not truncated, otherwise default truncate to 20
216+
* characters
214217
* @param Formatter $formatter
215218
*
216219
* @trigger
@@ -258,7 +261,8 @@ public function dropDuplicates(string|Reference ...$entries) : self
258261
}
259262

260263
/**
261-
* Drop all partitions from Rows, additionally when $dropPartitionColumns is set to true, partition columns are also removed.
264+
* Drop all partitions from Rows, additionally when $dropPartitionColumns is set to true, partition columns are
265+
* also removed.
262266
*
263267
* @lazy
264268
*/
@@ -636,10 +640,12 @@ public function renameAll(string $search, string $replace) : self
636640

637641
/**
638642
* @lazy
643+
*
644+
* @deprecated use DataFrame::renameEach() with a selected RenameStrategy
639645
*/
640646
public function renameAllLowerCase() : self
641647
{
642-
$this->pipeline->add(new RenameAllCaseTransformer(lower: true));
648+
$this->renameEach(RenameStrategy::LOWER);
643649

644650
return $this;
645651
}
@@ -658,30 +664,43 @@ public function renameAllStyle(StringStyles|string $style) : self
658664

659665
/**
660666
* @lazy
667+
*
668+
* @deprecated use DataFrame::renameEach() with a selected RenameStrategy
661669
*/
662670
public function renameAllUpperCase() : self
663671
{
664-
$this->pipeline->add(new RenameAllCaseTransformer(upper: true));
672+
$this->renameEach(RenameStrategy::UPPER);
665673

666674
return $this;
667675
}
668676

669677
/**
670678
* @lazy
679+
*
680+
* @deprecated use DataFrame::renameEach() with a selected RenameStrategy
671681
*/
672682
public function renameAllUpperCaseFirst() : self
673683
{
674-
$this->pipeline->add(new RenameAllCaseTransformer(ucfirst: true));
684+
$this->renameEach(RenameStrategy::UCFIRST);
675685

676686
return $this;
677687
}
678688

679689
/**
680690
* @lazy
691+
*
692+
* @deprecated use DataFrame::renameEach() with a selected RenameStrategy
681693
*/
682694
public function renameAllUpperCaseWord() : self
683695
{
684-
$this->pipeline->add(new RenameAllCaseTransformer(ucwords: true));
696+
$this->renameEach(RenameStrategy::UCWORDS);
697+
698+
return $this;
699+
}
700+
701+
public function renameEach(RenameStrategy $strategy) : self
702+
{
703+
$this->pipeline->add(new RenameEachTransformer($strategy));
685704

686705
return $this;
687706
}
@@ -825,8 +844,8 @@ public function transform(Transformer|Transformation|Transformations|WithEntry $
825844
}
826845

827846
/**
828-
* The difference between filter and until is that filter will keep filtering rows until extractors finish yielding rows.
829-
* Until will send a STOP signal to the Extractor when the condition is not met.
847+
* The difference between filter and until is that filter will keep filtering rows until extractors finish yielding
848+
* rows. Until will send a STOP signal to the Extractor when the condition is not met.
830849
*
831850
* @lazy
832851
*/

src/core/etl/src/Flow/ETL/Pipeline/Optimizer/LimitOptimization.php

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,16 @@
88
use Flow\ETL\Function\ScalarFunction\ExpandResults;
99
use Flow\ETL\{Loader, Pipeline, Transformer};
1010
use Flow\ETL\Pipeline\{BatchingPipeline, CollectingPipeline, LinkedPipeline, SynchronousPipeline, VoidPipeline};
11-
use Flow\ETL\Transformer\{CallbackRowTransformer, DropEntriesTransformer, EntryNameStyleConverterTransformer, LimitTransformer, RenameAllCaseTransformer, RenameEntryTransformer, RenameStrReplaceAllEntriesTransformer, ScalarFunctionTransformer, SelectEntriesTransformer};
11+
use Flow\ETL\Transformer\{CallbackRowTransformer,
12+
DropEntriesTransformer,
13+
EntryNameStyleConverterTransformer,
14+
LimitTransformer,
15+
RenameAllCaseTransformer,
16+
RenameEachTransformer,
17+
RenameEntryTransformer,
18+
RenameStrReplaceAllEntriesTransformer,
19+
ScalarFunctionTransformer,
20+
SelectEntriesTransformer};
1221

1322
final class LimitOptimization implements Optimization
1423
{
@@ -27,6 +36,7 @@ final class LimitOptimization implements Optimization
2736
SelectEntriesTransformer::class,
2837
DropEntriesTransformer::class,
2938
RenameAllCaseTransformer::class,
39+
RenameEachTransformer::class,
3040
RenameEntryTransformer::class,
3141
RenameStrReplaceAllEntriesTransformer::class,
3242
LimitTransformer::class,

src/core/etl/src/Flow/ETL/Transformer/RenameAllCaseTransformer.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66

77
use Flow\ETL\{FlowContext, Row, Rows, Transformer};
88

9+
/**
10+
* @deprecated use RenameEachTransformer with a selected RenameStrategy
11+
*/
912
final readonly class RenameAllCaseTransformer implements Transformer
1013
{
1114
public function __construct(
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\ETL\Transformer;
6+
7+
use Flow\ETL\{FlowContext, Row, Rows, Transformer, Transformer\StyleConverter\RenameStrategy};
8+
9+
/**
 * Renames every entry of every Row according to a single RenameStrategy.
 *
 * All case conversions go through the multibyte (mb_*) string functions, so
 * non-ASCII entry names are handled character by character, not byte by byte.
 */
final readonly class RenameEachTransformer implements Transformer
{
    /**
     * @param RenameStrategy $strategy conversion applied to each entry name
     */
    public function __construct(
        private RenameStrategy $strategy,
    ) {
    }

    /**
     * Rename each entry of each row to the name produced by the configured strategy.
     */
    public function transform(Rows $rows, FlowContext $context) : Rows
    {
        return $rows->map(function (Row $row) : Row {
            // The entry list is taken once from the incoming row; $row is then
            // replaced with the result of every successive rename.
            foreach ($row->entries()->all() as $entry) {
                $name = $entry->name();
                $row = $row->rename($name, $this->convert($name));
            }

            return $row;
        });
    }

    /**
     * Compute the new entry name for the configured strategy.
     *
     * The match has no default arm on purpose: a RenameStrategy case that is
     * not handled here fails with \UnhandledMatchError instead of being
     * silently ignored.
     */
    private function convert(string $name) : string
    {
        return match ($this->strategy) {
            RenameStrategy::LOWER => \mb_strtolower($name),
            RenameStrategy::UPPER => \mb_strtoupper($name),
            RenameStrategy::UCFIRST => $this->ucFirst($name),
            RenameStrategy::UCWORDS => $this->ucWords($name),
            RenameStrategy::TRANSLITERATE => $this->transliterate($name),
        };
    }

    /**
     * Transliterate to lower-case ASCII Latin using the intl extension.
     *
     * The name is returned unchanged when the intl function is not available
     * or when transliteration fails.
     */
    private function transliterate(string $string) : string
    {
        if (!\function_exists('transliterator_transliterate')) {
            return $string;
        }

        $transliterated = \transliterator_transliterate('Any-Latin; Latin-ASCII; Lower()', $string);

        // transliterator_transliterate() returns false on failure; casting that
        // to string would rename the entry to '', so keep the original name.
        return false === $transliterated ? $string : $transliterated;
    }

    /**
     * Upper-case the first character, leaving the rest of the string untouched.
     */
    private function ucFirst(string $string) : string
    {
        // Available from PHP 8.4+
        if (\function_exists('mb_ucfirst')) {
            return \mb_ucfirst($string);
        }

        $encoding = \mb_internal_encoding();

        return \mb_strtoupper(\mb_substr($string, 0, 1, $encoding), $encoding) . \mb_substr($string, 1, null, $encoding);
    }

    /**
     * Upper-case the first character of the string and every character that
     * directly follows a space. Only ' ' is treated as a word separator.
     */
    private function ucWords(string $string) : string
    {
        $result = '';
        // Starting with a space makes the very first character count as a word start.
        $previousCharacter = ' ';

        $encoding = \mb_internal_encoding();

        for ($i = 0, $length = \mb_strlen($string, $encoding); $i < $length; $i++) {
            $currentCharacter = \mb_substr($string, $i, 1, $encoding);

            if (' ' === $previousCharacter) {
                $currentCharacter = \mb_strtoupper($currentCharacter, $encoding);
            }

            $result .= $currentCharacter;
            $previousCharacter = $currentCharacter;
        }

        return $result;
    }
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\ETL\Transformer\StyleConverter;
6+
7+
/**
 * Strategies for renaming every entry of a Row, as consumed by
 * RenameEachTransformer and DataFrame::renameEach().
 */
enum RenameStrategy : string
{
    /** Convert the whole entry name to lower case. */
    case LOWER = 'LOWER';

    /** Transliterate the entry name to lower-case ASCII Latin characters. */
    case TRANSLITERATE = 'TRANSLITERATE';

    /** Upper-case only the first character of the entry name. */
    case UCFIRST = 'UCFIRST';

    /** Upper-case the first character of each space-separated word. */
    case UCWORDS = 'UCWORDS';

    /** Convert the whole entry name to upper case. */
    case UPPER = 'UPPER';
}

src/core/etl/tests/Flow/ETL/Tests/Integration/DataFrame/RenameTest.php

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
use function Flow\ETL\DSL\{bool_entry, df, from_rows, int_entry, json_entry, ref, str_entry};
88
use function Flow\ETL\DSL\{row, rows};
9-
use Flow\ETL\{Function\StyleConverter\StringStyles};
9+
use Flow\ETL\{Function\StyleConverter\StringStyles, Transformer\StyleConverter\RenameStrategy};
1010
use Flow\ETL\Tests\FlowIntegrationTestCase;
1111

1212
final class RenameTest extends FlowIntegrationTestCase
@@ -98,6 +98,24 @@ public function test_rename_all_to_snake_case() : void
9898
);
9999
}
100100

101+
public function test_rename_all_transliterate() : void
102+
{
103+
$rows = rows(row(int_entry('ÓSMY', 8)), row(int_entry('DZIEWIĄTY', 9)));
104+
105+
$ds = df()
106+
->read(from_rows($rows))
107+
->renameEach(RenameStrategy::TRANSLITERATE)
108+
->getEachAsArray();
109+
110+
self::assertEquals(
111+
[
112+
['osmy' => 8],
113+
['dziewiaty' => 9],
114+
],
115+
\iterator_to_array($ds)
116+
);
117+
}
118+
101119
public function test_rename_all_upper_case() : void
102120
{
103121
$rows = rows(row(int_entry('id', 1), str_entry('name', 'name'), bool_entry('active', true)), row(int_entry('id', 2), str_entry('name', 'name'), bool_entry('active', false)));

0 commit comments

Comments
 (0)