Skip to content

Commit d6cbe54

Browse files
committed
Add a new DataFrame::renameEach()
1 parent 4d82a90 commit d6cbe54

6 files changed

Lines changed: 150 additions & 11 deletions

File tree

src/core/etl/src/Flow/ETL/DataFrame.php

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,13 @@
4242
OrderEntries\Comparator,
4343
OrderEntries\TypeComparator,
4444
RenameAllCaseTransformer,
45+
RenameAllTransformer,
4546
RenameEntryTransformer,
4647
RenameStrReplaceAllEntriesTransformer,
4748
ScalarFunctionFilterTransformer,
4849
ScalarFunctionTransformer,
4950
SelectEntriesTransformer,
51+
StyleConverter\RenameStrategy,
5052
UntilTransformer,
5153
WindowFunctionTransformer};
5254
use Flow\Filesystem\Path\Filter;
@@ -84,7 +86,8 @@ public function autoCast() : self
8486
* Merge/Split Rows yielded by Extractor into batches of given size.
8587
* For example, when Extractor is yielding one row at a time, this method will merge them into batches of given size
8688
* before passing them to the next pipeline element.
87-
* Similarly when Extractor is yielding batches of rows, this method will split them into smaller batches of given size.
89+
* Similarly when Extractor is yielding batches of rows, this method will split them into smaller batches of given
90+
* size.
8891
*
8992
* In order to merge all Rows into a single batch use DataFrame::collect() method or set size to -1 or 0.
9093
*
@@ -210,7 +213,8 @@ public function crossJoin(self $dataFrame, string $prefix = '') : self
210213

211214
/**
212215
* @param int $limit maximum number of rows to display
213-
* @param bool|int $truncate false or if set to 0 columns are not truncated, otherwise default truncate to 20 characters
216+
* @param bool|int $truncate false or if set to 0 columns are not truncated, otherwise default truncate to 20
217+
* characters
214218
* @param Formatter $formatter
215219
*
216220
* @trigger
@@ -258,7 +262,8 @@ public function dropDuplicates(string|Reference ...$entries) : self
258262
}
259263

260264
/**
261-
* Drop all partitions from Rows, additionally when $dropPartitionColumns is set to true, partition columns are also removed.
265+
* Drop all partitions from Rows, additionally when $dropPartitionColumns is set to true, partition columns are
266+
* also removed.
262267
*
263268
* @lazy
264269
*/
@@ -636,10 +641,12 @@ public function renameAll(string $search, string $replace) : self
636641

637642
/**
638643
* @lazy
644+
*
645+
* @deprecated use DataFrame::renameEach() with a selected RenameStrategy
639646
*/
640647
public function renameAllLowerCase() : self
641648
{
642-
$this->pipeline->add(new RenameAllCaseTransformer(lower: true));
649+
$this->renameEach(RenameStrategy::LOWER);
643650

644651
return $this;
645652
}
@@ -658,30 +665,43 @@ public function renameAllStyle(StringStyles|string $style) : self
658665

659666
/**
660667
* @lazy
668+
*
669+
* @deprecated use DataFrame::renameEach() with a selected RenameStrategy
661670
*/
662671
public function renameAllUpperCase() : self
663672
{
664-
$this->pipeline->add(new RenameAllCaseTransformer(upper: true));
673+
$this->renameEach(RenameStrategy::UPPER);
665674

666675
return $this;
667676
}
668677

669678
/**
670679
* @lazy
680+
*
681+
* @deprecated use DataFrame::renameEach() with a selected RenameStrategy
671682
*/
672683
public function renameAllUpperCaseFirst() : self
673684
{
674-
$this->pipeline->add(new RenameAllCaseTransformer(ucfirst: true));
685+
$this->renameEach(RenameStrategy::UCFIRST);
675686

676687
return $this;
677688
}
678689

679690
/**
680691
* @lazy
692+
*
693+
* @deprecated use DataFrame::renameEach() with a selected RenameStrategy
681694
*/
682695
public function renameAllUpperCaseWord() : self
683696
{
684-
$this->pipeline->add(new RenameAllCaseTransformer(ucwords: true));
697+
$this->renameEach(RenameStrategy::UCWORDS);
698+
699+
return $this;
700+
}
701+
702+
/**
 * Rename every entry of each Row according to the given RenameStrategy.
 *
 * The renaming itself is performed by RenameAllTransformer, which is appended to the pipeline here.
 *
 * @lazy
 */
public function renameEach(RenameStrategy $strategy) : self
{
    $transformer = new RenameAllTransformer($strategy);
    $this->pipeline->add($transformer);

    return $this;
}
@@ -825,8 +845,8 @@ public function transform(Transformer|Transformation|Transformations|WithEntry $
825845
}
826846

827847
/**
828-
* The difference between filter and until is that filter will keep filtering rows until extractors finish yielding rows.
829-
* Until will send a STOP signal to the Extractor when the condition is not met.
848+
* The difference between filter and until is that filter will keep filtering rows until extractors finish yielding
849+
* rows. Until will send a STOP signal to the Extractor when the condition is not met.
830850
*
831851
* @lazy
832852
*/

src/core/etl/src/Flow/ETL/Pipeline/Optimizer/LimitOptimization.php

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,16 @@
88
use Flow\ETL\Function\ScalarFunction\ExpandResults;
99
use Flow\ETL\{Loader, Pipeline, Transformer};
1010
use Flow\ETL\Pipeline\{BatchingPipeline, CollectingPipeline, LinkedPipeline, SynchronousPipeline, VoidPipeline};
11-
use Flow\ETL\Transformer\{CallbackRowTransformer, DropEntriesTransformer, EntryNameStyleConverterTransformer, LimitTransformer, RenameAllCaseTransformer, RenameEntryTransformer, RenameStrReplaceAllEntriesTransformer, ScalarFunctionTransformer, SelectEntriesTransformer};
11+
use Flow\ETL\Transformer\{CallbackRowTransformer,
12+
DropEntriesTransformer,
13+
EntryNameStyleConverterTransformer,
14+
LimitTransformer,
15+
RenameAllCaseTransformer,
16+
RenameAllTransformer,
17+
RenameEntryTransformer,
18+
RenameStrReplaceAllEntriesTransformer,
19+
ScalarFunctionTransformer,
20+
SelectEntriesTransformer};
1221

1322
final class LimitOptimization implements Optimization
1423
{
@@ -27,6 +36,7 @@ final class LimitOptimization implements Optimization
2736
SelectEntriesTransformer::class,
2837
DropEntriesTransformer::class,
2938
RenameAllCaseTransformer::class,
39+
RenameAllTransformer::class,
3040
RenameEntryTransformer::class,
3141
RenameStrReplaceAllEntriesTransformer::class,
3242
LimitTransformer::class,

src/core/etl/src/Flow/ETL/Transformer/RenameAllCaseTransformer.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66

77
use Flow\ETL\{FlowContext, Row, Rows, Transformer};
88

9+
/**
10+
* @deprecated use RenameAllTransformer with a selected RenameStrategy
11+
*/
912
final readonly class RenameAllCaseTransformer implements Transformer
1013
{
1114
public function __construct(
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\ETL\Transformer;
6+
7+
use Flow\ETL\{FlowContext, Row, Rows, Transformer, Transformer\StyleConverter\RenameStrategy};
8+
9+
/**
 * Renames every entry of every Row according to the selected RenameStrategy.
 *
 * LOWER, UPPER, UCFIRST and UCWORDS use the mbstring functions. TRANSLITERATE relies on
 * transliterator_transliterate() from the intl extension and leaves entry names untouched
 * when that function is not available or when the transliteration fails.
 */
final readonly class RenameAllTransformer implements Transformer
{
    /**
     * Characters after which ucWords() starts a new word.
     * Mirrors the default delimiters of PHP's native \ucwords(): space, tab, CR, LF, form feed and vertical tab.
     */
    private const WORD_DELIMITERS = [
        ' ' => true,
        "\t" => true,
        "\r" => true,
        "\n" => true,
        "\f" => true,
        "\v" => true,
    ];

    public function __construct(
        private RenameStrategy $strategy,
    ) {
    }

    /**
     * Applies the configured strategy to the name of each entry in each row.
     */
    public function transform(Rows $rows, FlowContext $context) : Rows
    {
        return $rows->map(function (Row $row) : Row {
            // The entry list is read once from the incoming row; $row is reassigned after every rename.
            foreach ($row->entries()->all() as $entry) {
                $row = $row->rename($entry->name(), $this->convert($entry->name()));
            }

            return $row;
        });
    }

    /**
     * Computes the new entry name for the configured strategy.
     */
    private function convert(string $name) : string
    {
        return match ($this->strategy) {
            RenameStrategy::LOWER => \mb_strtolower($name),
            RenameStrategy::UPPER => \mb_strtoupper($name),
            RenameStrategy::UCFIRST => $this->ucFirst($name),
            RenameStrategy::UCWORDS => $this->ucWords($name),
            RenameStrategy::TRANSLITERATE => $this->transliterate($name),
        };
    }

    /**
     * Converts the name with the ICU rule set "Any-Latin; Latin-ASCII; Lower()", so the result is also lower-cased.
     */
    private function transliterate(string $string) : string
    {
        // intl is optional: without it the name is deliberately returned unchanged.
        if (!\function_exists('transliterator_transliterate')) {
            return $string;
        }

        $transliterated = \transliterator_transliterate('Any-Latin; Latin-ASCII; Lower()', $string);

        // transliterator_transliterate() returns false on failure; casting that to string would
        // rename the entry to '', so fall back to the original name instead.
        return false === $transliterated ? $string : $transliterated;
    }

    /**
     * Multibyte-aware equivalent of \ucfirst().
     */
    private function ucFirst(string $string) : string
    {
        // Available from PHP 8.4+
        if (\function_exists('mb_ucfirst')) {
            return \mb_ucfirst($string);
        }

        $encoding = \mb_internal_encoding();

        return \mb_strtoupper(\mb_substr($string, 0, 1, $encoding), $encoding) . \mb_substr($string, 1, null, $encoding);
    }

    /**
     * Multibyte-aware equivalent of \ucwords(): upper-cases the first character of the string
     * and every character that directly follows one of WORD_DELIMITERS.
     */
    private function ucWords(string $string) : string
    {
        $result = '';
        // Starting with a delimiter makes the very first character count as the start of a word.
        $previousCharacter = ' ';

        $encoding = \mb_internal_encoding();

        for ($i = 0, $length = \mb_strlen($string, $encoding); $i < $length; $i++) {
            $currentCharacter = \mb_substr($string, $i, 1, $encoding);

            if (isset(self::WORD_DELIMITERS[$previousCharacter])) {
                $currentCharacter = \mb_strtoupper($currentCharacter, $encoding);
            }

            $result .= $currentCharacter;
            $previousCharacter = $currentCharacter;
        }

        return $result;
    }
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\ETL\Transformer\StyleConverter;
6+
7+
/**
 * Strategies accepted by RenameAllTransformer (and DataFrame::renameEach()) for renaming every entry of a Row.
 */
enum RenameStrategy : string
8+
{
9+
// Lower-cases the whole entry name (mb_strtolower).
case LOWER = 'LOWER';
10+
// Upper-cases only the first character of the entry name.
case UCFIRST = 'UCFIRST';
11+
// Upper-cases the first character of every word in the entry name.
case UCWORDS = 'UCWORDS';
12+
// Upper-cases the whole entry name (mb_strtoupper).
case UPPER = 'UPPER';
13+
// Applies the ICU rule set "Any-Latin; Latin-ASCII; Lower()" through transliterator_transliterate(),
// so the result is also lower-cased; names stay unchanged when that function is not available.
case TRANSLITERATE = 'TRANSLITERATE';
14+
}

src/core/etl/tests/Flow/ETL/Tests/Integration/DataFrame/RenameTest.php

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
use function Flow\ETL\DSL\{bool_entry, df, from_rows, int_entry, json_entry, ref, str_entry};
88
use function Flow\ETL\DSL\{row, rows};
9-
use Flow\ETL\{Function\StyleConverter\StringStyles};
9+
use Flow\ETL\{Function\StyleConverter\StringStyles, Transformer\StyleConverter\RenameStrategy};
1010
use Flow\ETL\Tests\FlowIntegrationTestCase;
1111

1212
final class RenameTest extends FlowIntegrationTestCase
@@ -98,6 +98,24 @@ public function test_rename_all_to_snake_case() : void
9898
);
9999
}
100100

101+
/**
 * Entry names containing non-ASCII characters are converted to lower-cased ASCII by the TRANSLITERATE strategy.
 */
public function test_rename_all_transliterate() : void
{
    // RenameAllTransformer returns names unchanged when transliterator_transliterate() is missing,
    // so without the intl extension this test could only fail with a misleading assertion diff.
    if (!\function_exists('transliterator_transliterate')) {
        self::markTestSkipped('The intl extension is required to transliterate entry names.');
    }

    $rows = rows(row(int_entry('ÓSMY', 8)), row(int_entry('DZIEWIĄTY', 9)));

    $ds = df()
        ->read(from_rows($rows))
        ->renameEach(RenameStrategy::TRANSLITERATE)
        ->getEachAsArray();

    self::assertEquals(
        [
            ['osmy' => 8],
            ['dziewiaty' => 9],
        ],
        \iterator_to_array($ds)
    );
}
118+
101119
public function test_rename_all_upper_case() : void
102120
{
103121
$rows = rows(row(int_entry('id', 1), str_entry('name', 'name'), bool_entry('active', true)), row(int_entry('id', 2), str_entry('name', 'name'), bool_entry('active', false)));

0 commit comments

Comments
 (0)