Skip to content

Commit 8a24f10

Browse files
norberttech and Copilot authored
Remove entry object from column statistics (#1561)
* Removed entry reference from column statistics
* Added codeowners and updated pull request template
* Update .github/PULL_REQUEST_TEMPLATE.md

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 3df7337 commit 8a24f10

5 files changed

Lines changed: 63 additions & 14 deletions

File tree

.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
* @norberttech

.github/PULL_REQUEST_TEMPLATE.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1-
<!-- Bellow section will be used to automatically generate changelog, please do not modify HTML code structure -->
1+
<!--
2+
Below section will be used to automatically generate changelog, please do not modify HTML code structure
3+
DO NOT REMOVE that HTML STRUCTURE, INSTEAD ADD YOUR CHANGES INSIDE THE LISTS
4+
PULL REQUESTS WITHOUT CHANGELOG CAN'T BE MERGED
5+
-->
26
<h2>Change Log</h2>
37
<div id="change-log">
48
<h4>Added</h4>

src/cli/src/Flow/CLI/Formatter/PipelineReportFormatter.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ public function format() : void
5353
foreach ($columnsStatistics->all() as $columnStatistics) {
5454
$normalizedColumnStatistics[] = [
5555
'name' => $columnStatistics->name(),
56-
'type' => $columnStatistics->type()->toString(),
56+
'type' => $schema ? $schema->get($columnStatistics->reference())->type()->toString() : 'N/A',
5757
'nulls_count' => $valueFormatter->format($columnStatistics->nullCount()),
5858
'distinct_count' => $valueFormatter->format($columnStatistics->distinctCount()),
5959
'min' => $valueFormatter->format($columnStatistics->min()),

src/cli/tests/Flow/CLI/Tests/Integration/FileAnalyzeCommandTest.php

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ public function test_read_rows_csv() : void
5454
│ order_id │ uuid │ 0 │ 5 │ - │ - │ - │ - │ - │ - │
5555
│ created_at │ datetime │ 0 │ 5 │ 2024-02-23T19:18:53+00:00 │ 2024-06-17T19:24:49+00:00 │ - │ - │ - │ - │
5656
│ updated_at │ datetime │ 0 │ 5 │ 2024-02-23T19:18:53+00:00 │ 2024-06-17T19:24:49+00:00 │ - │ - │ - │ - │
57-
│ discount │ float │ 2 │ 3 │ 12.45 │ 47.10 │ - │ - │ - │ - │
57+
│ discount │ ?float │ 2 │ 3 │ 12.45 │ 47.10 │ - │ - │ - │ - │
5858
│ address │ map<string, string> │ 0 │ 5 │ - │ - │ - │ - │ 4 │ 4 │
5959
│ notes │ list<string> │ 0 │ 5 │ - │ - │ - │ - │ 1 │ 5 │
6060
│ items │ list<structure{sku: string, quantity: integer, price: float}> │ 0 │ 5 │ - │ - │ - │ - │ 2 │ 4 │
@@ -67,4 +67,49 @@ public function test_read_rows_csv() : void
6767

6868
self::assertStringContainsString("Execution Time", $tester->getDisplay());
6969
}
70+
71+
public function test_read_rows_csv_without_schema() : void
72+
{
73+
$application = new Application();
74+
$application->add(new FileAnalyzeCommand());
75+
$tester = new CommandTester($application->get('file:analyze'));
76+
77+
$tester->execute(['input-file' => __DIR__ . '/Fixtures/orders.csv', '--input-file-limit' => 5, '--stats-columns' => true]);
78+
79+
$tester->assertCommandIsSuccessful();
80+
81+
self::assertStringContainsString(
82+
<<<'OUTPUT'
83+
Analyzing File
84+
==============
85+
86+
[INFO] File path: orders.csv
87+
OUTPUT,
88+
$tester->getDisplay()
89+
);
90+
91+
self::assertStringContainsString(
92+
<<<'OUTPUT'
93+
Columns
94+
-------
95+
96+
┌────────────┬──────┬───────┬─────────────────┬───────────────────────────┬───────────────────────────┬────────────┬────────────┬────────────────────┬────────────────────┐
97+
│ Name │ Type │ Nulls │ Distinct Values │ Min │ Max │ Min Length │ Max Length │ Min Elements Count │ Max Elements Count │
98+
├────────────┼──────┼───────┼─────────────────┼───────────────────────────┼───────────────────────────┼────────────┼────────────┼────────────────────┼────────────────────┤
99+
│ order_id │ N/A │ 0 │ 5 │ - │ - │ - │ - │ - │ - │
100+
│ created_at │ N/A │ 0 │ 5 │ 2024-02-23T19:18:53+00:00 │ 2024-06-17T19:24:49+00:00 │ - │ - │ - │ - │
101+
│ updated_at │ N/A │ 0 │ 5 │ 2024-02-23T19:18:53+00:00 │ 2024-06-17T19:24:49+00:00 │ - │ - │ - │ - │
102+
│ discount │ N/A │ 2 │ 3 │ 12.45 │ 47.10 │ - │ - │ - │ - │
103+
│ address │ N/A │ 0 │ 5 │ - │ - │ - │ - │ 4 │ 4 │
104+
│ notes │ N/A │ 0 │ 5 │ - │ - │ - │ - │ 1 │ 5 │
105+
│ items │ N/A │ 0 │ 5 │ - │ - │ - │ - │ 2 │ 4 │
106+
└────────────┴──────┴───────┴─────────────────┴───────────────────────────┴───────────────────────────┴────────────┴────────────┴────────────────────┴────────────────────┘
107+
OUTPUT,
108+
$tester->getDisplay()
109+
);
110+
111+
self::assertStringContainsString("Analyzed Rows", $tester->getDisplay());
112+
113+
self::assertStringContainsString("Execution Time", $tester->getDisplay());
114+
}
70115
}

src/core/etl/src/Flow/ETL/Dataset/Statistics/Column.php

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44

55
namespace Flow\ETL\Dataset\Statistics;
66

7-
use Flow\ETL\PHP\Type\Type;
8-
use Flow\ETL\Row\Entry;
7+
use Flow\ETL\Row\{Entry, Reference};
98

109
final class Column
1110
{
@@ -25,23 +24,26 @@ final class Column
2524

2625
private int $nullsCount = 0;
2726

27+
private readonly Reference $reference;
28+
2829
/**
2930
* @param Entry<mixed, mixed> $entry
3031
*
3132
* @throws \JsonException
3233
*/
33-
public function __construct(public readonly Entry $entry)
34+
public function __construct(Entry $entry)
3435
{
36+
$this->reference = $entry->ref();
3537
$this->distinctCounter = new DistinctCounter();
36-
$this->calculate($this->entry);
38+
$this->calculate($entry);
3739
}
3840

3941
/**
4042
* @param Entry<mixed, mixed> $entry
4143
*/
4244
public function calculate(Entry $entry) : void
4345
{
44-
if (!$entry->is($this->entry->name())) {
46+
if (!$this->reference->is($entry->ref())) {
4547
return;
4648
}
4749

@@ -134,19 +136,16 @@ public function minLength() : ?int
134136

135137
public function name() : string
136138
{
137-
return $this->entry->name();
139+
return $this->reference->name();
138140
}
139141

140142
public function nullCount() : int
141143
{
142144
return $this->nullsCount;
143145
}
144146

145-
/**
146-
* @return Type<mixed>
147-
*/
148-
public function type() : Type
147+
public function reference() : Reference
149148
{
150-
return $this->entry->type();
149+
return $this->reference;
151150
}
152151
}

0 commit comments

Comments
 (0)