Skip to content

Commit 1604a0f

Browse files
authored
Unified behavior of file analyze and file schema CLI Commands (#1785)
1 parent 836497d commit 1604a0f

2 files changed

Lines changed: 21 additions & 17 deletions

File tree

src/cli/src/Flow/CLI/Command/FileAnalyzeCommand.php

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ public function configure() : void
5050
->addArgument('input-file', InputArgument::REQUIRED, 'Path to a file from which schema should be extracted.')
5151
->addOption('input-file-format', null, InputArgument::OPTIONAL, 'File format. When not set file format is guessed from source file path extension', null)
5252
->addOption('input-file-batch-size', null, InputOption::VALUE_REQUIRED, 'Number of rows that are going to be read and displayed in one batch, when set to -1 whole dataset will be displayed at once', self::DEFAULT_BATCH_SIZE)
53-
->addOption('input-file-limit', null, InputOption::VALUE_REQUIRED, 'Limit number of rows that are going to be used to infer file schema, when not set whole file is analyzed', null);
53+
->addOption('input-file-limit', null, InputOption::VALUE_REQUIRED, 'Limit number of rows that are going to be used to infer file schema, when not set whole file is analyzed', null)
54+
->addOption('schema-auto-cast', null, InputOption::VALUE_OPTIONAL, 'When set Flow will try to automatically cast values to more precise data types, for example datetime strings will be casted to datetime type', false);
5455

5556
$this->addConfigOptions($this);
5657
$this->addJSONInputOptions($this);
@@ -78,8 +79,11 @@ protected function execute(InputInterface $input, OutputInterface $output) : int
7879
return Command::FAILURE;
7980
}
8081

81-
$df->batchSize($batchSize)
82-
->autoCast();
82+
$df->batchSize($batchSize);
83+
84+
if (option_bool('schema-auto-cast', $input)) {
85+
$df->autoCast();
86+
}
8387

8488
$limit = option_int_nullable('input-file-limit', $input);
8589

@@ -97,7 +101,7 @@ protected function execute(InputInterface $input, OutputInterface $output) : int
97101
}
98102

99103
if (option_bool('stats-columns', $input)) {
100-
$analyze->withColumnStatistics();
104+
$analyze->withSchema()->withColumnStatistics();
101105
}
102106

103107
$report = $df->run(

src/cli/tests/Flow/CLI/Tests/Integration/FileAnalyzeCommandTest.php

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ public function test_read_rows_csv() : void
1717
$application->add(new FileAnalyzeCommand());
1818
$tester = new CommandTester($application->get('file:analyze'));
1919

20-
$tester->execute(['input-file' => __DIR__ . '/Fixtures/orders.csv', '--input-file-limit' => 5, '--stats-schema' => true, '--stats-columns' => true]);
20+
$tester->execute(['input-file' => __DIR__ . '/Fixtures/orders.csv', '--input-file-limit' => 5, '--stats-schema' => true, '--stats-columns' => true, '--schema-auto-cast' => true]);
2121

2222
$tester->assertCommandIsSuccessful();
2323

@@ -74,7 +74,7 @@ public function test_read_rows_csv_without_schema() : void
7474
$application->add(new FileAnalyzeCommand());
7575
$tester = new CommandTester($application->get('file:analyze'));
7676

77-
$tester->execute(['input-file' => __DIR__ . '/Fixtures/orders.csv', '--input-file-limit' => 5, '--stats-columns' => true]);
77+
$tester->execute(['input-file' => __DIR__ . '/Fixtures/orders.csv', '--input-file-limit' => 5, '--stats-columns' => true, '--schema-auto-cast' => true]);
7878

7979
$tester->assertCommandIsSuccessful();
8080

@@ -93,17 +93,17 @@ public function test_read_rows_csv_without_schema() : void
9393
Columns
9494
-------
9595
96-
┌────────────┬──────┬───────┬─────────────────┬───────────────────────────┬───────────────────────────┬────────────┬────────────┬────────────────────┬────────────────────┐
97-
│ Name │ Type │ Nulls │ Distinct Values │ Min │ Max │ Min Length │ Max Length │ Min Elements Count │ Max Elements Count │
98-
├────────────┼──────┼───────┼─────────────────┼───────────────────────────┼───────────────────────────┼────────────┼────────────┼────────────────────┼────────────────────┤
99-
│ order_id │ N/A │ 0 │ 5 │ - │ - │ - │ - │ - │ - │
100-
│ created_at │ N/A │ 0 │ 5 │ 2024-02-23T19:18:53+00:00 │ 2024-06-17T19:24:49+00:00 │ - │ - │ - │ - │
101-
│ updated_at │ N/A │ 0 │ 5 │ 2024-02-23T19:18:53+00:00 │ 2024-06-17T19:24:49+00:00 │ - │ - │ - │ - │
102-
│ discount │ N/A │ 2 │ 3 │ 12.45 │ 47.10 │ - │ - │ - │ - │
103-
│ address │ N/A │ 0 │ 5 │ - │ - │ - │ - │ 4 │ 4 │
104-
│ notes │ N/A │ 0 │ 5 │ - │ - │ - │ - │ 1 │ 5 │
105-
│ items │ N/A │ 0 │ 5 │ - │ - │ - │ - │ 2 │ 4 │
106-
└────────────┴──────┴───────┴─────────────────┴───────────────────────────┴───────────────────────────┴────────────┴────────────┴────────────────────┴────────────────────┘
96+
┌────────────┬───────────────────────────────────────────────────────────────┬───────┬─────────────────┬───────────────────────────┬───────────────────────────┬────────────┬────────────┬────────────────────┬────────────────────┐
97+
│ Name │ Type │ Nulls │ Distinct Values │ Min │ Max │ Min Length │ Max Length │ Min Elements Count │ Max Elements Count │
98+
├────────────┼───────────────────────────────────────────────────────────────┼───────┼─────────────────┼───────────────────────────┼───────────────────────────┼────────────┼────────────┼────────────────────┼────────────────────┤
99+
│ order_id │ uuid │ 0 │ 5 │ - │ - │ - │ - │ - │ - │
100+
│ created_at │ datetime │ 0 │ 5 │ 2024-02-23T19:18:53+00:00 │ 2024-06-17T19:24:49+00:00 │ - │ - │ - │ - │
101+
│ updated_at │ datetime │ 0 │ 5 │ 2024-02-23T19:18:53+00:00 │ 2024-06-17T19:24:49+00:00 │ - │ - │ - │ - │
102+
│ discount │ float │ 2 │ 3 │ 12.45 │ 47.10 │ - │ - │ - │ - │
103+
│ address │ map<string, string> │ 0 │ 5 │ - │ - │ - │ - │ 4 │ 4 │
104+
│ notes │ list<string> │ 0 │ 5 │ - │ - │ - │ - │ 1 │ 5 │
105+
│ items │ list<structure{sku: string, quantity: integer, price: float}> │ 0 │ 5 │ - │ - │ - │ - │ 2 │ 4 │
106+
└────────────┴───────────────────────────────────────────────────────────────┴───────┴─────────────────┴───────────────────────────┴───────────────────────────┴────────────┴────────────┴────────────────────┴────────────────────┘
107107
OUTPUT,
108108
$tester->getDisplay()
109109
);

0 commit comments

Comments
 (0)