Skip to content

Commit bd2f34b

Browse files
authored
Simplify Excel & GoogleSheet extractor cell expanding (#1661)
1 parent c081474 commit bd2f34b

5 files changed

Lines changed: 35 additions & 27 deletions

File tree

src/adapter/etl-adapter-excel/src/Flow/ETL/Adapter/Excel/ExcelExtractor.php

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -120,28 +120,17 @@ public function withSheetName(string $sheetName) : self
120120
return $this;
121121
}
122122

123-
private function createRowsFromCells(Row $row) : array
123+
private function createRowsFromCells(Row $row, int $previousRowDataCount = 0) : array
124124
{
125-
return \array_map(
126-
fn (Cell $cell) =>
127-
// Convert empty values to nullables if allowed
128-
$this->convertEmptyToNull && '' === $cell->getValue() ? null : $cell->getValue(),
125+
$rowData = \array_map(
126+
// Convert empty values to nullables if allowed
127+
fn (Cell $cell) => $this->convertEmptyToNull && '' === $cell->getValue() ? null : $cell->getValue(),
129128
$row->getCells()
130129
);
131-
}
132130

133-
private function extendRowData(int $headersCount, array $rowData) : array
134-
{
135-
$rowDataCount = \count($rowData);
136-
137-
if ($headersCount > $rowDataCount) {
138-
\array_push(
139-
$rowData,
140-
...\array_map(
141-
static fn (int $i) => null,
142-
\range(1, $headersCount - $rowDataCount)
143-
)
144-
);
131+
// Expand columns to the size of the previous row
132+
for ($i = \count($rowData); $i < $previousRowDataCount; $i++) {
133+
$rowData[$i] = null;
145134
}
146135

147136
return $rowData;
@@ -172,7 +161,7 @@ private function extractRows(SourceStream $stream, array $headers, int $offset)
172161
}
173162

174163
// ODS format reader skips empty cells when reading rows
175-
$rowData = $this->extendRowData($previousRowDataCount, $this->createRowsFromCells($row));
164+
$rowData = $this->createRowsFromCells($row, $previousRowDataCount);
176165
$previousRowDataCount = \count($rowData);
177166

178167
if ($this->withHeader) {
Binary file not shown.
Binary file not shown.

src/adapter/etl-adapter-excel/tests/Flow/ETL/Adapter/Excel/Tests/Integration/ExcelExtractorTest.php

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@ public static function provide_fixtures() : iterable
2020
yield 'xlsx' => [__DIR__ . '/../Fixtures/fixture.xlsx'];
2121
}
2222

23+
public static function provide_nullable_fixtures() : iterable
24+
{
25+
yield 'ods' => [__DIR__ . '/../Fixtures/nullable_fixture.ods'];
26+
yield 'xlsx' => [__DIR__ . '/../Fixtures/nullable_fixture.xlsx'];
27+
}
28+
2329
#[DataProvider('provide_fixtures')]
2430
public function test_extract_excel_file(string $fixtureName) : void
2531
{
@@ -165,6 +171,24 @@ public function test_extract_excel_file_without_header(string $fixtureName) : vo
165171
self::assertSame(10, $total);
166172
}
167173

174+
#[DataProvider('provide_nullable_fixtures')]
175+
public function test_extract_excel_nullable_file(string $fixtureName) : void
176+
{
177+
$extractor = from_excel($fixtureName);
178+
179+
$total = 0;
180+
181+
foreach ($extractor->extract(flow_context(config())) as $rows) {
182+
$rows->each(function (Row $row) : void {
183+
$this->assertSame(['id', 'name', 'email'], \array_keys($row->toArray()));
184+
$this->assertCount(3, $row->toArray());
185+
});
186+
$total += $rows->count();
187+
}
188+
189+
self::assertSame(5, $total);
190+
}
191+
168192
public function test_extract_with_wrongly_selected_reader() : void
169193
{
170194
$extractor = from_excel(__DIR__ . '/../Fixtures/fixture.xlsx');

src/adapter/etl-adapter-google-sheet/src/Flow/ETL/Adapter/GoogleSheet/GoogleSheetExtractor.php

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -68,14 +68,9 @@ public function extract(FlowContext $context) : \Generator
6868
function (array $rowData) use ($headers, $headersCount, $shouldPutInputIntoRows) {
6969
$rowDataCount = \count($rowData);
7070

71-
if ($headersCount > $rowDataCount) {
72-
\array_push(
73-
$rowData,
74-
...\array_map(
75-
static fn (int $i) => null,
76-
\range(1, $headersCount - $rowDataCount)
77-
)
78-
);
71+
// Pad missing trailing columns with null so the row matches the number of headers
72+
for ($i = $rowDataCount; $i < $headersCount; $i++) {
73+
$rowData[$i] = null;
7974
}
8075

8176
if ($rowDataCount > $headersCount) {

0 commit comments

Comments
 (0)