Skip to content

Commit 0d5c45d

Browse files
authored
Adjust GoogleSheetExtractor row data extraction (#2222)
* Adjust `GoogleSheetExtractor` row data extraction
1 parent 0afde11 commit 0d5c45d

3 files changed

Lines changed: 65 additions & 24 deletions

File tree

src/adapter/etl-adapter-google-sheet/src/Flow/ETL/Adapter/GoogleSheet/GoogleSheetExtractor.php

Lines changed: 17 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -76,38 +76,31 @@ public function extract(FlowContext $context) : \Generator
7676
$shouldPutInputIntoRows = $context->config->shouldPutInputIntoRows();
7777

7878
while ([] !== $values) {
79-
$rows = \array_map(
80-
function (array $rowData) use ($headers, $headersCount, $shouldPutInputIntoRows) {
81-
$rowDataCount = \count($rowData);
79+
foreach ($values as $rowData) {
80+
$rowDataCount = \count($rowData);
8281

83-
// Expand columns to the size of the previous row
84-
for ($i = $rowDataCount; $i < $headersCount; $i++) {
85-
$rowData[$i] = null;
86-
}
87-
88-
if ($rowDataCount > $headersCount) {
89-
if (!$this->dropExtraColumns) {
90-
throw InvalidArgumentException::because('Row has more columns (%d) than headers (%d)', $rowDataCount, $headersCount);
91-
}
82+
// Pad rows that have fewer cells than the header row with nulls, up to the header count
83+
for ($i = $rowDataCount; $i < $headersCount; $i++) {
84+
$rowData[$i] = null;
85+
}
9286

93-
$rowData = \array_slice($rowData, 0, $headersCount);
87+
if ($rowDataCount > $headersCount) {
88+
if (!$this->dropExtraColumns) {
89+
throw InvalidArgumentException::because('Row has more columns (%d) than headers (%d)', $rowDataCount, $headersCount);
9490
}
9591

96-
$row = \array_combine($headers, $rowData);
92+
$rowData = \array_slice($rowData, 0, $headersCount);
93+
}
9794

98-
if ($shouldPutInputIntoRows) {
99-
$row['_spread_sheet_id'] = $this->spreadsheetId;
100-
$row['_sheet_name'] = $this->columnRange->sheetName;
101-
}
95+
$row = \array_combine($headers, $rowData);
10296

103-
return $row;
104-
},
105-
$values
106-
);
97+
if ($shouldPutInputIntoRows) {
98+
$row['_spread_sheet_id'] = $this->spreadsheetId;
99+
$row['_sheet_name'] = $this->columnRange->sheetName;
100+
}
107101

108-
$totalRows += \count($rows);
102+
$totalRows++;
109103

110-
foreach ($rows as $row) {
111104
$signal = yield array_to_rows($row, $context->entryFactory(), schema: $this->schema);
112105

113106
$this->incrementReturnedRows();
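src/adapter/etl-adapter-google-sheet/tests/Flow/ETL/Adapter/GoogleSheet/Tests/Fixtures/missing-columns.json

Lines changed: 11 additions & 0 deletions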
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"range": "Sheet!A1:C5",
3+
"majorDimension": "ROWS",
4+
"values": [
5+
["Header 1", "Header 2", "Header 3"],
6+
["A2", "B2", "C2"],
7+
["A3", "B3"],
8+
["A4", "B4"],
9+
["A5", "B5", "C5"]
10+
]
11+
}

src/adapter/etl-adapter-google-sheet/tests/Flow/ETL/Adapter/GoogleSheet/Tests/Integration/GoogleSheetExtractorTest.php

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,24 @@ protected function setUp() : void
1919
$this->context = new GoogleSheetsContext();
2020
}
2121

22+
public function test_extract_expand_missing_columns() : void
23+
{
24+
$rows = df()
25+
->extract(
26+
from_google_sheet(
27+
$this->context->sheets(__DIR__ . '/../Fixtures/missing-columns.json'),
28+
'1234567890',
29+
'Sheet',
30+
)
31+
)
32+
->fetch()
33+
->toArray();
34+
35+
foreach ($rows as $row) {
36+
self::assertCount(3, $row);
37+
}
38+
}
39+
2240
public function test_extract_puts_null_in_not_matching_schema_rows() : void
2341
{
2442
$rows = df()
@@ -58,6 +76,8 @@ public function test_extract_skip_extra_empty_rows() : void
5876
->fetch()
5977
->toArray();
6078

79+
self::assertCount(10, $rows);
80+
6181
foreach ($rows as $row) {
6282
self::assertNotSame([], $row);
6383
}
@@ -81,6 +101,23 @@ public function test_extract_with_cut_extra_columns() : void
81101
}
82102
}
83103

104+
public function test_extract_with_limit() : void
105+
{
106+
$extractor = from_google_sheet(
107+
$this->context->sheets(__DIR__ . '/../Fixtures/extra-columns.json'),
108+
'1234567890',
109+
'Sheet',
110+
);
111+
$extractor->changeLimit(2);
112+
113+
$rows = df()
114+
->extract($extractor)
115+
->fetch()
116+
->toArray();
117+
118+
self::assertCount(2, $rows);
119+
}
120+
84121
public function test_extract_without_cut_extra_columns() : void
85122
{
86123
$this->expectException(InvalidArgumentException::class);

0 commit comments

Comments
 (0)