Skip to content

Commit 96f1d7f

Browse files
committed
Prevent fatal error when extracted columns don't match headers amount
1 parent 4291cb0 commit 96f1d7f

6 files changed

Lines changed: 121 additions & 20 deletions

File tree

src/adapter/etl-adapter-google-sheet/src/Flow/ETL/Adapter/GoogleSheet/GoogleSheetExtractor.php

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ final class GoogleSheetExtractor implements Extractor, LimitableExtractor
1414
{
1515
use Limitable;
1616

17+
private bool $dropExtraColumns = true;
18+
1719
/**
1820
* @param array{dateTimeRenderOption?: string, majorDimension?: string, valueRenderOption?: string} $options
1921
*/
@@ -57,24 +59,31 @@ public function extract(FlowContext $context) : \Generator
5759
$totalRows = 0;
5860
}
5961

62+
$headersCount = \count($headers);
63+
6064
$shouldPutInputIntoRows = $context->config->shouldPutInputIntoRows();
6165

6266
while (\count($values)) {
6367
$rows = \array_map(
64-
function (array $rowData) use ($headers, $shouldPutInputIntoRows) {
65-
if (\count($headers) > \count($rowData)) {
68+
function (array $rowData) use ($headers, $headersCount, $shouldPutInputIntoRows) {
69+
$rowDataCount = \count($rowData);
70+
71+
if ($headersCount > $rowDataCount) {
6672
\array_push(
6773
$rowData,
6874
...\array_map(
6975
static fn (int $i) => null,
70-
\range(1, \count($headers) - \count($rowData))
76+
\range(1, $headersCount - $rowDataCount)
7177
)
7278
);
7379
}
7480

75-
if (\count($rowData) > \count($headers)) {
76-
/** @phpstan-ignore-next-line */
77-
$rowData = \array_chunk($rowData, \count($headers));
81+
if ($rowDataCount > $headersCount) {
82+
if (!$this->dropExtraColumns) {
83+
throw InvalidArgumentException::because('Row has more columns (%d) than headers (%d)', $rowDataCount, $headersCount);
84+
}
85+
86+
$rowData = \array_slice($rowData, 0, $headersCount);
7887
}
7988

8089
$row = \array_combine($headers, $rowData);
@@ -105,7 +114,7 @@ function (array $rowData) use ($headers, $shouldPutInputIntoRows) {
105114
}
106115

107116
$cellsRange = $cellsRange->nextRows($this->rowsPerPage);
108-
/** @var Sheets\ValueRange $response */
117+
109118
$response = $this->service->spreadsheets_values->get($this->spreadsheetId, $cellsRange->toString(), $this->options);
110119
/**
111120
* @var array<array> $values
@@ -114,6 +123,13 @@ function (array $rowData) use ($headers, $shouldPutInputIntoRows) {
114123
}
115124
}
116125

126+
public function withDropExtraColumns(bool $dropExtraColumns) : self
127+
{
128+
$this->dropExtraColumns = $dropExtraColumns;
129+
130+
return $this;
131+
}
132+
117133
public function withHeader(bool $withHeader) : self
118134
{
119135
$this->withHeader = $withHeader;

src/adapter/etl-adapter-google-sheet/src/Flow/ETL/Adapter/GoogleSheet/functions.php

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
namespace Flow\ETL\Adapter\GoogleSheet;
66

77
use Flow\ETL\Attribute\{DocumentationDSL, Module, Type};
8-
use Flow\ETL\Extractor;
98
use Google\Client;
109
use Google\Service\Sheets;
1110

@@ -25,7 +24,7 @@ function from_google_sheet(
2524
bool $with_header = true,
2625
int $rows_per_page = 1000,
2726
array $options = [],
28-
) : Extractor {
27+
) : GoogleSheetExtractor {
2928
if ($auth_config instanceof Sheets) {
3029
$sheets = $auth_config;
3130
} else {
@@ -64,7 +63,7 @@ function from_google_sheet_columns(
6463
bool $with_header = true,
6564
int $rows_per_page = 1000,
6665
array $options = [],
67-
) : Extractor {
66+
) : GoogleSheetExtractor {
6867
if ($auth_config instanceof Sheets) {
6968
$sheets = $auth_config;
7069
} else {
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"range": "Sheet!A1:D11",
3+
"majorDimension": "ROWS",
4+
"values": [
5+
["Header 1", "Header 2", "Header 3"],
6+
["A2", "B2", "C2"],
7+
["A3", "B3", "C3"],
8+
["A4", "B4", "C4"],
9+
["A5", "B5", "C5"],
10+
["A6", "B6", "C6"],
11+
["A7", "B7", "C7"],
12+
["A8", "B8", "C8"],
13+
["A9", "B9", "C9"],
14+
["A10", "B10", "C10"],
15+
["A11", "B11", "C11", "D11"]
16+
]
17+
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\ETL\Adapter\GoogleSheet\Tests\Integration;
6+
7+
use function Flow\ETL\DSL\{config, flow_context};
8+
use Flow\ETL\Adapter\GoogleSheet\{Columns, GoogleSheetExtractor};
9+
use Flow\ETL\Exception\InvalidArgumentException;
10+
use Flow\ETL\Tests\FlowTestCase;
11+
use Google\Client as GoogleClient;
12+
use Google\Service\Sheets;
13+
use GuzzleHttp\{Client, HandlerStack};
14+
use GuzzleHttp\Handler\MockHandler;
15+
use GuzzleHttp\Psr7\{Response};
16+
17+
final class GoogleSheetExtractorTest extends FlowTestCase
18+
{
19+
public function test_extract_with_cut_extra_columns() : void
20+
{
21+
$client = new GoogleClient();
22+
$client->setHttpClient($this->createHttpClient());
23+
24+
$extractor = new GoogleSheetExtractor(
25+
new Sheets($client),
26+
'1234567890',
27+
new Columns('Sheet', 'A', 'Z'),
28+
);
29+
30+
$rows = $extractor->extract(flow_context(config()));
31+
32+
foreach ($rows as $row) {
33+
self::assertNotNull($row);
34+
}
35+
}
36+
37+
public function test_extract_without_cut_extra_columns() : void
38+
{
39+
$client = new GoogleClient();
40+
$client->setHttpClient($this->createHttpClient());
41+
42+
$extractor = new GoogleSheetExtractor(
43+
new Sheets($client),
44+
'1234567890',
45+
new Columns('Sheet', 'A', 'Z'),
46+
);
47+
$extractor->withDropExtraColumns(false);
48+
49+
$rows = $extractor->extract(flow_context(config()));
50+
51+
$this->expectException(InvalidArgumentException::class);
52+
$this->expectExceptionMessage('Row has more columns (4) than headers (3)');
53+
54+
foreach ($rows as $row) {
55+
self::assertNotNull($row);
56+
}
57+
}
58+
59+
private function createHttpClient() : Client
60+
{
61+
return new Client(
62+
[
63+
'handler' => HandlerStack::create(
64+
new MockHandler(
65+
[
66+
new Response(200, ['Content-Type' => 'application/json'], file_get_contents(__DIR__ . '/../Fixtures/extra-columns.json')),
67+
]
68+
)
69+
),
70+
]
71+
);
72+
}
73+
}

src/adapter/etl-adapter-google-sheet/tests/Flow/ETL/Adapter/GoogleSheet/Tests/Unit/GoogleSheetExtractorTest.php

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,8 @@ public function test_its_stop_fetching_data_if_processed_row_count_is_less_then_
2323
$sheetName = 'sheet',
2424
'A',
2525
'B',
26-
true,
27-
2,
28-
);
26+
)->withHeader(true)
27+
->withRowsPerPage(2);
2928
$spreadSheetIdEntry = string_entry('_spread_sheet_id', $spreadSheetId);
3029
$sheetNameEntry = string_entry('_sheet_name', $sheetName);
3130
$firstValueRangeMock = $this->createMock(ValueRange::class);
@@ -63,9 +62,7 @@ public function test_rows_in_batch_must_be_positive_integer() : void
6362
'sheet',
6463
'A',
6564
'B',
66-
true,
67-
0
68-
);
65+
)->withRowsPerPage(0);
6966
}
7067

7168
public function test_works_for_no_data() : void
@@ -76,9 +73,8 @@ public function test_works_for_no_data() : void
7673
'sheet',
7774
'A',
7875
'B',
79-
true,
80-
20
81-
);
76+
)->withHeader(true)
77+
->withRowsPerPage(20);
8278
$ValueRangeMock = $this->createMock(ValueRange::class);
8379
$ValueRangeMock->method('getValues')->willReturn(null);
8480

web/landing/resources/dsl.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)