Skip to content

Commit 7814d70

Browse files
authored
[GoogleSheet] Prevent fatal error when extracted columns don't match headers amount (#1606)
1 parent 4291cb0 commit 7814d70

9 files changed

Lines changed: 154 additions & 21 deletions

File tree

composer.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,8 @@
307307
"tools/phpunit/vendor/bin/phpunit --testsuite=adapter-elasticsearch-integration --log-junit ./var/phpunit/logs/adapter-elasticsearch-integration.junit.xml --coverage-clover=./var/phpunit/coverage/clover/adapter-elasticsearch-integration.coverage.xml --coverage-html=./var/phpunit/coverage/html/adapter-elasticsearch-integration"
308308
],
309309
"test:adapter:google-sheet": [
310-
"tools/phpunit/vendor/bin/phpunit --testsuite=adapter-google-sheet-unit --log-junit ./var/phpunit/logs/adapter-google-sheet-unit.junit.xml --coverage-clover=./var/phpunit/coverage/clover/adapter-google-sheet-unit.coverage.xml --coverage-html=./var/phpunit/coverage/html/adapter-google-sheet-unit"
310+
"tools/phpunit/vendor/bin/phpunit --testsuite=adapter-google-sheet-unit --log-junit ./var/phpunit/logs/adapter-google-sheet-unit.junit.xml --coverage-clover=./var/phpunit/coverage/clover/adapter-google-sheet-unit.coverage.xml --coverage-html=./var/phpunit/coverage/html/adapter-google-sheet-unit",
311+
"tools/phpunit/vendor/bin/phpunit --testsuite=adapter-google-sheet-integration --log-junit ./var/phpunit/coverage/clover/adapter-google-sheet-integration.junit.xml --coverage-clover=./var/phpunit/coverage/clover/adapter-google-sheet-integration.coverage.xml --coverage-html=./var/phpunit/coverage/html/adapter-google-sheet-integration"
311312
],
312313
"test:adapter:http": [
313314
"tools/phpunit/vendor/bin/phpunit --testsuite=adapter-http-unit --log-junit ./var/phpunit/logs/adapter-http-unit.junit.xml --coverage-clover=./var/phpunit/coverage/clover/adapter-http-unit.coverage.xml --coverage-html=./var/phpunit/coverage/html/adapter-http-unit",

phpunit.xml.dist

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,9 @@
120120
<testsuite name="adapter-google-sheet-unit">
121121
<directory>src/adapter/etl-adapter-google-sheet/tests/Flow/ETL/Adapter/GoogleSheet/Tests/Unit</directory>
122122
</testsuite>
123+
<testsuite name="adapter-google-sheet-integration">
124+
<directory>src/adapter/etl-adapter-google-sheet/tests/Flow/ETL/Adapter/GoogleSheet/Tests/Integration</directory>
125+
</testsuite>
123126
<testsuite name="adapter-http-unit">
124127
<directory>src/adapter/etl-adapter-http/tests/Flow/ETL/Adapter/HTTP/Tests/Unit</directory>
125128
</testsuite>

src/adapter/etl-adapter-google-sheet/src/Flow/ETL/Adapter/GoogleSheet/GoogleSheetExtractor.php

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ final class GoogleSheetExtractor implements Extractor, LimitableExtractor
1414
{
1515
use Limitable;
1616

17+
private bool $dropExtraColumns = true;
18+
1719
/**
1820
* @param array{dateTimeRenderOption?: string, majorDimension?: string, valueRenderOption?: string} $options
1921
*/
@@ -57,24 +59,31 @@ public function extract(FlowContext $context) : \Generator
5759
$totalRows = 0;
5860
}
5961

62+
$headersCount = \count($headers);
63+
6064
$shouldPutInputIntoRows = $context->config->shouldPutInputIntoRows();
6165

6266
while (\count($values)) {
6367
$rows = \array_map(
64-
function (array $rowData) use ($headers, $shouldPutInputIntoRows) {
65-
if (\count($headers) > \count($rowData)) {
68+
function (array $rowData) use ($headers, $headersCount, $shouldPutInputIntoRows) {
69+
$rowDataCount = \count($rowData);
70+
71+
if ($headersCount > $rowDataCount) {
6672
\array_push(
6773
$rowData,
6874
...\array_map(
6975
static fn (int $i) => null,
70-
\range(1, \count($headers) - \count($rowData))
76+
\range(1, $headersCount - $rowDataCount)
7177
)
7278
);
7379
}
7480

75-
if (\count($rowData) > \count($headers)) {
76-
/** @phpstan-ignore-next-line */
77-
$rowData = \array_chunk($rowData, \count($headers));
81+
if ($rowDataCount > $headersCount) {
82+
if (!$this->dropExtraColumns) {
83+
throw InvalidArgumentException::because('Row has more columns (%d) than headers (%d)', $rowDataCount, $headersCount);
84+
}
85+
86+
$rowData = \array_slice($rowData, 0, $headersCount);
7887
}
7988

8089
$row = \array_combine($headers, $rowData);
@@ -105,7 +114,7 @@ function (array $rowData) use ($headers, $shouldPutInputIntoRows) {
105114
}
106115

107116
$cellsRange = $cellsRange->nextRows($this->rowsPerPage);
108-
/** @var Sheets\ValueRange $response */
117+
109118
$response = $this->service->spreadsheets_values->get($this->spreadsheetId, $cellsRange->toString(), $this->options);
110119
/**
111120
* @var array<array> $values
@@ -114,6 +123,13 @@ function (array $rowData) use ($headers, $shouldPutInputIntoRows) {
114123
}
115124
}
116125

126+
public function withDropExtraColumns(bool $dropExtraColumns) : self
127+
{
128+
$this->dropExtraColumns = $dropExtraColumns;
129+
130+
return $this;
131+
}
132+
117133
public function withHeader(bool $withHeader) : self
118134
{
119135
$this->withHeader = $withHeader;

src/adapter/etl-adapter-google-sheet/src/Flow/ETL/Adapter/GoogleSheet/functions.php

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
namespace Flow\ETL\Adapter\GoogleSheet;
66

77
use Flow\ETL\Attribute\{DocumentationDSL, Module, Type};
8-
use Flow\ETL\Extractor;
98
use Google\Client;
109
use Google\Service\Sheets;
1110

@@ -25,7 +24,7 @@ function from_google_sheet(
2524
bool $with_header = true,
2625
int $rows_per_page = 1000,
2726
array $options = [],
28-
) : Extractor {
27+
) : GoogleSheetExtractor {
2928
if ($auth_config instanceof Sheets) {
3029
$sheets = $auth_config;
3130
} else {
@@ -64,7 +63,7 @@ function from_google_sheet_columns(
6463
bool $with_header = true,
6564
int $rows_per_page = 1000,
6665
array $options = [],
67-
) : Extractor {
66+
) : GoogleSheetExtractor {
6867
if ($auth_config instanceof Sheets) {
6968
$sheets = $auth_config;
7069
} else {
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
{
2+
"range": "Sheet!A1:D11",
3+
"majorDimension": "ROWS",
4+
"values": [
5+
["Header 1", "Header 2", "Header 3"],
6+
["A2", "B2", "C2"],
7+
["A3", "B3", "C3"],
8+
["A4", "B4", "C4"],
9+
["A5", "B5", "C5"],
10+
["A6", "B6", "C6"],
11+
["A7", "B7", "C7"],
12+
["A8", "B8", "C8"],
13+
["A9", "B9", "C9"],
14+
["A10", "B10", "C10"],
15+
["A11", "B11", "C11", "D11"]
16+
]
17+
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\ETL\Adapter\GoogleSheet\Tests;
6+
7+
use Google\Client as GoogleClient;
8+
use Google\Service\Sheets;
9+
use GuzzleHttp\{Client as HttpClient, HandlerStack};
10+
use GuzzleHttp\Handler\MockHandler;
11+
use GuzzleHttp\Psr7\Response;
12+
13+
final readonly class GoogleSheetsContext
14+
{
15+
private GoogleClient $client;
16+
17+
public function __construct()
18+
{
19+
$this->client = new GoogleClient();
20+
}
21+
22+
public function sheets(string $fixtureFile) : Sheets
23+
{
24+
$this->client->setHttpClient($this->createHttpClient($fixtureFile));
25+
26+
return new Sheets($this->client);
27+
}
28+
29+
private function createHttpClient(string $fixtureFile) : HttpClient
30+
{
31+
return new HttpClient(
32+
[
33+
'handler' => HandlerStack::create(
34+
new MockHandler(
35+
[
36+
new Response(
37+
200,
38+
['Content-Type' => 'application/json'],
39+
file_get_contents($fixtureFile) ?: throw new \RuntimeException('Failed to read file: ' . $fixtureFile)
40+
),
41+
]
42+
)
43+
),
44+
]
45+
);
46+
}
47+
}
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Flow\ETL\Adapter\GoogleSheet\Tests\Integration;
6+
7+
use function Flow\ETL\DSL\{config, flow_context};
8+
use Flow\ETL\Adapter\GoogleSheet\{Columns, GoogleSheetExtractor, Tests\GoogleSheetsContext};
9+
use Flow\ETL\Exception\InvalidArgumentException;
10+
use Flow\ETL\Tests\FlowTestCase;
11+
12+
final class GoogleSheetExtractorTest extends FlowTestCase
13+
{
14+
private GoogleSheetsContext $context;
15+
16+
protected function setUp() : void
17+
{
18+
$this->context = new GoogleSheetsContext();
19+
}
20+
21+
public function test_extract_with_cut_extra_columns() : void
22+
{
23+
$extractor = new GoogleSheetExtractor(
24+
$this->context->sheets(__DIR__ . '/../Fixtures/extra-columns.json'),
25+
'1234567890',
26+
new Columns('Sheet', 'A', 'Z'),
27+
);
28+
29+
$rows = $extractor->extract(flow_context(config()));
30+
31+
foreach ($rows as $row) {
32+
self::assertNotNull($row);
33+
}
34+
}
35+
36+
public function test_extract_without_cut_extra_columns() : void
37+
{
38+
$extractor = new GoogleSheetExtractor(
39+
$this->context->sheets(__DIR__ . '/../Fixtures/extra-columns.json'),
40+
'1234567890',
41+
new Columns('Sheet', 'A', 'Z'),
42+
);
43+
$extractor->withDropExtraColumns(false);
44+
45+
$rows = $extractor->extract(flow_context(config()));
46+
47+
$this->expectException(InvalidArgumentException::class);
48+
$this->expectExceptionMessage('Row has more columns (4) than headers (3)');
49+
50+
foreach ($rows as $row) {
51+
self::assertNotNull($row);
52+
}
53+
}
54+
}

src/adapter/etl-adapter-google-sheet/tests/Flow/ETL/Adapter/GoogleSheet/Tests/Unit/GoogleSheetExtractorTest.php

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,8 @@ public function test_its_stop_fetching_data_if_processed_row_count_is_less_then_
2323
$sheetName = 'sheet',
2424
'A',
2525
'B',
26-
true,
27-
2,
28-
);
26+
)->withHeader(true)
27+
->withRowsPerPage(2);
2928
$spreadSheetIdEntry = string_entry('_spread_sheet_id', $spreadSheetId);
3029
$sheetNameEntry = string_entry('_sheet_name', $sheetName);
3130
$firstValueRangeMock = $this->createMock(ValueRange::class);
@@ -63,9 +62,7 @@ public function test_rows_in_batch_must_be_positive_integer() : void
6362
'sheet',
6463
'A',
6564
'B',
66-
true,
67-
0
68-
);
65+
)->withRowsPerPage(0);
6966
}
7067

7168
public function test_works_for_no_data() : void
@@ -76,9 +73,8 @@ public function test_works_for_no_data() : void
7673
'sheet',
7774
'A',
7875
'B',
79-
true,
80-
20
81-
);
76+
)->withHeader(true)
77+
->withRowsPerPage(20);
8278
$ValueRangeMock = $this->createMock(ValueRange::class);
8379
$ValueRangeMock->method('getValues')->willReturn(null);
8480

web/landing/resources/dsl.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)