Skip to content

Commit 192ff1f

Browse files
committed
Add option to drop extra columns from extracted spreadsheets
1 parent 4fd67a7 commit 192ff1f

3 files changed

Lines changed: 29 additions & 19 deletions

File tree

src/adapter/etl-adapter-google-sheet/src/Flow/ETL/Adapter/GoogleSheet/GoogleSheetExtractor.php

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ final class GoogleSheetExtractor implements Extractor, LimitableExtractor
1414
{
1515
use Limitable;
1616

17+
private bool $dropExtraColumns = false;
18+
1719
/**
1820
* @param array{dateTimeRenderOption?: string, majorDimension?: string, valueRenderOption?: string} $options
1921
*/
@@ -57,24 +59,30 @@ public function extract(FlowContext $context) : \Generator
5759
$totalRows = 0;
5860
}
5961

62+
$headersCount = \count($headers);
63+
6064
$shouldPutInputIntoRows = $context->config->shouldPutInputIntoRows();
6165

6266
while (\count($values)) {
6367
$rows = \array_map(
64-
function (array $rowData) use ($headers, $shouldPutInputIntoRows) {
65-
if (\count($headers) > \count($rowData)) {
68+
function (array $rowData) use ($headers, $headersCount, $shouldPutInputIntoRows) {
69+
if ($headersCount > \count($rowData)) {
6670
\array_push(
6771
$rowData,
6872
...\array_map(
6973
static fn (int $i) => null,
70-
\range(1, \count($headers) - \count($rowData))
74+
\range(1, $headersCount - \count($rowData))
7175
)
7276
);
7377
}
7478

75-
if (\count($rowData) > \count($headers)) {
76-
/** @phpstan-ignore-next-line */
77-
$rowData = \array_chunk($rowData, \count($headers));
79+
if (\count($rowData) > $headersCount) {
80+
if ($this->dropExtraColumns) {
81+
$rowData = \array_slice($rowData, 0, $headersCount);
82+
} else {
83+
/** @phpstan-ignore-next-line */
84+
$rowData = \array_chunk($rowData, $headersCount);
85+
}
7886
}
7987

8088
$row = \array_combine($headers, $rowData);
@@ -105,7 +113,7 @@ function (array $rowData) use ($headers, $shouldPutInputIntoRows) {
105113
}
106114

107115
$cellsRange = $cellsRange->nextRows($this->rowsPerPage);
108-
/** @var Sheets\ValueRange $response */
116+
109117
$response = $this->service->spreadsheets_values->get($this->spreadsheetId, $cellsRange->toString(), $this->options);
110118
/**
111119
* @var array<array> $values
@@ -114,6 +122,13 @@ function (array $rowData) use ($headers, $shouldPutInputIntoRows) {
114122
}
115123
}
116124

125+
public function withDropExtraColumns(bool $dropExtraColumns) : self
126+
{
127+
$this->dropExtraColumns = $dropExtraColumns;
128+
129+
return $this;
130+
}
131+
117132
public function withHeader(bool $withHeader) : self
118133
{
119134
$this->withHeader = $withHeader;

src/adapter/etl-adapter-google-sheet/src/Flow/ETL/Adapter/GoogleSheet/functions.php

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
namespace Flow\ETL\Adapter\GoogleSheet;
66

77
use Flow\ETL\Attribute\{DocumentationDSL, Module, Type};
8-
use Flow\ETL\Extractor;
98
use Google\Client;
109
use Google\Service\Sheets;
1110

@@ -25,7 +24,7 @@ function from_google_sheet(
2524
bool $with_header = true,
2625
int $rows_per_page = 1000,
2726
array $options = [],
28-
) : Extractor {
27+
) : GoogleSheetExtractor {
2928
if ($auth_config instanceof Sheets) {
3029
$sheets = $auth_config;
3130
} else {
@@ -64,7 +63,7 @@ function from_google_sheet_columns(
6463
bool $with_header = true,
6564
int $rows_per_page = 1000,
6665
array $options = [],
67-
) : Extractor {
66+
) : GoogleSheetExtractor {
6867
if ($auth_config instanceof Sheets) {
6968
$sheets = $auth_config;
7069
} else {

src/adapter/etl-adapter-google-sheet/tests/Flow/ETL/Adapter/GoogleSheet/Tests/Unit/GoogleSheetExtractorTest.php

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,8 @@ public function test_its_stop_fetching_data_if_processed_row_count_is_less_then_
2323
$sheetName = 'sheet',
2424
'A',
2525
'B',
26-
true,
27-
2,
28-
);
26+
)->withHeader(true)
27+
->withRowsPerPage(2);
2928
$spreadSheetIdEntry = string_entry('_spread_sheet_id', $spreadSheetId);
3029
$sheetNameEntry = string_entry('_sheet_name', $sheetName);
3130
$firstValueRangeMock = $this->createMock(ValueRange::class);
@@ -63,9 +62,7 @@ public function test_rows_in_batch_must_be_positive_integer() : void
6362
'sheet',
6463
'A',
6564
'B',
66-
true,
67-
0
68-
);
65+
)->withRowsPerPage(0);
6966
}
7067

7168
public function test_works_for_no_data() : void
@@ -76,9 +73,8 @@ public function test_works_for_no_data() : void
7673
'sheet',
7774
'A',
7875
'B',
79-
true,
80-
20
81-
);
76+
)->withHeader(true)
77+
->withRowsPerPage(20);
8278
$ValueRangeMock = $this->createMock(ValueRange::class);
8379
$ValueRangeMock->method('getValues')->willReturn(null);
8480

0 commit comments

Comments
 (0)