Skip to content

Commit 3fc9d56

Browse files
authored
Rework GoogleSheetExtractor to use batch get functionality (#2224)
* Rework `GoogleSheetExtractor` to use batch get functionality
* Rework `GoogleSheetExtractor` to use batch get functionality
* Rework `GoogleSheetExtractor` to use batch get functionality
1 parent 2d57d5e commit 3fc9d56

11 files changed

Lines changed: 397 additions & 139 deletions

File tree

src/adapter/etl-adapter-google-sheet/src/Flow/ETL/Adapter/GoogleSheet/GoogleSheetExtractor.php

Lines changed: 46 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -37,48 +37,60 @@ public function __construct(
3737

3838
public function extract(FlowContext $context) : \Generator
3939
{
40-
$cellsRange = new SheetRange($this->columnRange, 1, $this->rowsPerPage);
41-
$headers = [];
42-
43-
$response = $this->service->spreadsheets_values->get(
40+
$spreadsheet = $this->service->spreadsheets->get(
4441
$this->spreadsheetId,
45-
$cellsRange->toString(),
46-
$this->options
42+
['ranges' => [], 'includeGridData' => false]
4743
);
4844

49-
/**
50-
* @var array<array<null|string>> $values
51-
*/
52-
$values = $response->getValues() ?? [];
45+
$maxRows = 0;
5346

54-
$totalRows = 0;
55-
56-
if ($this->withHeader && [] !== $values) {
57-
foreach ($values as $index => $row) {
58-
if ([] === $row) {
59-
// Remove empty rows at the beginning of a sheet
60-
unset($values[$index]);
61-
62-
continue;
63-
}
64-
65-
/** @var array<string> $headers */
66-
$headers = $row;
67-
unset($values[$index]);
68-
$totalRows = 1;
47+
foreach ($spreadsheet->getSheets() as $sheet) {
48+
if ($sheet->getProperties()->title === $this->columnRange->sheetName) {
49+
$maxRows = $sheet->getProperties()->getGridProperties()->getRowCount();
6950

7051
break;
7152
}
7253
}
7354

74-
$headersCount = \count($headers);
55+
$cellsRange = new SheetRange($this->columnRange, 1, $this->rowsPerPage, $maxRows);
56+
57+
$ranges = [];
58+
59+
for ($totalRows = 0; $totalRows < $cellsRange->endRow; $totalRows += $this->rowsPerPage) {
60+
$ranges[] = $cellsRange->toString();
61+
62+
$cellsRange = $cellsRange->nextRows($this->rowsPerPage);
63+
}
7564

7665
$shouldPutInputIntoRows = $context->config->shouldPutInputIntoRows();
7766

78-
while ([] !== $values) {
79-
foreach ($values as $rowData) {
67+
$headers = [];
68+
$headersCount = 0;
69+
70+
$response = $this->service->spreadsheets_values->batchGet($this->spreadsheetId, array_merge($this->options, ['ranges' => $ranges]));
71+
72+
foreach ($response->getValueRanges() as $valueRange) {
73+
foreach ($valueRange->getValues() as $rowData) {
8074
$rowDataCount = \count($rowData);
8175

76+
if ($this->withHeader) {
77+
if ([] === $headers) {
78+
// Skip empty rows at the beginning of a sheet
79+
if ([] === $rowData) {
80+
continue;
81+
}
82+
83+
/** @var array<string> $headers */
84+
$headers = $rowData;
85+
86+
$headersCount = $rowDataCount;
87+
88+
continue;
89+
}
90+
} elseif (0 === $headersCount) {
91+
$headersCount = $rowDataCount;
92+
}
93+
8294
// Expand columns to the size of the previous row
8395
for ($i = $rowDataCount; $i < $headersCount; $i++) {
8496
$rowData[$i] = null;
@@ -92,35 +104,23 @@ public function extract(FlowContext $context) : \Generator
92104
$rowData = \array_slice($rowData, 0, $headersCount);
93105
}
94106

95-
$row = \array_combine($headers, $rowData);
107+
if ($this->withHeader) {
108+
$rowData = \array_combine($headers, $rowData);
109+
}
96110

97111
if ($shouldPutInputIntoRows) {
98-
$row['_spread_sheet_id'] = $this->spreadsheetId;
99-
$row['_sheet_name'] = $this->columnRange->sheetName;
112+
$rowData['_spread_sheet_id'] = $this->spreadsheetId;
113+
$rowData['_sheet_name'] = $this->columnRange->sheetName;
100114
}
101115

102-
$totalRows++;
103-
104-
$signal = yield array_to_rows($row, $context->entryFactory(), schema: $this->schema);
116+
$signal = yield array_to_rows($rowData, $context->entryFactory(), schema: $this->schema);
105117

106118
$this->incrementReturnedRows();
107119

108120
if ($signal === Signal::STOP || $this->reachedLimit()) {
109121
return;
110122
}
111123
}
112-
113-
if ($totalRows < $cellsRange->endRow) {
114-
return;
115-
}
116-
117-
$cellsRange = $cellsRange->nextRows($this->rowsPerPage);
118-
119-
$response = $this->service->spreadsheets_values->get($this->spreadsheetId, $cellsRange->toString(), $this->options);
120-
/**
121-
* @var array<array<null|string>> $values
122-
*/
123-
$values = $response->getValues() ?? [];
124124
}
125125
}
126126

src/adapter/etl-adapter-google-sheet/src/Flow/ETL/Adapter/GoogleSheet/SheetRange.php

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,31 @@
88

99
final readonly class SheetRange
1010
{
11+
public int $endRow;
12+
1113
public function __construct(
1214
public Columns $columnRange,
1315
public int $startRow,
14-
public int $endRow,
16+
int $endRow,
17+
private int $maxRows,
1518
) {
1619
if ($this->startRow < 1) {
1720
throw new InvalidArgumentException(\sprintf('Start row "%d" must be greater than 0', $this->startRow));
1821
}
1922

20-
if ($this->endRow < 1) {
21-
throw new InvalidArgumentException(\sprintf('End row "%d" must be greater than 0', $this->endRow));
23+
if ($endRow < 1) {
24+
throw new InvalidArgumentException(\sprintf('End row "%d" must be greater than 0', $endRow));
25+
}
26+
27+
if ($endRow < $this->startRow) {
28+
throw new InvalidArgumentException(\sprintf('End row "%d" must be greater or equal to start row "%d"', $endRow, $this->startRow));
2229
}
2330

24-
if ($this->endRow < $this->startRow) {
25-
throw new InvalidArgumentException(\sprintf('End row "%d" must be greater or equal to start row "%d"', $this->endRow, $this->startRow));
31+
if ($this->maxRows < 1) {
32+
throw new InvalidArgumentException(\sprintf('Max rows "%d" must be greater than 0', $this->maxRows));
2633
}
34+
35+
$this->endRow = min($endRow, $this->maxRows);
2736
}
2837

2938
public function nextRows(int $count) : self
@@ -34,8 +43,9 @@ public function nextRows(int $count) : self
3443

3544
return new self(
3645
$this->columnRange,
37-
$this->endRow + 1,
38-
$this->endRow + $count,
46+
min($this->endRow + 1, $this->maxRows),
47+
min($this->endRow + $count, $this->maxRows),
48+
$this->maxRows,
3949
);
4050
}
4151

@@ -47,7 +57,7 @@ public function toString() : string
4757
$this->columnRange->startColumn,
4858
$this->startRow,
4959
$this->columnRange->endColumn,
50-
$this->endRow
60+
$this->endRow,
5161
);
5262
}
5363
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
2+
"spreadsheetId": "eae3913a86de8d5ccfe841fd3c7ec50dba3902045232",
3+
"valueRanges": [
4+
{
5+
"range": "Sheet!A1:C10",
6+
"majorDimension": "ROWS",
7+
"values": [
8+
["Header 1", "Header 2", "Header 3"],
9+
["A2", "B2", "C2"],
10+
["A3", "B3", "C3"],
11+
["A4", "B4", "C4"],
12+
["A5", "B5", "C5"],
13+
["A6", "B6", "C6"],
14+
["A7", "B7", "C7"],
15+
["A8", "B8", "C8"],
16+
["A9", "B9", "C9"],
17+
["A10", "B10", "C10"]
18+
]
19+
},
20+
{
21+
"range": "Sheet!A11:C20",
22+
"majorDimension": "ROWS",
23+
"values": [
24+
["A11", "B11", "C11"],
25+
["A12", "B12", "C12"],
26+
["A13", "B13", "C13"],
27+
["A14", "B14", "C14"],
28+
["A15", "B15", "C15"],
29+
["A16", "B16", "C16"],
30+
["A17", "B17", "C17"],
31+
["A18", "B18", "C18"],
32+
["A19", "B19", "C19"],
33+
["A20", "B20", "C20"]
34+
]
35+
}
36+
]
37+
}
Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,22 @@
11
{
2-
"range": "Sheet!A1:D11",
3-
"majorDimension": "ROWS",
4-
"values": [
5-
["Header 1", "Header 2", "Header 3"],
6-
["A2", "B2", "C2"],
7-
["A3", "B3", "C3"],
8-
["A4", "B4", "C4"],
9-
["A5", "B5", "C5"],
10-
["A6", "B6", "C6"],
11-
["A7", "B7", "C7"],
12-
["A8", "B8", "C8"],
13-
["A9", "B9", "C9"],
14-
["A10", "B10", "C10"],
15-
["A11", "B11", "C11", "D11"]
2+
"spreadsheetId": "eae3913a86de8d5ccfe841fd3c7ec50dba3902045232",
3+
"valueRanges": [
4+
{
5+
"range": "Sheet!A1:D11",
6+
"majorDimension": "ROWS",
7+
"values": [
8+
["Header 1", "Header 2", "Header 3"],
9+
["A2", "B2", "C2"],
10+
["A3", "B3", "C3"],
11+
["A4", "B4", "C4"],
12+
["A5", "B5", "C5"],
13+
["A6", "B6", "C6"],
14+
["A7", "B7", "C7"],
15+
["A8", "B8", "C8"],
16+
["A9", "B9", "C9"],
17+
["A10", "B10", "C10"],
18+
["A11", "B11", "C11", "D11"]
19+
]
20+
}
1621
]
1722
}
Lines changed: 20 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,23 @@
11
{
2-
"range": "Sheet!A1:D11",
3-
"majorDimension": "ROWS",
4-
"values": [
5-
[],
6-
["Header 1", "Header 2", "Header 3"],
7-
["A2", "B2", "C2"],
8-
["A3", "B3", "C3"],
9-
["A4", "B4", "C4"],
10-
["A5", "B5", "C5"],
11-
["A6", "B6", "C6"],
12-
["A7", "B7", "C7"],
13-
["A8", "B8", "C8"],
14-
["A9", "B9", "C9"],
15-
["A10", "B10", "C10"],
16-
["A11", "B11", "C11", "D11"]
2+
"spreadsheetId": "eae3913a86de8d5ccfe841fd3c7ec50dba3902045232",
3+
"valueRanges": [
4+
{
5+
"range": "Sheet!A1:D11",
6+
"majorDimension": "ROWS",
7+
"values": [
8+
[],
9+
["Header 1", "Header 2", "Header 3"],
10+
["A2", "B2", "C2"],
11+
["A3", "B3", "C3"],
12+
["A4", "B4", "C4"],
13+
["A5", "B5", "C5"],
14+
["A6", "B6", "C6"],
15+
["A7", "B7", "C7"],
16+
["A8", "B8", "C8"],
17+
["A9", "B9", "C9"],
18+
["A10", "B10", "C10"],
19+
["A11", "B11", "C11", "D11"]
20+
]
21+
}
1722
]
1823
}
Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,16 @@
11
{
2-
"range": "Sheet!A1:C5",
3-
"majorDimension": "ROWS",
4-
"values": [
5-
["Header 1", "Header 2", "Header 3"],
6-
["A2", "B2", "C2"],
7-
["A3", "B3"],
8-
["A4", "B4"],
9-
["A5", "B5", "C5"]
2+
"spreadsheetId": "eae3913a86de8d5ccfe841fd3c7ec50dba3902045232",
3+
"valueRanges": [
4+
{
5+
"range": "Sheet!A1:C5",
6+
"majorDimension": "ROWS",
7+
"values": [
8+
["Header 1", "Header 2", "Header 3"],
9+
["A2", "B2", "C2"],
10+
["A3", "B3"],
11+
["A4", "B4"],
12+
["A5", "B5", "C5"]
13+
]
14+
}
1015
]
1116
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
{
2+
"spreadsheetId": "eae3913a86de8d5ccfe841fd3c7ec50dba3902045232",
3+
"properties": {
4+
"title": "Spreadsheet 1",
5+
"locale": "en_US",
6+
"autoRecalc": "ON_CHANGE",
7+
"timeZone": "Europe/Warsaw",
8+
"defaultFormat": {
9+
"backgroundColor": {
10+
"red": 1,
11+
"green": 1,
12+
"blue": 1
13+
},
14+
"padding": {
15+
"top": 2,
16+
"right": 3,
17+
"bottom": 2,
18+
"left": 3
19+
},
20+
"verticalAlignment": "BOTTOM",
21+
"wrapStrategy": "OVERFLOW_CELL",
22+
"textFormat": {
23+
"foregroundColor": {},
24+
"fontFamily": "arial,sans,sans-serif",
25+
"fontSize": 10,
26+
"bold": false,
27+
"italic": false,
28+
"strikethrough": false,
29+
"underline": false,
30+
"foregroundColorStyle": {
31+
"rgbColor": {}
32+
}
33+
},
34+
"backgroundColorStyle": {
35+
"rgbColor": {
36+
"red": 1,
37+
"green": 1,
38+
"blue": 1
39+
}
40+
}
41+
},
42+
"spreadsheetTheme": {
43+
"primaryFontFamily": "Arial",
44+
"themeColors": [
45+
{
46+
"colorType": "TEXT",
47+
"color": {
48+
"rgbColor": {}
49+
}
50+
}
51+
]
52+
}
53+
},
54+
"sheets": [
55+
{
56+
"properties": {
57+
"sheetId": 0,
58+
"title": "Sheet",
59+
"index": 0,
60+
"sheetType": "GRID",
61+
"gridProperties": {
62+
"rowCount": 20,
63+
"columnCount": 6
64+
}
65+
}
66+
}
67+
],
68+
"spreadsheetUrl": "https://docs.google.com/spreadsheets/d/eae3913a86de8d5ccfe841fd3c7ec50dba3902045232/edit"
69+
}

0 commit comments

Comments
 (0)