Skip to content

Commit 8c62557

Browse files
committed
update: improve logic.
1 parent 0c85917 commit 8c62557

1 file changed

Lines changed: 165 additions & 113 deletions

File tree

src/Migration/Sources/CSV.php

Lines changed: 165 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ class CSV extends Source
3232

3333
private bool $downloaded = false;
3434

35+
// caching
36+
private ?array $cachedAttributes = null;
37+
38+
private ?array $attributeMetadata = null;
39+
3540
public function __construct(
3641
string $resourceId,
3742
string $filePath,
@@ -120,73 +125,19 @@ protected function exportGroupDatabases(int $batchSize, array $resources): void
120125
*/
121126
private function exportDocuments(int $batchSize): void
122127
{
128+
$this->loadAndCacheAttributes($batchSize);
123129

124-
$attributes = [];
125-
$lastAttribute = null;
126-
127-
[$databaseId, $collectionId] = explode(':', $this->resourceId);
128-
$database = new Database($databaseId, '');
129-
$collection = new Collection($database, '', $collectionId);
130-
131-
while (true) {
132-
$queries = [$this->database->queryLimit($batchSize)];
133-
if ($lastAttribute) {
134-
$queries[] = $this->database->queryCursorAfter($lastAttribute);
135-
}
136-
137-
$fetched = $this->database->listAttributes($collection, $queries);
138-
if (empty($fetched)) {
139-
break;
140-
}
141-
142-
array_push($attributes, ...$fetched);
143-
$lastAttribute = $fetched[count($fetched) - 1];
144-
145-
if (count($fetched) < $batchSize) {
146-
break;
147-
}
148-
}
149-
150-
$arrayKeys = [];
151-
$attributeTypes = [];
152-
$manyToManyKeys = [];
153-
154-
foreach ($attributes as $attribute) {
155-
$key = $attribute['key'];
156-
$type = $attribute['type'];
157-
$isArray = $attribute['array'] ?? false;
158-
$relationSide = $attribute['side'] ?? '';
159-
$relationType = $attribute['relationType'] ?? '';
160-
161-
if (
162-
$type === Attribute::TYPE_RELATIONSHIP &&
163-
$relationSide === UtopiaDatabase::RELATION_SIDE_CHILD
164-
) {
165-
continue;
166-
}
167-
168-
$attributeTypes[$key] = $type;
130+
[$databaseId, $collectionId] = explode(":", $this->resourceId);
131+
$database = new Database($databaseId, "");
132+
$collection = new Collection($database, "", $collectionId);
169133

170-
if (
171-
$type === Attribute::TYPE_RELATIONSHIP &&
172-
$relationType === 'manyToMany' &&
173-
$relationSide === 'parent'
174-
) {
175-
$manyToManyKeys[] = $key;
176-
}
177-
178-
if ($isArray && $type !== Attribute::TYPE_RELATIONSHIP) {
179-
$arrayKeys[] = $key;
180-
}
181-
}
182-
183-
$this->withCSVStream(function ($stream) use ($attributeTypes, $manyToManyKeys, $arrayKeys, $collection, $batchSize) {
134+
$this->withCsvStream(function ($stream) use ($batchSize, $collection) {
184135
$headers = fgetcsv($stream);
185136
if (! is_array($headers) || count($headers) === 0) {
186137
return;
187138
}
188139

189-
$this->validateCSVHeaders($headers, $attributeTypes);
140+
$this->validateCSVHeaders($headers, array_column($this->attributeMetadata, 'type'));
190141

191142
$buffer = [];
192143

@@ -200,59 +151,7 @@ private function exportDocuments(int $batchSize): void
200151
continue;
201152
}
202153

203-
$parsedData = $data;
204-
205-
foreach ($data as $key => $value) {
206-
$parsedValue = trim($value);
207-
$type = $attributeTypes[$key] ?? null;
208-
209-
if (! isset($type)) {
210-
continue;
211-
}
212-
213-
if (in_array($key, $manyToManyKeys, true)) {
214-
$parsedData[$key] = $parsedValue === ''
215-
? []
216-
: array_values(
217-
array_filter(
218-
array_map(
219-
'trim',
220-
explode(',', $parsedValue)
221-
)
222-
)
223-
);
224-
continue;
225-
}
226-
227-
if (in_array($key, $arrayKeys, true)) {
228-
if ($parsedValue === '') {
229-
$parsedData[$key] = [];
230-
} else {
231-
$arrayValues = str_getcsv($parsedValue);
232-
$arrayValues = array_map('trim', $arrayValues);
233-
234-
$parsedData[$key] = array_map(function ($item) use ($type) {
235-
return match ($type) {
236-
Attribute::TYPE_INTEGER => is_numeric($item) ? (int) $item : null,
237-
Attribute::TYPE_FLOAT => is_numeric($item) ? (float) $item : null,
238-
Attribute::TYPE_BOOLEAN => filter_var($item, FILTER_VALIDATE_BOOLEAN),
239-
default => $item,
240-
};
241-
}, $arrayValues);
242-
}
243-
continue;
244-
}
245-
246-
if ($parsedValue !== '') {
247-
$parsedData[$key] = match ($type) {
248-
Attribute::TYPE_INTEGER => is_numeric($parsedValue) ? (int) $parsedValue : null,
249-
Attribute::TYPE_FLOAT => is_numeric($parsedValue) ? (float) $parsedValue : null,
250-
Attribute::TYPE_BOOLEAN => filter_var($parsedValue, FILTER_VALIDATE_BOOLEAN),
251-
default => $parsedValue,
252-
};
253-
}
254-
}
255-
154+
$parsedData = $this->parseRow($data);
256155
$documentId = $parsedData['$id'] ?? 'unique()';
257156

258157
// `$id`, `$permissions` in the doc can cause issues!
@@ -313,6 +212,159 @@ protected function exportGroupFunctions(int $batchSize, array $resources): void
313212
throw new \Exception('Not Implemented');
314213
}
315214

215+
/**
 * Fetch every attribute of the target collection once and keep the result on the instance.
 *
 * The resource ID is split on ":" into a database ID and a collection ID. Attributes are
 * then read page by page through `$this->database->listAttributes()`, and the combined
 * list is stored in `$this->cachedAttributes` before `preprocessAttributes()` is run.
 * Calls made after the cache has been filled return immediately.
 *
 * @param int $batchSize Page size passed to `queryLimit()`; a page shorter than this ends the paging.
 * @return void
 * @throws Exception
 * @throws \Utopia\Database\Exception
 */
private function loadAndCacheAttributes(int $batchSize): void
{
    // Already loaded: nothing to do.
    if ($this->cachedAttributes !== null) {
        return;
    }

    [$databaseId, $collectionId] = explode(":", $this->resourceId);
    $collection = new Collection(new Database($databaseId, ""), "", $collectionId);

    $collected = [];
    $cursor = null;

    do {
        $queries = [$this->database->queryLimit($batchSize)];
        if ($cursor) {
            // Continue after the last attribute of the previous page.
            $queries[] = $this->database->queryCursorAfter($cursor);
        }

        $page = $this->database->listAttributes($collection, $queries);
        if (empty($page)) {
            break;
        }

        array_push($collected, ...$page);
        $cursor = $page[count($page) - 1];

        // A full page means there may be more; a short page is the last one.
    } while (count($page) >= $batchSize);

    $this->cachedAttributes = $collected;
    $this->preprocessAttributes();
}
257+
258+
/**
 * Build the per-attribute lookup table used while parsing rows.
 *
 * Resets `$this->attributeMetadata` and fills it from `$this->cachedAttributes`, keyed by
 * attribute key. Each entry holds:
 *  - "type": the attribute type,
 *  - "isArray": true for array attributes that are not relationships,
 *  - "isManyToMany": true for relationship attributes whose relation type is "manyToMany"
 *    and whose side is "parent".
 *
 * Relationship attributes on the child side are skipped and get no entry.
 *
 * @return void
 */
private function preprocessAttributes(): void
{
    $this->attributeMetadata = [];

    foreach ($this->cachedAttributes as $definition) {
        $name = $definition["key"];
        $kind = $definition["type"];
        $side = $definition["side"] ?? "";
        $isRelationship = $kind === Attribute::TYPE_RELATIONSHIP;

        // Child-side relationships are not represented in the metadata table.
        if ($isRelationship && $side === UtopiaDatabase::RELATION_SIDE_CHILD) {
            continue;
        }

        $flaggedAsArray = $definition["array"] ?? false;
        $relationKind = $definition["relationType"] ?? "";

        $this->attributeMetadata[$name] = [
            "type" => $kind,
            "isArray" => $flaggedAsArray && !$isRelationship,
            "isManyToMany" => $isRelationship
                && $relationKind === "manyToMany"
                && $side === "parent",
        ];
    }
}
289+
290+
/**
 * Convert one raw CSV row into document data using the cached attribute metadata.
 *
 * Columns that have an entry in `$this->attributeMetadata` are trimmed and converted:
 *  - many-to-many relationships become a list of trimmed, non-empty IDs split on ",",
 *  - array attributes are parsed with `str_getcsv()` and converted element by element,
 *  - all other attributes are converted as a single value; an empty cell becomes "".
 *
 * Columns without metadata are carried over unchanged. The caller reads `$id` from the
 * returned array, and `$id` has no metadata entry of its own, so dropping unknown
 * columns would lose any document ID supplied in the CSV.
 *
 * @param array $data Raw row keyed by column name; values are passed to `trim()`,
 *                    so they are presumably strings (TODO confirm against the caller).
 * @return array The row with known attributes converted and all other columns preserved.
 */
private function parseRow(array $data): array
{
    // Start from the raw row so columns without metadata (e.g. `$id`) survive.
    $parsedData = $data;

    foreach ($data as $key => $value) {
        if (!isset($this->attributeMetadata[$key])) {
            continue;
        }

        $metadata = $this->attributeMetadata[$key];
        $type = $metadata["type"];
        $parsedValue = trim($value);

        // many-to-many relationships: comma-separated list of related IDs
        if ($metadata["isManyToMany"]) {
            $parsedData[$key] = $parsedValue === ''
                ? []
                : array_values(
                    array_filter(
                        array_map('trim', explode(',', $parsedValue)),
                        // Drop only empty entries; a bare array_filter() would also
                        // discard the ID "0" because PHP treats it as falsy.
                        static fn (string $id): bool => $id !== ''
                    )
                );
            continue;
        }

        // array attributes
        if ($metadata["isArray"]) {
            if ($parsedValue === "") {
                $parsedData[$key] = [];
            } else {
                $arrayValues = array_map('trim', str_getcsv($parsedValue));
                $parsedData[$key] = $this->convertScalars($arrayValues, $type, true);
            }
            continue;
        }

        // normal attributes
        $parsedData[$key] = $parsedValue !== ""
            ? $this->convertScalars($parsedValue, $type)
            : $parsedValue;
    }

    return $parsedData;
}
344+
345+
/**
 * Convert a raw string value, or an array of them, according to an attribute type.
 *
 * Every element is trimmed first. Integer and float types yield the cast number when the
 * trimmed text is non-empty and numeric, otherwise null. Boolean types go through
 * `filter_var(..., FILTER_VALIDATE_BOOLEAN)`. Any other type yields the trimmed text.
 *
 * @param mixed  $value   A single raw value or an array of raw values.
 * @param string $type    Attribute type to convert to.
 * @param bool   $isArray When true the whole converted array is returned; otherwise
 *                        only the element at index 0.
 * @return mixed
 */
private function convertScalars(mixed $value, string $type, bool $isArray = false): mixed
{
    $items = is_array($value) ? $value : [$value];

    // Pick the per-element converter once, then apply it to every element.
    $convert = match ($type) {
        Attribute::TYPE_INTEGER => static function ($raw) {
            $raw = trim($raw);

            return ($raw !== '' && is_numeric($raw)) ? (int) $raw : null;
        },
        Attribute::TYPE_FLOAT => static function ($raw) {
            $raw = trim($raw);

            return ($raw !== '' && is_numeric($raw)) ? (float) $raw : null;
        },
        Attribute::TYPE_BOOLEAN => static function ($raw) {
            return filter_var(trim($raw), FILTER_VALIDATE_BOOLEAN);
        },
        default => 'trim',
    };

    $result = array_map($convert, $items);

    if ($isArray) {
        return $result;
    }

    return $result[0];
}
367+
316368
/**
317369
* @param callable(resource $stream): void $callback
318370
* @return void

0 commit comments

Comments
 (0)