@@ -32,6 +32,11 @@ class CSV extends Source
3232
3333 private bool $ downloaded = false ;
3434
35+ // caching
36+ private ?array $ cachedAttributes = null ;
37+
38+ private ?array $ attributeMetadata = null ;
39+
3540 public function __construct (
3641 string $ resourceId ,
3742 string $ filePath ,
@@ -120,73 +125,19 @@ protected function exportGroupDatabases(int $batchSize, array $resources): void
120125 */
121126 private function exportDocuments (int $ batchSize ): void
122127 {
128+ $ this ->loadAndCacheAttributes ($ batchSize );
123129
124- $ attributes = [];
125- $ lastAttribute = null ;
126-
127- [$ databaseId , $ collectionId ] = explode (': ' , $ this ->resourceId );
128- $ database = new Database ($ databaseId , '' );
129- $ collection = new Collection ($ database , '' , $ collectionId );
130-
131- while (true ) {
132- $ queries = [$ this ->database ->queryLimit ($ batchSize )];
133- if ($ lastAttribute ) {
134- $ queries [] = $ this ->database ->queryCursorAfter ($ lastAttribute );
135- }
136-
137- $ fetched = $ this ->database ->listAttributes ($ collection , $ queries );
138- if (empty ($ fetched )) {
139- break ;
140- }
141-
142- array_push ($ attributes , ...$ fetched );
143- $ lastAttribute = $ fetched [count ($ fetched ) - 1 ];
144-
145- if (count ($ fetched ) < $ batchSize ) {
146- break ;
147- }
148- }
149-
150- $ arrayKeys = [];
151- $ attributeTypes = [];
152- $ manyToManyKeys = [];
153-
154- foreach ($ attributes as $ attribute ) {
155- $ key = $ attribute ['key ' ];
156- $ type = $ attribute ['type ' ];
157- $ isArray = $ attribute ['array ' ] ?? false ;
158- $ relationSide = $ attribute ['side ' ] ?? '' ;
159- $ relationType = $ attribute ['relationType ' ] ?? '' ;
160-
161- if (
162- $ type === Attribute::TYPE_RELATIONSHIP &&
163- $ relationSide === UtopiaDatabase::RELATION_SIDE_CHILD
164- ) {
165- continue ;
166- }
167-
168- $ attributeTypes [$ key ] = $ type ;
130+ [$ databaseId , $ collectionId ] = explode (": " , $ this ->resourceId );
131+ $ database = new Database ($ databaseId , "" );
132+ $ collection = new Collection ($ database , "" , $ collectionId );
169133
170- if (
171- $ type === Attribute::TYPE_RELATIONSHIP &&
172- $ relationType === 'manyToMany ' &&
173- $ relationSide === 'parent '
174- ) {
175- $ manyToManyKeys [] = $ key ;
176- }
177-
178- if ($ isArray && $ type !== Attribute::TYPE_RELATIONSHIP ) {
179- $ arrayKeys [] = $ key ;
180- }
181- }
182-
183- $ this ->withCSVStream (function ($ stream ) use ($ attributeTypes , $ manyToManyKeys , $ arrayKeys , $ collection , $ batchSize ) {
134+ $ this ->withCsvStream (function ($ stream ) use ($ batchSize , $ collection ) {
184135 $ headers = fgetcsv ($ stream );
185136 if (! is_array ($ headers ) || count ($ headers ) === 0 ) {
186137 return ;
187138 }
188139
189- $ this ->validateCSVHeaders ($ headers , $ attributeTypes );
140+ $ this ->validateCSVHeaders ($ headers , array_column ( $ this -> attributeMetadata , ' type ' ) );
190141
191142 $ buffer = [];
192143
@@ -200,59 +151,7 @@ private function exportDocuments(int $batchSize): void
200151 continue ;
201152 }
202153
203- $ parsedData = $ data ;
204-
205- foreach ($ data as $ key => $ value ) {
206- $ parsedValue = trim ($ value );
207- $ type = $ attributeTypes [$ key ] ?? null ;
208-
209- if (! isset ($ type )) {
210- continue ;
211- }
212-
213- if (in_array ($ key , $ manyToManyKeys , true )) {
214- $ parsedData [$ key ] = $ parsedValue === ''
215- ? []
216- : array_values (
217- array_filter (
218- array_map (
219- 'trim ' ,
220- explode (', ' , $ parsedValue )
221- )
222- )
223- );
224- continue ;
225- }
226-
227- if (in_array ($ key , $ arrayKeys , true )) {
228- if ($ parsedValue === '' ) {
229- $ parsedData [$ key ] = [];
230- } else {
231- $ arrayValues = str_getcsv ($ parsedValue );
232- $ arrayValues = array_map ('trim ' , $ arrayValues );
233-
234- $ parsedData [$ key ] = array_map (function ($ item ) use ($ type ) {
235- return match ($ type ) {
236- Attribute::TYPE_INTEGER => is_numeric ($ item ) ? (int ) $ item : null ,
237- Attribute::TYPE_FLOAT => is_numeric ($ item ) ? (float ) $ item : null ,
238- Attribute::TYPE_BOOLEAN => filter_var ($ item , FILTER_VALIDATE_BOOLEAN ),
239- default => $ item ,
240- };
241- }, $ arrayValues );
242- }
243- continue ;
244- }
245-
246- if ($ parsedValue !== '' ) {
247- $ parsedData [$ key ] = match ($ type ) {
248- Attribute::TYPE_INTEGER => is_numeric ($ parsedValue ) ? (int ) $ parsedValue : null ,
249- Attribute::TYPE_FLOAT => is_numeric ($ parsedValue ) ? (float ) $ parsedValue : null ,
250- Attribute::TYPE_BOOLEAN => filter_var ($ parsedValue , FILTER_VALIDATE_BOOLEAN ),
251- default => $ parsedValue ,
252- };
253- }
254- }
255-
154+ $ parsedData = $ this ->parseRow ($ data );
256155 $ documentId = $ parsedData ['$id ' ] ?? 'unique() ' ;
257156
258157 // `$id`, `$permissions` in the doc can cause issues!
@@ -313,6 +212,159 @@ protected function exportGroupFunctions(int $batchSize, array $resources): void
313212 throw new \Exception ('Not Implemented ' );
314213 }
315214
/**
 * Fetches every attribute of the target collection once and caches the result.
 *
 * The collection is identified by `$this->resourceId`, which is split on ":"
 * into a database ID and a collection ID. Attributes are requested in pages of
 * `$batchSize`; the last attribute of each page is used as the cursor for the
 * next request, and paging stops on an empty page or a page shorter than
 * `$batchSize`. The combined list is stored in `$this->cachedAttributes` and
 * `preprocessAttributes()` is then run on it.
 *
 * Returns immediately when `$this->cachedAttributes` is already populated.
 *
 * @param int $batchSize Page size used for each attribute listing request.
 * @return void
 * @throws Exception
 * @throws \Utopia\Database\Exception
 */
private function loadAndCacheAttributes(int $batchSize): void
{
    // Already loaded: nothing to do.
    if ($this->cachedAttributes !== null) {
        return;
    }

    [$databaseId, $collectionId] = explode(':', $this->resourceId);
    $collection = new Collection(new Database($databaseId, ''), '', $collectionId);

    $collected = [];
    $cursor = null;

    do {
        $queries = [$this->database->queryLimit($batchSize)];
        if ($cursor) {
            // Continue after the last attribute of the previous page.
            $queries[] = $this->database->queryCursorAfter($cursor);
        }

        $page = $this->database->listAttributes($collection, $queries);
        if (empty($page)) {
            break;
        }

        $pageSize = count($page);
        array_push($collected, ...$page);
        $cursor = $page[$pageSize - 1];

        // A short page means there is nothing left to fetch.
    } while ($pageSize >= $batchSize);

    $this->cachedAttributes = $collected;
    $this->preprocessAttributes();
}
257+
/**
 * Rebuilds `$this->attributeMetadata` from `$this->cachedAttributes`.
 *
 * The table is reset and then receives one entry per cached attribute, indexed
 * by the attribute's key:
 *  - "type":         the attribute type as listed;
 *  - "isArray":      true for array attributes that are not relationships;
 *  - "isManyToMany": true for relationships whose relation type is
 *                    "manyToMany" and whose side is "parent".
 *
 * Relationship attributes on the child side get no entry at all.
 *
 * @return void
 */
private function preprocessAttributes(): void
{
    $this->attributeMetadata = [];

    foreach ($this->cachedAttributes as $attribute) {
        $key = $attribute['key'];
        $type = $attribute['type'];
        $isArray = $attribute['array'] ?? false;
        $side = $attribute['side'] ?? '';
        $relationType = $attribute['relationType'] ?? '';

        $isRelationship = $type === Attribute::TYPE_RELATIONSHIP;

        // Child-side relationships are left out of the lookup table.
        if ($isRelationship && $side === UtopiaDatabase::RELATION_SIDE_CHILD) {
            continue;
        }

        $isPlainArray = $isArray && !$isRelationship;
        $isManyToMany = $isRelationship
            && $relationType === 'manyToMany'
            && $side === 'parent';

        $this->attributeMetadata[$key] = [
            'type' => $type,
            'isArray' => $isPlainArray,
            'isManyToMany' => $isManyToMany,
        ];
    }
}
289+
/**
 * Converts the values of one CSV row according to the cached attribute metadata.
 *
 * Columns whose key has an entry in `$this->attributeMetadata` are converted:
 *  - many-to-many relationships: the cell is split on "," into a list of
 *    trimmed, non-empty IDs (an empty cell yields an empty list);
 *  - array attributes: the cell is parsed with `str_getcsv()` and each item is
 *    converted to the attribute type (an empty cell yields an empty list);
 *  - all other attributes: the trimmed cell is converted to the attribute
 *    type; an empty cell is kept as an empty string.
 *
 * Columns without metadata (for example `$id` and `$permissions`) are passed
 * through unchanged, because the caller reads them from the returned row.
 *
 * @param array $data Row values indexed by column name; names are matched
 *                    against attribute keys.
 * @return array The row with mapped columns converted and all other columns
 *               left as they were.
 */
private function parseRow(array $data): array
{
    // Seed with the raw row: starting from an empty array would silently drop
    // every column that has no attribute metadata, including `$id`.
    $parsedData = $data;

    foreach ($data as $key => $value) {
        if (!isset($this->attributeMetadata[$key])) {
            continue;
        }

        $metadata = $this->attributeMetadata[$key];
        $type = $metadata['type'];
        $parsedValue = trim($value);

        // many-to-many relationships: comma separated list of related IDs
        if ($metadata['isManyToMany']) {
            $parsedData[$key] = $parsedValue === ''
                ? []
                : array_values(
                    array_filter(
                        array_map('trim', explode(',', $parsedValue)),
                        // Compare against '' explicitly; the default filter
                        // would also discard the valid ID "0".
                        static fn (string $id): bool => $id !== ''
                    )
                );
            continue;
        }

        // array attributes
        if ($metadata['isArray']) {
            if ($parsedValue === '') {
                $parsedData[$key] = [];
            } else {
                $arrayValues = array_map('trim', str_getcsv($parsedValue));
                $parsedData[$key] = $this->convertScalars($arrayValues, $type, true);
            }
            continue;
        }

        // normal attributes
        $parsedData[$key] = $parsedValue !== ''
            ? $this->convertScalars($parsedValue, $type)
            : $parsedValue;
    }

    return $parsedData;
}
344+
/**
 * Converts one raw value, or an array of raw values, to the given attribute type.
 *
 * Every item is trimmed first. Integer and float types yield the cast number
 * when the trimmed text is numeric and null otherwise; the boolean type goes
 * through `filter_var()` with `FILTER_VALIDATE_BOOLEAN`; any other type yields
 * the trimmed text.
 *
 * @param mixed $value A single raw value or an array of raw values.
 * @param string $type Attribute type that selects the conversion.
 * @param bool $isArray When true the whole converted array is returned,
 *                      otherwise only its element at index 0.
 * @return mixed
 */
private function convertScalars(mixed $value, string $type, bool $isArray = false): mixed
{
    // Pick the per-item converter once, then apply it to every item.
    $converter = match ($type) {
        Attribute::TYPE_INTEGER => static function ($raw) {
            $text = trim($raw);

            return ($text !== '' && is_numeric($text)) ? (int) $text : null;
        },
        Attribute::TYPE_FLOAT => static function ($raw) {
            $text = trim($raw);

            return ($text !== '' && is_numeric($text)) ? (float) $text : null;
        },
        Attribute::TYPE_BOOLEAN => static function ($raw) {
            return filter_var(trim($raw), FILTER_VALIDATE_BOOLEAN);
        },
        default => 'trim',
    };

    $items = is_array($value) ? $value : [$value];
    $results = array_map($converter, $items);

    if ($isArray) {
        return $results;
    }

    return $results[0];
}
367+
316368 /**
317369 * @param callable(resource $stream): void $callback
318370 * @return void
0 commit comments