Skip to content

Commit 42ff497

Browse files
authored
Merge pull request #112 from utopia-php/feat-csv-export
Feat CSV export
2 parents c42935a + c46083a commit 42ff497

5 files changed

Lines changed: 724 additions & 16 deletions

File tree

src/Migration/Destinations/CSV.php

Lines changed: 295 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,295 @@
1+
<?php
2+
3+
namespace Utopia\Migration\Destinations;
4+
5+
use Utopia\CLI\Console;
6+
use Utopia\Database\Exception\Authorization;
7+
use Utopia\Database\Exception\Conflict;
8+
use Utopia\Database\Exception\Structure;
9+
use Utopia\Migration\Destination;
10+
use Utopia\Migration\Resource;
11+
use Utopia\Migration\Resources\Database\Row;
12+
use Utopia\Migration\Transfer;
13+
use Utopia\Storage\Device;
14+
use Utopia\Storage\Device\Local;
15+
16+
class CSV extends Destination
17+
{
18+
protected Device $deviceForFiles;
19+
protected string $resourceId;
20+
protected string $directory;
21+
protected string $outputFile;
22+
protected Local $local;
23+
24+
protected array $allowedColumns = [];
25+
26+
/**
27+
* @throws Authorization
28+
* @throws Structure
29+
* @throws Conflict
30+
* @throws \Exception
31+
*/
32+
public function __construct(
33+
Device $deviceForFiles,
34+
string $resourceId,
35+
string $directory,
36+
string $filename,
37+
array $allowedColumns = [],
38+
private readonly string $delimiter = ',',
39+
private readonly string $enclosure = '"',
40+
private readonly string $escape = '\\',
41+
private readonly bool $includeHeaders = true,
42+
) {
43+
$this->deviceForFiles = $deviceForFiles;
44+
$this->resourceId = $resourceId;
45+
$this->directory = $directory;
46+
$this->outputFile = $this->sanitizeFilename($filename);
47+
$this->local = new Local(\sys_get_temp_dir() . '/csv_export_' . uniqid());
48+
$this->local->setTransferChunkSize(Transfer::STORAGE_MAX_CHUNK_SIZE);
49+
$this->createDirectory($this->local->getRoot());
50+
51+
foreach ($allowedColumns as $attribute) {
52+
$this->allowedColumns[$attribute] = true;
53+
}
54+
}
55+
56+
public static function getName(): string
57+
{
58+
return 'CSV';
59+
}
60+
61+
public static function getSupportedResources(): array
62+
{
63+
return [
64+
Resource::TYPE_ROW,
65+
];
66+
}
67+
68+
public function report(array $resources = []): array
69+
{
70+
return [];
71+
}
72+
73+
/**
74+
* @param array<Row> $resources
75+
* @throws \JsonException
76+
* @throws \Exception
77+
*/
78+
protected function import(array $resources, callable $callback): void
79+
{
80+
$handle = null; // file handle
81+
$buffer = ['lines' => [], 'size' => 0]; // Buffer for batching writes
82+
$bufferBytes = 1024 * 1024; // 1MB
83+
$log = $this->local->getRoot() . '/' . $this->outputFile . '.csv';
84+
85+
$flushBuffer = function () use ($log, &$handle, &$buffer) {
86+
if (empty($buffer['lines'])) {
87+
return;
88+
}
89+
try {
90+
if (!isset($handle)) {
91+
$handle = \fopen($log, 'a');
92+
if ($handle === false) {
93+
throw new \Exception("Failed to open file for writing: $log");
94+
}
95+
}
96+
97+
foreach ($buffer['lines'] as $line) {
98+
if (\fputcsv($handle, $line, $this->delimiter, $this->enclosure, $this->escape) === false) {
99+
throw new \Exception("Failed to write CSV line to file: $log");
100+
}
101+
}
102+
103+
$buffer = [
104+
'lines' => [],
105+
'size' => 0
106+
];
107+
} catch (\Exception $e) {
108+
// Close handle on error
109+
if (isset($handle)) {
110+
\fclose($handle);
111+
unset($handle);
112+
}
113+
throw $e;
114+
}
115+
};
116+
117+
try {
118+
foreach ($resources as $resource) {
119+
if (!($resource instanceof Row)) {
120+
continue;
121+
}
122+
123+
$csvData = $this->resourceToCSVData($resource);
124+
125+
// Write headers if this is the first row of the file
126+
if (!isset($csvHeader) && $this->includeHeaders) {
127+
$headers = \array_keys($csvData);
128+
$buffer['lines'][] = $headers;
129+
$buffer['size'] += \strlen(\implode($this->delimiter, $headers)) + 2; // Approximate size
130+
$csvHeader = true;
131+
}
132+
133+
$dataValues = \array_values($csvData);
134+
$buffer['lines'][] = $dataValues;
135+
$buffer['size'] += \strlen(\implode($this->delimiter, $dataValues)) + 2; // Approximate size
136+
137+
if ($buffer['size'] >= $bufferBytes) {
138+
$flushBuffer();
139+
}
140+
141+
$resource->setStatus(Resource::STATUS_SUCCESS);
142+
if (isset($this->cache)) {
143+
$this->cache->update($resource);
144+
}
145+
}
146+
147+
// Flush any remaining buffered lines
148+
if (!empty($buffer['lines'])) {
149+
$flushBuffer();
150+
}
151+
} finally {
152+
if (\is_resource($handle)) {
153+
\fclose($handle);
154+
}
155+
}
156+
157+
$callback($resources);
158+
}
159+
160+
/**
161+
* @throws \Exception
162+
*/
163+
public function shutdown(): void
164+
{
165+
$filename = $this->outputFile . '.csv';
166+
$sourcePath = $this->local->getPath($filename);
167+
$destPath = $this->deviceForFiles->getPath($this->directory . '/' . $filename);
168+
169+
// Check if the CSV file was actually created
170+
if (!$this->local->exists($sourcePath)) {
171+
throw new \Exception("No data to export for resource: $this->resourceId");
172+
}
173+
174+
try {
175+
// Transfer expects absolute paths within each device
176+
$result = $this->local->transfer(
177+
$sourcePath,
178+
$destPath,
179+
$this->deviceForFiles
180+
);
181+
if ($result === false) {
182+
throw new \Exception('Error transferring to ' . $this->deviceForFiles->getRoot() . '/' . $filename);
183+
}
184+
if (!$this->deviceForFiles->exists($destPath)) {
185+
throw new \Exception('File not found on destination: ' . $destPath);
186+
}
187+
} finally {
188+
// Clean up the temporary directory
189+
if (!$this->local->deletePath('') || $this->local->exists($this->local->getRoot())) {
190+
Console::error('Error cleaning up: ' . $this->local->getRoot());
191+
}
192+
}
193+
}
194+
195+
/**
196+
* Helper to ensure a directory exists.
197+
* @throws \Exception
198+
*/
199+
protected function createDirectory(string $path): void
200+
{
201+
if (!\file_exists($path)) {
202+
if (!\mkdir($path, 0755, true)) {
203+
throw new \Exception('Error creating directory: ' . $path);
204+
}
205+
}
206+
}
207+
208+
/**
209+
* Sanitize a filename to make it filesystem-safe
210+
*/
211+
protected function sanitizeFilename(string $filename): string
212+
{
213+
// Replace problematic characters with underscores
214+
$sanitized = \preg_replace('/[:\\/<>"|*?]/', '_', $filename);
215+
$sanitized = \preg_replace('/[^\x20-\x7E]/', '_', $sanitized);
216+
$sanitized = \trim($sanitized);
217+
return empty($sanitized) ? 'export' : $sanitized;
218+
}
219+
220+
/**
221+
* Convert a resource to CSV-compatible data
222+
*/
223+
protected function resourceToCSVData(Row $resource): array
224+
{
225+
$data = [
226+
'$id' => $resource->getId(),
227+
'$permissions' => $resource->getPermissions(),
228+
'$createdAt' => $resource->getCreatedAt(),
229+
'$updatedAt' => $resource->getUpdatedAt(),
230+
];
231+
232+
// Add all attributes if no filter specified, otherwise only allowed ones
233+
if (empty($this->allowedColumns)) {
234+
$data = \array_merge($data, $resource->getData());
235+
} else {
236+
foreach ($resource->getData() as $key => $value) {
237+
if (isset($this->allowedColumns[$key])) {
238+
$data[$key] = $value;
239+
}
240+
}
241+
}
242+
243+
foreach ($data as $key => $value) {
244+
$data[$key] = $this->convertValueToCSV($value);
245+
}
246+
247+
return $data;
248+
}
249+
250+
/**
251+
* Convert a single value to CSV-compatible format
252+
*/
253+
protected function convertValueToCSV(mixed $value): string
254+
{
255+
if (\is_null($value)) {
256+
return 'null';
257+
}
258+
if (\is_bool($value)) {
259+
return $value ? 'true' : 'false';
260+
}
261+
if (\is_array($value)) {
262+
return $this->convertArrayToCSV($value);
263+
}
264+
if (\is_object($value)) {
265+
return $this->convertObjectToCSV($value);
266+
}
267+
return (string)$value;
268+
}
269+
270+
/**
271+
* Convert array to CSV format
272+
*/
273+
protected function convertArrayToCSV(array $value): string
274+
{
275+
if (empty($value)) {
276+
return '';
277+
}
278+
if (isset($value['$id'])) {
279+
return $value['$id'];
280+
}
281+
return \json_encode($value);
282+
}
283+
284+
/**
285+
* Convert object to CSV format
286+
*/
287+
protected function convertObjectToCSV($value): string
288+
{
289+
if ($value instanceof Row) {
290+
return $value->getId();
291+
}
292+
return \json_encode($value);
293+
}
294+
295+
}

src/Migration/Resources/Database/Row.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@ class Row extends Resource
1414
* @param array<string> $permissions
1515
*/
1616
public function __construct(
17-
string $id,
17+
string $id,
1818
private readonly Table $table,
1919
private readonly array $data = [],
20-
array $permissions = []
20+
array $permissions = []
2121
) {
2222
$this->id = $id;
2323
$this->permissions = $permissions;

src/Migration/Sources/Appwrite.php

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -710,7 +710,17 @@ private function exportDatabases(int $batchSize): void
710710
$queries = [$this->database->queryLimit($batchSize)];
711711

712712
if ($this->rootResourceId !== '' && $this->rootResourceType === Resource::TYPE_DATABASE) {
713-
$queries[] = $this->database->queryEqual('$id', [$this->rootResourceId]);
713+
$targetDatabaseId = $this->rootResourceId;
714+
715+
// Handle database:collection format - extract database ID
716+
if (\str_contains($this->rootResourceId, ':')) {
717+
$parts = \explode(':', $this->rootResourceId, 2);
718+
if (\count($parts) === 2) {
719+
$targetDatabaseId = $parts[0];
720+
}
721+
}
722+
723+
$queries[] = $this->database->queryEqual('$id', [$targetDatabaseId]);
714724
$queries[] = $this->database->queryLimit(1);
715725
}
716726

@@ -738,11 +748,11 @@ private function exportDatabases(int $batchSize): void
738748
break;
739749
}
740750

741-
$lastDatabase = $databases[count($databases) - 1];
751+
$lastDatabase = $databases[\count($databases) - 1];
742752

743753
$this->callback($databases);
744754

745-
if (count($databases) < $batchSize) {
755+
if (\count($databases) < $batchSize) {
746756
break;
747757
}
748758
}
@@ -757,14 +767,33 @@ private function exportTables(int $batchSize): void
757767
$databases = $this->cache->get(Database::getName());
758768

759769
foreach ($databases as $database) {
770+
/** @var Database $database */
760771
$lastTable = null;
761772

762-
/** @var Database $database */
763773
while (true) {
764774
$queries = [$this->database->queryLimit($batchSize)];
765775
$tables = [];
766776

767-
if ($lastTable) {
777+
// Filter to specific table if rootResourceType is database with database:collection format
778+
if (
779+
$this->rootResourceId !== '' &&
780+
$this->rootResourceType === Resource::TYPE_DATABASE &&
781+
\str_contains($this->rootResourceId, ':')
782+
) {
783+
$parts = \explode(':', $this->rootResourceId, 2);
784+
if (\count($parts) === 2) {
785+
$targetTableId = $parts[1]; // table ID
786+
$queries[] = $this->database->queryEqual('$id', [$targetTableId]);
787+
$queries[] = $this->database->queryLimit(1);
788+
}
789+
} elseif (
790+
$this->rootResourceId !== '' &&
791+
$this->rootResourceType === Resource::TYPE_TABLE
792+
) {
793+
$targetTableId = $this->rootResourceId;
794+
$queries[] = $this->database->queryEqual('$id', [$targetTableId]);
795+
$queries[] = $this->database->queryLimit(1);
796+
} elseif ($lastTable) {
768797
$queries[] = $this->database->queryCursorAfter($lastTable);
769798
}
770799

@@ -790,9 +819,9 @@ private function exportTables(int $batchSize): void
790819

791820
$this->callback($tables);
792821

793-
$lastTable = $tables[count($tables) - 1];
822+
$lastTable = $tables[\count($tables) - 1];
794823

795-
if (count($tables) < $batchSize) {
824+
if (\count($tables) < $batchSize) {
796825
break;
797826
}
798827
}
@@ -807,7 +836,7 @@ private function exportColumns(int $batchSize): void
807836
{
808837
$tables = $this->cache->get(Table::getName());
809838

810-
/** @var Table[] $tables */
839+
/** @var array<Table> $tables */
811840
foreach ($tables as $table) {
812841
$lastColumn = null;
813842

0 commit comments

Comments
 (0)