Skip to content

Commit 6124bc8

Browse files
committed
fix: empty segment listing
Signed-off-by: ferhat elmas <elmas.ferhat@gmail.com>
1 parent f6e193a commit 6124bc8

5 files changed

Lines changed: 836 additions & 17 deletions

File tree

Lines changed: 333 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,333 @@
1+
-- storage.get_common_prefix
--
-- Returns the "folder" (common prefix) that p_key falls under directly below
-- p_prefix: the leading part of p_key up to and including the first
-- p_delimiter that follows a non-empty path segment after the prefix.
--
-- Parameters:
--   p_key       object key to inspect
--   p_prefix    listing prefix; NULL is treated as ''
--   p_delimiter path delimiter; NULL or '' disables grouping
--
-- Returns NULL when:
--   * the delimiter is NULL or empty,
--   * p_key does not start with p_prefix (compared case-insensitively), or
--   * no delimiter follows the first non-empty segment (the key is a leaf).
--
-- Examples (delimiter '/'):
--   ('prefix/a/file',  'prefix/') -> 'prefix/a/'
--   ('prefix//a/file', 'prefix/') -> 'prefix//a/'
--   ('prefix//file',   'prefix/') -> NULL
CREATE OR REPLACE FUNCTION storage.get_common_prefix(
    p_key TEXT,
    p_prefix TEXT,
    p_delimiter TEXT
) RETURNS TEXT
    IMMUTABLE
    LANGUAGE plpgsql
AS $$
DECLARE
    v_prefix TEXT := coalesce(p_prefix, '');
    v_suffix TEXT;
    -- 1-based position in v_suffix of the first character that is not part
    -- of a leading run of delimiters.
    v_scan_index INT := 1;
    -- 1-based position of the next delimiter, relative to v_scan_index.
    v_next_delimiter_index INT;
BEGIN
    IF coalesce(p_delimiter, '') = '' THEN
        RETURN NULL;
    END IF;

    -- The key must start with the prefix; the comparison ignores case.
    IF v_prefix <> '' AND lower(left(p_key, length(v_prefix))) <> lower(v_prefix) THEN
        RETURN NULL;
    END IF;

    v_suffix := substring(p_key FROM length(v_prefix) + 1);

    -- Skip empty path segments directly after the prefix so that repeated
    -- delimiters (e.g. 'prefix//file') do not yield the prefix itself again.
    WHILE left(substring(v_suffix FROM v_scan_index), length(p_delimiter)) = p_delimiter LOOP
        v_scan_index := v_scan_index + length(p_delimiter);
    END LOOP;

    v_next_delimiter_index := position(p_delimiter IN substring(v_suffix FROM v_scan_index));
    IF v_next_delimiter_index = 0 THEN
        -- No further delimiter: the key is a leaf under the prefix.
        RETURN NULL;
    END IF;

    -- Translate the relative delimiter position back into a length on p_key:
    -- prefix + skipped delimiters + segment + the closing delimiter.
    RETURN left(
        p_key,
        length(v_prefix) + (v_scan_index - 1) + v_next_delimiter_index - 1 + length(p_delimiter)
    );
END;
$$;
40+
41+
-- storage.get_prefix_child_name
--
-- Returns the name of the first non-empty path segment of p_key below
-- p_prefix, without any delimiter.
--
-- Parameters:
--   p_key       object key to inspect
--   p_prefix    listing prefix; NULL is treated as ''
--   p_delimiter path delimiter; NULL or '' yields NULL
--
-- Returns NULL when:
--   * the delimiter is NULL or empty,
--   * p_key does not start with p_prefix (compared case-insensitively), or
--   * nothing but delimiters (or nothing at all) follows the prefix.
--
-- Examples (delimiter '/'):
--   ('prefix/a/file', 'prefix/') -> 'a'
--   ('prefix//file',  'prefix/') -> 'file'
--   ('prefix//',      'prefix/') -> NULL
CREATE OR REPLACE FUNCTION storage.get_prefix_child_name(
    p_key TEXT,
    p_prefix TEXT,
    p_delimiter TEXT
) RETURNS TEXT
    IMMUTABLE
    LANGUAGE plpgsql
AS $$
DECLARE
    v_prefix TEXT := coalesce(p_prefix, '');
    v_suffix TEXT;
    -- 1-based position in v_suffix of the first character that is not part
    -- of a leading run of delimiters.
    v_scan_index INT := 1;
    v_trimmed_suffix TEXT;
BEGIN
    IF coalesce(p_delimiter, '') = '' THEN
        RETURN NULL;
    END IF;

    -- The key must start with the prefix; the comparison ignores case.
    IF v_prefix <> '' AND lower(left(p_key, length(v_prefix))) <> lower(v_prefix) THEN
        RETURN NULL;
    END IF;

    v_suffix := substring(p_key FROM length(v_prefix) + 1);

    -- Skip empty path segments (repeated delimiters) directly after the prefix.
    WHILE left(substring(v_suffix FROM v_scan_index), length(p_delimiter)) = p_delimiter LOOP
        v_scan_index := v_scan_index + length(p_delimiter);
    END LOOP;

    v_trimmed_suffix := substring(v_suffix FROM v_scan_index);
    IF coalesce(v_trimmed_suffix, '') = '' THEN
        RETURN NULL;
    END IF;

    -- Everything up to the next delimiter (or the whole remainder for a leaf).
    RETURN split_part(v_trimmed_suffix, p_delimiter, 1);
END;
$$;
77+
78+
-- storage.search
--
-- Lists the direct children (folders and files) of a prefix inside a bucket.
-- Folder rows carry only a name; all other columns are NULL. File rows carry
-- the object's id, timestamps and metadata. Returned names are the child
-- segment as computed by storage.get_prefix_child_name, so empty path
-- segments directly after the prefix are ignored.
--
-- Parameters:
--   prefix      listing prefix; NULL is treated as ''
--   bucketname  bucket to list (compared against objects.bucket_id)
--   limits      maximum rows to return; NULL means 100, capped at 1500
--   levels      not referenced by this implementation
--   offsets     number of leading entries to skip
--   search      appended to prefix to form the effective prefix
--   sortcolumn  'name' | 'updated_at' | 'created_at' | 'last_accessed_at';
--               anything else falls back to 'name'
--   sortorder   'asc' (case-insensitive) sorts ascending, anything else descending
--
-- Two strategies are used:
--   * sortcolumn <> 'name': a single dynamic query that lists folders first
--     (ordered by name) and then files (ordered by the chosen column, then name).
--   * sortcolumn = 'name': a seek loop over lower(name) COLLATE "C" that emits
--     one row per folder and jumps past the folder's contents, and reads runs
--     of files in batches.
CREATE OR REPLACE FUNCTION storage.search(
    prefix text,
    bucketname text,
    limits int DEFAULT 100,
    levels int DEFAULT 1,
    offsets int DEFAULT 0,
    search text DEFAULT '',
    sortcolumn text DEFAULT 'name',
    sortorder text DEFAULT 'asc'
)
RETURNS TABLE (
    name text,
    id uuid,
    updated_at timestamptz,
    created_at timestamptz,
    last_accessed_at timestamptz,
    metadata jsonb
)
SECURITY INVOKER
LANGUAGE plpgsql STABLE
AS $func$
DECLARE
    v_peek_name TEXT;
    v_current RECORD;
    v_common_prefix TEXT;
    v_delimiter CONSTANT TEXT := '/';

    -- Configuration
    v_limit INT;
    v_prefix TEXT;
    v_prefix_lower TEXT;
    v_is_asc BOOLEAN;
    v_order_by TEXT;
    v_sort_order TEXT;
    v_upper_bound TEXT;
    v_file_batch_size INT;

    -- Dynamic SQL for batch query only
    v_batch_query TEXT;

    -- Seek state
    v_next_seek TEXT;
    v_count INT := 0;
    v_skipped INT := 0;
    v_has_pending_peek BOOLEAN := FALSE;
BEGIN
    v_limit := LEAST(coalesce(limits, 100), 1500);
    v_prefix := coalesce(prefix, '') || coalesce(search, '');
    v_prefix_lower := lower(v_prefix);
    v_is_asc := lower(coalesce(sortorder, 'asc')) = 'asc';
    v_file_batch_size := LEAST(GREATEST(v_limit * 2, 100), 1000);

    -- Whitelist the sort column; it is interpolated into dynamic SQL below.
    CASE lower(coalesce(sortcolumn, 'name'))
        WHEN 'name' THEN v_order_by := 'name';
        WHEN 'updated_at' THEN v_order_by := 'updated_at';
        WHEN 'created_at' THEN v_order_by := 'created_at';
        WHEN 'last_accessed_at' THEN v_order_by := 'last_accessed_at';
        ELSE v_order_by := 'name';
    END CASE;

    v_sort_order := CASE WHEN v_is_asc THEN 'asc' ELSE 'desc' END;

    -- Sorting by a timestamp column: one query, folders (sort_group 0) first,
    -- then files (sort_group 1). A key is a folder entry when it has a common
    -- prefix below v_prefix, and a file entry otherwise.
    IF v_order_by != 'name' THEN
        RETURN QUERY EXECUTE format(
            $sql$
            WITH folders AS (
                SELECT storage.get_prefix_child_name(objects.name, $1, '/') AS folder
                FROM storage.objects
                WHERE objects.name ILIKE $1 || '%%'
                  AND bucket_id = $2
                  AND storage.get_common_prefix(objects.name, $1, '/') IS NOT NULL
                GROUP BY folder
            ), entries AS (
                SELECT folder AS "name",
                       NULL::uuid AS id,
                       NULL::timestamptz AS updated_at,
                       NULL::timestamptz AS created_at,
                       NULL::timestamptz AS last_accessed_at,
                       NULL::jsonb AS metadata,
                       0 AS sort_group
                FROM folders
                WHERE folder IS NOT NULL
                UNION ALL
                SELECT storage.get_prefix_child_name(objects.name, $1, '/') AS "name",
                       id, updated_at, created_at, last_accessed_at, metadata,
                       1 AS sort_group
                FROM storage.objects
                WHERE objects.name ILIKE $1 || '%%'
                  AND bucket_id = $2
                  AND storage.get_common_prefix(objects.name, $1, '/') IS NULL
                  AND storage.get_prefix_child_name(objects.name, $1, '/') IS NOT NULL
            )
            SELECT "name", id, updated_at, created_at, last_accessed_at, metadata
            FROM entries
            ORDER BY sort_group ASC,
                     CASE WHEN sort_group = 0 THEN "name" END %s,
                     CASE WHEN sort_group = 1 THEN %I END %s,
                     CASE WHEN sort_group = 1 THEN "name" END %s
            LIMIT $3 OFFSET $4
            $sql$, v_sort_order, v_order_by, v_sort_order, v_sort_order
        ) USING v_prefix, bucketname, v_limit, offsets;
        RETURN;
    END IF;

    -- Exclusive upper bound of the key range that starts with v_prefix_lower:
    -- the prefix with its last character replaced by the next code point.
    -- An empty prefix has no upper bound.
    IF v_prefix_lower = '' THEN
        v_upper_bound := NULL;
    ELSIF right(v_prefix_lower, 1) = v_delimiter THEN
        v_upper_bound := left(v_prefix_lower, -1) || chr(ascii(v_delimiter) + 1);
    ELSE
        v_upper_bound := left(v_prefix_lower, -1) || chr(ascii(right(v_prefix_lower, 1)) + 1);
    END IF;

    -- Batch query used to read runs of files.
    -- Parameters: $1 bucket, $2 seek position, $3 far range bound, $4 batch size.
    -- The variants without a range bound simply do not reference $3.
    IF v_is_asc THEN
        IF v_upper_bound IS NOT NULL THEN
            v_batch_query := 'SELECT o.name, o.id, o.updated_at, o.created_at, o.last_accessed_at, o.metadata ' ||
                'FROM storage.objects o WHERE o.bucket_id = $1 AND lower(o.name) COLLATE "C" >= $2 ' ||
                'AND lower(o.name) COLLATE "C" < $3 ORDER BY lower(o.name) COLLATE "C" ASC LIMIT $4';
        ELSE
            v_batch_query := 'SELECT o.name, o.id, o.updated_at, o.created_at, o.last_accessed_at, o.metadata ' ||
                'FROM storage.objects o WHERE o.bucket_id = $1 AND lower(o.name) COLLATE "C" >= $2 ' ||
                'ORDER BY lower(o.name) COLLATE "C" ASC LIMIT $4';
        END IF;
    ELSE
        IF v_upper_bound IS NOT NULL THEN
            v_batch_query := 'SELECT o.name, o.id, o.updated_at, o.created_at, o.last_accessed_at, o.metadata ' ||
                'FROM storage.objects o WHERE o.bucket_id = $1 AND lower(o.name) COLLATE "C" < $2 ' ||
                'AND lower(o.name) COLLATE "C" >= $3 ORDER BY lower(o.name) COLLATE "C" DESC LIMIT $4';
        ELSE
            v_batch_query := 'SELECT o.name, o.id, o.updated_at, o.created_at, o.last_accessed_at, o.metadata ' ||
                'FROM storage.objects o WHERE o.bucket_id = $1 AND lower(o.name) COLLATE "C" < $2 ' ||
                'ORDER BY lower(o.name) COLLATE "C" DESC LIMIT $4';
        END IF;
    END IF;

    -- Initial seek position.
    IF v_is_asc THEN
        v_next_seek := v_prefix_lower;
    ELSE
        -- Descending: find the greatest key in range first.
        IF v_upper_bound IS NOT NULL THEN
            SELECT o.name INTO v_peek_name FROM storage.objects o
            WHERE o.bucket_id = bucketname AND lower(o.name) COLLATE "C" >= v_prefix_lower AND lower(o.name) COLLATE "C" < v_upper_bound
            ORDER BY lower(o.name) COLLATE "C" DESC LIMIT 1;
        ELSIF v_prefix_lower <> '' THEN
            SELECT o.name INTO v_peek_name FROM storage.objects o
            WHERE o.bucket_id = bucketname AND lower(o.name) COLLATE "C" >= v_prefix_lower
            ORDER BY lower(o.name) COLLATE "C" DESC LIMIT 1;
        ELSE
            SELECT o.name INTO v_peek_name FROM storage.objects o
            WHERE o.bucket_id = bucketname
            ORDER BY lower(o.name) COLLATE "C" DESC LIMIT 1;
        END IF;

        IF v_peek_name IS NOT NULL THEN
            -- Descending seeks compare with "<", so append the delimiter to
            -- keep the greatest key itself inside the first lookup.
            v_next_seek := lower(v_peek_name) || v_delimiter;
        ELSE
            -- Nothing in range.
            RETURN;
        END IF;
    END IF;

    LOOP
        EXIT WHEN v_count >= v_limit;

        -- Peek at the next key in order, unless the file batch below already
        -- handed one over (the folder key that ended the batch).
        IF NOT v_has_pending_peek THEN
            IF v_is_asc THEN
                IF v_upper_bound IS NOT NULL THEN
                    SELECT o.name INTO v_peek_name FROM storage.objects o
                    WHERE o.bucket_id = bucketname AND lower(o.name) COLLATE "C" >= v_next_seek AND lower(o.name) COLLATE "C" < v_upper_bound
                    ORDER BY lower(o.name) COLLATE "C" ASC LIMIT 1;
                ELSE
                    SELECT o.name INTO v_peek_name FROM storage.objects o
                    WHERE o.bucket_id = bucketname AND lower(o.name) COLLATE "C" >= v_next_seek
                    ORDER BY lower(o.name) COLLATE "C" ASC LIMIT 1;
                END IF;
            ELSE
                IF v_upper_bound IS NOT NULL THEN
                    SELECT o.name INTO v_peek_name FROM storage.objects o
                    WHERE o.bucket_id = bucketname AND lower(o.name) COLLATE "C" < v_next_seek AND lower(o.name) COLLATE "C" >= v_prefix_lower
                    ORDER BY lower(o.name) COLLATE "C" DESC LIMIT 1;
                ELSIF v_prefix_lower <> '' THEN
                    SELECT o.name INTO v_peek_name FROM storage.objects o
                    WHERE o.bucket_id = bucketname AND lower(o.name) COLLATE "C" < v_next_seek AND lower(o.name) COLLATE "C" >= v_prefix_lower
                    ORDER BY lower(o.name) COLLATE "C" DESC LIMIT 1;
                ELSE
                    SELECT o.name INTO v_peek_name FROM storage.objects o
                    WHERE o.bucket_id = bucketname AND lower(o.name) COLLATE "C" < v_next_seek
                    ORDER BY lower(o.name) COLLATE "C" DESC LIMIT 1;
                END IF;
            END IF;
        END IF;

        v_has_pending_peek := FALSE;

        EXIT WHEN v_peek_name IS NULL;

        v_common_prefix := storage.get_common_prefix(lower(v_peek_name), v_prefix_lower, v_delimiter);

        IF v_common_prefix IS NOT NULL THEN
            -- The peeked key lives inside a folder: emit the folder once
            -- (or count it against the offset) ...
            IF v_skipped < offsets THEN
                v_skipped := v_skipped + 1;
            ELSE
                name := storage.get_prefix_child_name(v_peek_name, v_prefix, v_delimiter);
                IF name IS NOT NULL THEN
                    id := NULL;
                    updated_at := NULL;
                    created_at := NULL;
                    last_accessed_at := NULL;
                    metadata := NULL;
                    RETURN NEXT;
                    v_count := v_count + 1;
                END IF;
            END IF;

            -- ... then jump past every key under that folder.
            IF v_is_asc THEN
                -- Replace the trailing delimiter with the next character so
                -- the ">=" seek lands after all keys starting with the folder.
                v_next_seek := lower(left(v_common_prefix, -1)) || chr(ascii(v_delimiter) + 1);
            ELSE
                -- The "<" seek excludes every key starting with the folder.
                v_next_seek := lower(v_common_prefix);
            END IF;
        ELSE
            -- The peeked key is a file: read a batch of keys starting at the
            -- current seek position and emit files until a folder shows up.
            FOR v_current IN EXECUTE v_batch_query
                USING bucketname, v_next_seek,
                    CASE WHEN v_is_asc THEN COALESCE(v_upper_bound, v_prefix_lower) ELSE v_prefix_lower END, v_file_batch_size
            LOOP
                v_common_prefix := storage.get_common_prefix(lower(v_current.name), v_prefix_lower, v_delimiter);

                IF v_common_prefix IS NOT NULL THEN
                    -- Folder key: hand it to the outer loop as the next peek.
                    v_peek_name := v_current.name;
                    v_has_pending_peek := TRUE;
                    EXIT;
                END IF;

                IF v_skipped < offsets THEN
                    v_skipped := v_skipped + 1;
                ELSE
                    name := storage.get_prefix_child_name(v_current.name, v_prefix, v_delimiter);
                    IF name IS NOT NULL THEN
                        id := v_current.id;
                        updated_at := v_current.updated_at;
                        created_at := v_current.created_at;
                        last_accessed_at := v_current.last_accessed_at;
                        metadata := v_current.metadata;
                        RETURN NEXT;
                        v_count := v_count + 1;
                    END IF;
                END IF;

                -- Advance the seek just past this file.
                IF v_is_asc THEN
                    -- chr(1) is the lowest character a text value can hold, so
                    -- name || chr(1) is the immediate successor of name under
                    -- "C" ordering. Seeking to name || '/' instead would skip
                    -- siblings such as 'report.txt' after 'report' ('.' sorts
                    -- before '/') whenever a batch ends on this row.
                    -- NOTE(review): keys whose lower(name) is identical are still
                    -- skipped if a batch ends between them - confirm whether
                    -- such keys can exist.
                    v_next_seek := lower(v_current.name) || chr(1);
                ELSE
                    v_next_seek := lower(v_current.name);
                END IF;

                EXIT WHEN v_count >= v_limit;
            END LOOP;
        END IF;
    END LOOP;
END;
$func$;

src/internal/database/migrations/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,4 +58,5 @@ export const DBMigration = {
5858
'fix-optimized-search-function': 56,
5959
's3-multipart-uploads-metadata': 57,
6060
'operation-ergonomics': 58,
61+
'fix-common-prefix-empty-segments': 59,
6162
} as const

src/storage/object.ts

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,32 @@ export interface ListObjectsV2Result {
4848
nextCursorKey?: string
4949
}
5050

51+
/**
 * Computes the common prefix ("folder") that `key` belongs to directly below
 * `prefix`: the leading part of `key` up to and including the first
 * `delimiter` that follows a non-empty path segment after the prefix.
 *
 * @param key - full object key
 * @param prefix - listing prefix the key is expected to start with (case-sensitive)
 * @param delimiter - path delimiter; an empty delimiter disables grouping
 * @returns the common prefix including its trailing delimiter, or `undefined`
 * when the delimiter is empty, the key does not start with the prefix, or no
 * delimiter follows the first non-empty segment (the key is a leaf)
 *
 * @example
 * getNextCommonPrefix('prefix/a/file', 'prefix/', '/')  // 'prefix/a/'
 * getNextCommonPrefix('prefix//a/file', 'prefix/', '/') // 'prefix//a/'
 * getNextCommonPrefix('prefix//file', 'prefix/', '/')   // undefined
 */
export function getNextCommonPrefix(
  key: string,
  prefix: string,
  delimiter: string
): string | undefined {
  if (!delimiter || !key.startsWith(prefix)) {
    return undefined
  }

  const suffix = key.slice(prefix.length)
  let scanIndex = 0

  // Ignore empty path segments immediately after the current prefix so
  // repeated delimiters like `prefix//file` do not produce `prefix/` again.
  while (suffix.startsWith(delimiter, scanIndex)) {
    scanIndex += delimiter.length
  }

  // indexOf returns a position relative to the start of `suffix`, so the
  // skipped delimiters are already accounted for.
  const nextDelimiterIndex = suffix.indexOf(delimiter, scanIndex)
  if (nextDelimiterIndex < 0) {
    return undefined
  }

  return key.substring(0, prefix.length + nextDelimiterIndex + delimiter.length)
}
76+
5177
/**
5278
* ObjectStorage
5379
* interact with remote objects and database state
@@ -624,11 +650,9 @@ export class ObjectStorage {
624650
if (delimiter) {
625651
const delimitedResults: Obj[] = []
626652
for (const object of searchResult) {
627-
let idx = object.name.replace(prefix, '').indexOf(delimiter)
653+
const currPrefix = getNextCommonPrefix(object.name, prefix, delimiter)
628654

629-
if (idx >= 0) {
630-
idx = prefix.length + idx + delimiter.length
631-
const currPrefix = object.name.substring(0, idx)
655+
if (currPrefix) {
632656
if (currPrefix === prevPrefix) {
633657
continue
634658
}

0 commit comments

Comments
 (0)