Skip to content

Commit ce4e480

Browse files
committed
refactor: simplify state file data source restoration
Replace the complex rebuildDataSources() with a simpler resolveToLeafSources(), which walks the serialized DAG to find importable leaf sources (URIs or files) without reconstructing the full runtime DataSource tree.
1 parent 54453ef commit ce4e480

2 files changed

Lines changed: 71 additions & 168 deletions

File tree

src/io/import/importDataSources.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import handleAmazonS3 from '@/src/io/import/processors/handleAmazonS3';
2323
import handleGoogleCloudStorage from '@/src/io/import/processors/handleGoogleCloudStorage';
2424
import importSingleFile from '@/src/io/import/processors/importSingleFile';
2525
import handleRemoteManifest from '@/src/io/import/processors/remoteManifest';
26-
import restoreStateFile from '@/src/io/import/processors/restoreStateFile';
26+
import { restoreStateFile } from '@/src/io/import/processors/restoreStateFile';
2727
import updateFileMimeType from '@/src/io/import/processors/updateFileMimeType';
2828
import handleConfig from '@/src/io/import/processors/handleConfig';
2929
import {

src/io/import/processors/restoreStateFile.ts

Lines changed: 70 additions & 167 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ import {
99
ImportHandler,
1010
StateFileContext,
1111
} from '@/src/io/import/common';
12-
import { DataSource } from '@/src/io/import/dataSource';
1312
import { MANIFEST, isStateFile } from '@/src/io/state-file';
1413
import { partition, getURLBasename } from '@/src/utils';
1514
import { useSegmentGroupStore } from '@/src/store/segmentGroups';
@@ -22,213 +21,119 @@ import { useViewStore } from '@/src/store/views';
2221
import { useViewConfigStore } from '@/src/store/view-configs';
2322
import { migrateManifest } from '@/src/io/state-file/migrations';
2423

25-
function findRootUriAncestors(
24+
type LeafSource =
25+
| { type: 'uri'; uri: string; name: string; mime?: string }
26+
| { type: 'file'; file: File; fileType: string };
27+
28+
function resolveToLeafSources(
2629
id: number,
27-
byId: Record<string, DataSourceType>
28-
): DataSourceType[] {
30+
byId: Record<number, DataSourceType>,
31+
datasetFilePath: Record<string, string> | undefined,
32+
pathToFile: Record<string, File>
33+
): LeafSource[] {
2934
const src = byId[id];
3035
if (!src) return [];
31-
if (src.type === 'uri') return [src];
32-
if ('parent' in src && src.parent !== undefined) {
33-
return findRootUriAncestors(src.parent, byId);
34-
}
35-
if (src.type === 'collection') {
36-
const uris = new Map<number, DataSourceType>();
37-
src.sources.forEach((sourceId) => {
38-
findRootUriAncestors(sourceId, byId).forEach((uri) => {
39-
uris.set(uri.id, uri);
40-
});
41-
});
42-
return [...uris.values()];
43-
}
44-
return [];
45-
}
4636

47-
function rebuildDataSources(
48-
serializedDataSources: DataSourceType[],
49-
fileIDToFile: Record<number, File>
50-
) {
51-
const dataSourceCache: Record<string, DataSource> = {};
52-
const byId: Record<string, DataSourceType> = {};
53-
const leaves = new Set<number>();
54-
55-
serializedDataSources.forEach((serializedSrc) => {
56-
byId[serializedSrc.id] = serializedSrc;
57-
leaves.add(serializedSrc.id);
58-
});
59-
60-
const deserialize = (
61-
serialized: (typeof serializedDataSources)[number]
62-
): DataSource | null => {
63-
const { type } = serialized;
64-
switch (type) {
65-
case 'file':
66-
return {
67-
type: 'file',
68-
file: fileIDToFile[serialized.fileId],
69-
fileType: serialized.fileType,
70-
};
71-
case 'archive': {
72-
const parent = dataSourceCache[serialized.parent];
73-
if (!parent) {
74-
return null;
75-
}
76-
if (parent.type !== 'file') {
77-
return null;
78-
}
79-
return {
80-
type: 'archive',
81-
path: serialized.path,
82-
parent,
83-
};
84-
}
85-
case 'uri': {
86-
const defaultName = getURLBasename(serialized.uri) || serialized.uri;
87-
return {
37+
switch (src.type) {
38+
case 'uri':
39+
return [
40+
{
8841
type: 'uri',
89-
uri: serialized.uri,
90-
name: serialized.name ?? defaultName,
91-
mime: serialized.mime,
92-
};
93-
}
94-
case 'collection': {
95-
serialized.sources.forEach((id) => {
96-
leaves.delete(id);
97-
});
98-
const sources = serialized.sources
99-
.map((id) => dataSourceCache[id])
100-
.filter((src): src is DataSource => src != null);
101-
if (sources.length === 0) {
102-
return null;
103-
}
104-
return {
105-
type: 'collection',
106-
sources,
107-
};
42+
uri: src.uri,
43+
name: src.name ?? getURLBasename(src.uri) ?? src.uri,
44+
mime: src.mime,
45+
},
46+
];
47+
48+
case 'file': {
49+
const filePath = datasetFilePath?.[src.fileId];
50+
const file = filePath ? pathToFile[filePath] : undefined;
51+
if (file) {
52+
return [{ type: 'file', file, fileType: src.fileType }];
10853
}
109-
default:
110-
throw new Error(
111-
`Encountered an invalid serialized data source: ${type}`
112-
);
113-
}
114-
};
115-
116-
for (let i = 0; i < serializedDataSources.length; i++) {
117-
const serializedSrc = serializedDataSources[i];
118-
119-
if (serializedSrc.id in dataSourceCache) {
120-
continue;
54+
console.warn(
55+
`State file missing expected file: ${filePath ?? src.fileId}`
56+
);
57+
return [];
12158
}
12259

123-
const dataSource = deserialize(serializedSrc);
124-
125-
if (!dataSource) {
126-
const rootUris = findRootUriAncestors(serializedSrc.id, byId);
127-
leaves.delete(serializedSrc.id);
128-
rootUris.forEach((uri) => leaves.add(uri.id));
129-
continue;
130-
}
60+
case 'archive':
61+
return resolveToLeafSources(
62+
src.parent,
63+
byId,
64+
datasetFilePath,
65+
pathToFile
66+
);
13167

132-
if (serializedSrc.parent) {
133-
dataSource.parent = dataSourceCache[serializedSrc.parent];
134-
leaves.delete(serializedSrc.parent);
135-
}
68+
case 'collection':
69+
return src.sources.flatMap((sourceId) =>
70+
resolveToLeafSources(sourceId, byId, datasetFilePath, pathToFile)
71+
);
13672

137-
dataSourceCache[serializedSrc.id] = dataSource;
73+
default:
74+
return [];
13875
}
139-
140-
return { dataSourceCache, leaves, byId };
14176
}
14277

14378
function prepareLeafDataSources(manifest: Manifest, datasetFiles: FileEntry[]) {
144-
const { dataSources } = manifest;
145-
const datasets =
146-
manifest.datasets ??
147-
dataSources
148-
.filter((ds) => ds.type === 'uri')
149-
.map((ds) => ({ id: String(ds.id), dataSourceId: ds.id }));
150-
const datasetFilePath = manifest.datasetFilePath ?? {};
151-
152-
const dataSourceIDToStateID = datasets.reduce<Record<number, string>>(
153-
(acc, ds) =>
154-
Object.assign(acc, {
155-
[ds.dataSourceId]: ds.id,
156-
}),
157-
{}
158-
);
159-
const pathToFile = datasetFiles.reduce<Record<string, File>>(
160-
(acc, datasetFile) =>
161-
Object.assign(acc, {
162-
[datasetFile.archivePath]: datasetFile.file,
163-
}),
164-
{}
165-
);
166-
const fileIDToFile = Object.entries(datasetFilePath).reduce<
167-
Record<number, File>
168-
>(
169-
(acc, [fileId, filePath]) =>
170-
Object.assign(acc, {
171-
[fileId]: pathToFile[filePath],
172-
}),
173-
{}
79+
const byId: Record<number, DataSourceType> = Object.fromEntries(
80+
manifest.dataSources.map((ds) => [ds.id, ds])
17481
);
17582

176-
const { dataSourceCache, leaves, byId } = rebuildDataSources(
177-
dataSources,
178-
fileIDToFile
83+
const pathToFile: Record<string, File> = Object.fromEntries(
84+
datasetFiles.map((f) => [f.archivePath, f.file])
17985
);
18086

181-
const leafDataSources = [...leaves]
182-
.filter((leafId) => leafId in dataSourceCache)
183-
.map((leafId) => {
184-
const dataSource = dataSourceCache[leafId];
185-
186-
let stateID = dataSourceIDToStateID[leafId];
87+
const datasets =
88+
manifest.datasets ??
89+
manifest.dataSources
90+
.filter((ds) => ds.type === 'uri')
91+
.map((ds) => ({ id: String(ds.id), dataSourceId: ds.id }));
18792

188-
if (!stateID) {
189-
const matchingDataset = datasets.find((ds) => {
190-
const rootUris = findRootUriAncestors(ds.dataSourceId, byId);
191-
return rootUris.some((uri) => uri.id === leafId);
192-
});
193-
if (matchingDataset) {
194-
stateID = matchingDataset.id;
195-
}
196-
}
93+
return datasets.flatMap((ds) => {
94+
const sources = resolveToLeafSources(
95+
ds.dataSourceId,
96+
byId,
97+
manifest.datasetFilePath,
98+
pathToFile
99+
);
197100

198-
return {
199-
...dataSource,
200-
stateFileLeaf: { stateID },
201-
};
101+
const seen = new Set<string>();
102+
const uniqueSources = sources.filter((src) => {
103+
if (src.type !== 'uri') return true;
104+
if (seen.has(src.uri)) return false;
105+
seen.add(src.uri);
106+
return true;
202107
});
203108

204-
return leafDataSources;
109+
return uniqueSources.map((src) => ({
110+
...src,
111+
stateFileLeaf: { stateID: ds.id },
112+
}));
113+
});
205114
}
206115

207116
async function completeStateFileRestore(ctx: StateFileContext) {
208117
const { manifest, stateFiles, stateIDToStoreID } = ctx;
209118
const stateIDToStoreIDRecord = Object.fromEntries(stateIDToStoreID);
210119

211-
// Restore view configs (handles missing configs gracefully)
212120
useViewConfigStore().deserializeAll(manifest, stateIDToStoreIDRecord);
213121

214-
// Restore the labelmaps
215122
const segmentGroupIDMap = await useSegmentGroupStore().deserialize(
216123
manifest,
217124
stateFiles,
218125
stateIDToStoreIDRecord
219126
);
127+
useLayersStore().deserialize(manifest, stateIDToStoreIDRecord);
220128

221-
// Restore the tools (each tool handles missing data gracefully)
222129
useToolStore().deserialize(
223130
manifest,
224131
segmentGroupIDMap,
225132
stateIDToStoreIDRecord
226133
);
227-
228-
useLayersStore().deserialize(manifest, stateIDToStoreIDRecord);
229134
}
230135

231-
const restoreStateFile: ImportHandler = async (dataSource, context) => {
136+
export const restoreStateFile: ImportHandler = async (dataSource, context) => {
232137
if (dataSource.type === 'file' && (await isStateFile(dataSource.file))) {
233138
const stateFileContents = await extractFilesFromZip(dataSource.file);
234139

@@ -311,5 +216,3 @@ const restoreStateFile: ImportHandler = async (dataSource, context) => {
311216
}
312217
return Skip;
313218
};
314-
315-
export default restoreStateFile;

0 commit comments

Comments
 (0)