Skip to content

Commit cccef56

Browse files
committed
feat: enhance dataset size handling and storage comment generation
- Add parseDatasetSizeToBytes method to convert dataset size strings to bytes, supporting various units.
- Implement buildRequiredStorageComment method to generate a storage estimate based on dataset sizes.
- Update generateDownloadCommand to include the storage comment in the command output.
- Modify DownloadManager and SelectionPanelManager to utilize the new dataset size features.
1 parent f8c648d commit cccef56

3 files changed

Lines changed: 113 additions & 4 deletions

File tree

docs/js/modules/config.js

Lines changed: 108 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -253,17 +253,123 @@ class ConfigManager {
253253
};
254254
}
255255

256+
/**
257+
* Parse dataset size string to bytes.
258+
* Supports common units: B, KB/MB/GB/TB, KiB/MiB/GiB/TiB (case-insensitive).
259+
* @param {unknown} size
260+
* @returns {number|null} Size in bytes, or null if unparseable
261+
*/
262+
static parseDatasetSizeToBytes(size) {
263+
if (size === undefined || size === null) return null;
264+
265+
// Allow passing a numeric value that represents bytes
266+
if (typeof size === 'number' && isFinite(size)) {
267+
return size >= 0 ? size : null;
268+
}
269+
270+
if (typeof size !== 'string') return null;
271+
272+
const raw = size.trim();
273+
if (!raw) return null;
274+
275+
// Match patterns like: "12GB", "12.3 GB", "1.2TiB", "800 MB", "1024B"
276+
const match = raw.match(/^([0-9]+(?:\.[0-9]+)?)\s*([a-zA-Z]+)?$/);
277+
if (!match) return null;
278+
279+
const value = parseFloat(match[1]);
280+
if (!isFinite(value)) return null;
281+
282+
const unitRaw = (match[2] || 'B').trim().toUpperCase();
283+
284+
// Normalize some common variants
285+
const unit = unitRaw
286+
.replace(/^BYTES?$/, 'B')
287+
.replace(/^KIB$/, 'KIB')
288+
.replace(/^MIB$/, 'MIB')
289+
.replace(/^GIB$/, 'GIB')
290+
.replace(/^TIB$/, 'TIB');
291+
292+
const SI = {
293+
B: 1,
294+
KB: 1e3,
295+
MB: 1e6,
296+
GB: 1e9,
297+
TB: 1e12
298+
};
299+
300+
const IEC = {
301+
KIB: 1024,
302+
MIB: 1024 ** 2,
303+
GIB: 1024 ** 3,
304+
TIB: 1024 ** 4
305+
};
306+
307+
if (SI[unit] !== undefined) return value * SI[unit];
308+
if (IEC[unit] !== undefined) return value * IEC[unit];
309+
310+
// Extra tolerance: allow shorthand like "G", "T"
311+
if (unit === 'K') return value * SI.KB;
312+
if (unit === 'M') return value * SI.MB;
313+
if (unit === 'G') return value * SI.GB;
314+
if (unit === 'T') return value * SI.TB;
315+
316+
return null;
317+
}
318+
319+
/**
320+
* Build the required storage comment for a list of datasets.
321+
* Rules:
322+
* - Use TB if total >= 1TB, otherwise GB
323+
* - Keep 1 decimal place
324+
* - If any dataset size is missing/unparseable, return placeholder ---GB/TB
325+
* @param {string[]} datasetPaths
326+
* @param {Map<string, Dataset>|null|undefined} datasetMap
327+
* @returns {string}
328+
*/
329+
static buildRequiredStorageComment(datasetPaths, datasetMap) {
330+
if (!Array.isArray(datasetPaths) || datasetPaths.length === 0) {
331+
return '# Required storage: 0.0GB.\n# Disk usage may be larger.';
332+
}
333+
334+
if (!datasetMap || typeof datasetMap.get !== 'function') {
335+
return '# Required storage: ---GB/TB.\n# Disk usage may be larger.';
336+
}
337+
338+
let totalBytes = 0;
339+
for (const path of datasetPaths) {
340+
const ds = datasetMap.get(path);
341+
const size = ds?.datasetSize ?? ds?.raw?.dataset_size;
342+
const bytes = this.parseDatasetSizeToBytes(size);
343+
if (bytes === null) {
344+
return '# Required storage: ---GB/TB.\n# Disk usage may be larger.';
345+
}
346+
totalBytes += bytes;
347+
}
348+
349+
const TB = 1e12;
350+
const GB = 1e9;
351+
const useTB = totalBytes >= TB;
352+
const value = useTB ? (totalBytes / TB) : (totalBytes / GB);
353+
const formatted = (Math.round(value * 10) / 10).toFixed(1);
354+
const unit = useTB ? 'TB' : 'GB';
355+
return `# Required storage: ${formatted}${unit}.\n# Disk usage may be larger.`;
356+
}
357+
256358
/**
257359
* Generate download command string
258360
* @param {string} hub - Hub name (e.g., 'modelscope', 'huggingface')
259361
* @param {string[]} datasets - Array of dataset paths
362+
* @param {Map<string, Dataset>} [datasetMap] - Optional dataset map for size calculation
260363
* @returns {string} Generated download command
261364
*/
262-
static generateDownloadCommand(hub, datasets) {
365+
static generateDownloadCommand(hub, datasets, datasetMap = undefined) {
263366
const config = this.getConfig().downloadCommand;
264367

368+
// First line: storage estimate comment (no blank line)
369+
const storageComment = this.buildRequiredStorageComment(datasets, datasetMap);
370+
265371
// Format: robocoin-download \
266-
let command = `${config.command}${config.lineContinuation}${config.lineBreak}`;
372+
let command = `${storageComment}${config.lineBreak}${config.command}${config.lineContinuation}${config.lineBreak}`;
267373

268374
// Format: --hub modelscope \
269375
command += `${config.hubParam} ${hub}${config.lineContinuation}${config.lineBreak}`;

docs/js/modules/download-manager.js

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
*/
55

66
import ConfigManager from './config.js';
7+
import dataManager from './data-manager.js';
78
import toastManager from './toast-manager.js';
89

910
/**
@@ -31,7 +32,8 @@ class DownloadManager {
3132
try {
3233
const command = ConfigManager.generateDownloadCommand(
3334
this.currentHub,
34-
[datasetPath]
35+
[datasetPath],
36+
dataManager?.datasetMap
3537
);
3638

3739
await this.copyToClipboard(command);

docs/js/modules/selection-panel.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,8 @@ export class SelectionPanelManager {
192192
requestAnimationFrame(() => {
193193
output.textContent = ConfigManager.generateDownloadCommand(
194194
this.currentHub,
195-
this._sortedPathsCache
195+
this._sortedPathsCache,
196+
this.datasetMap
196197
);
197198
});
198199
}, 100);

0 commit comments

Comments
 (0)