@@ -253,17 +253,123 @@ class ConfigManager {
253253 } ;
254254 }
255255
256+ /**
257+ * Parse dataset size string to bytes.
258+ * Supports common units: B, KB/MB/GB/TB, KiB/MiB/GiB/TiB (case-insensitive).
259+ * @param {unknown } size
260+ * @returns {number|null } Size in bytes, or null if unparseable
261+ */
262+ static parseDatasetSizeToBytes ( size ) {
263+ if ( size === undefined || size === null ) return null ;
264+
265+ // Allow passing a numeric value that represents bytes
266+ if ( typeof size === 'number' && isFinite ( size ) ) {
267+ return size >= 0 ? size : null ;
268+ }
269+
270+ if ( typeof size !== 'string' ) return null ;
271+
272+ const raw = size . trim ( ) ;
273+ if ( ! raw ) return null ;
274+
275+ // Match patterns like: "12GB", "12.3 GB", "1.2TiB", "800 MB", "1024B"
276+ const match = raw . match ( / ^ ( [ 0 - 9 ] + (?: \. [ 0 - 9 ] + ) ? ) \s * ( [ a - z A - Z ] + ) ? $ / ) ;
277+ if ( ! match ) return null ;
278+
279+ const value = parseFloat ( match [ 1 ] ) ;
280+ if ( ! isFinite ( value ) ) return null ;
281+
282+ const unitRaw = ( match [ 2 ] || 'B' ) . trim ( ) . toUpperCase ( ) ;
283+
284+ // Normalize some common variants
285+ const unit = unitRaw
286+ . replace ( / ^ B Y T E S ? $ / , 'B' )
287+ . replace ( / ^ K I B $ / , 'KIB' )
288+ . replace ( / ^ M I B $ / , 'MIB' )
289+ . replace ( / ^ G I B $ / , 'GIB' )
290+ . replace ( / ^ T I B $ / , 'TIB' ) ;
291+
292+ const SI = {
293+ B : 1 ,
294+ KB : 1e3 ,
295+ MB : 1e6 ,
296+ GB : 1e9 ,
297+ TB : 1e12
298+ } ;
299+
300+ const IEC = {
301+ KIB : 1024 ,
302+ MIB : 1024 ** 2 ,
303+ GIB : 1024 ** 3 ,
304+ TIB : 1024 ** 4
305+ } ;
306+
307+ if ( SI [ unit ] !== undefined ) return value * SI [ unit ] ;
308+ if ( IEC [ unit ] !== undefined ) return value * IEC [ unit ] ;
309+
310+ // Extra tolerance: allow shorthand like "G", "T"
311+ if ( unit === 'K' ) return value * SI . KB ;
312+ if ( unit === 'M' ) return value * SI . MB ;
313+ if ( unit === 'G' ) return value * SI . GB ;
314+ if ( unit === 'T' ) return value * SI . TB ;
315+
316+ return null ;
317+ }
318+
319+ /**
320+ * Build the required storage comment for a list of datasets.
321+ * Rules:
322+ * - Use TB if total >= 1TB, otherwise GB
323+ * - Keep 1 decimal place
324+ * - If any dataset size is missing/unparseable, return placeholder ---GB/TB
325+ * @param {string[] } datasetPaths
326+ * @param {Map<string, Dataset>|null|undefined } datasetMap
327+ * @returns {string }
328+ */
329+ static buildRequiredStorageComment ( datasetPaths , datasetMap ) {
330+ if ( ! Array . isArray ( datasetPaths ) || datasetPaths . length === 0 ) {
331+ return '# Required storage: 0.0GB.\n# Disk usage may be larger.' ;
332+ }
333+
334+ if ( ! datasetMap || typeof datasetMap . get !== 'function' ) {
335+ return '# Required storage: ---GB/TB.\n# Disk usage may be larger.' ;
336+ }
337+
338+ let totalBytes = 0 ;
339+ for ( const path of datasetPaths ) {
340+ const ds = datasetMap . get ( path ) ;
341+ const size = ds ?. datasetSize ?? ds ?. raw ?. dataset_size ;
342+ const bytes = this . parseDatasetSizeToBytes ( size ) ;
343+ if ( bytes === null ) {
344+ return '# Required storage: ---GB/TB.\n# Disk usage may be larger.' ;
345+ }
346+ totalBytes += bytes ;
347+ }
348+
349+ const TB = 1e12 ;
350+ const GB = 1e9 ;
351+ const useTB = totalBytes >= TB ;
352+ const value = useTB ? ( totalBytes / TB ) : ( totalBytes / GB ) ;
353+ const formatted = ( Math . round ( value * 10 ) / 10 ) . toFixed ( 1 ) ;
354+ const unit = useTB ? 'TB' : 'GB' ;
355+ return `# Required storage: ${ formatted } ${ unit } .\n# Disk usage may be larger.` ;
356+ }
357+
256358 /**
257359 * Generate download command string
258360 * @param {string } hub - Hub name (e.g., 'modelscope', 'huggingface')
259361 * @param {string[] } datasets - Array of dataset paths
362+ * @param {Map<string, Dataset> } [datasetMap] - Optional dataset map for size calculation
260363 * @returns {string } Generated download command
261364 */
262- static generateDownloadCommand ( hub , datasets ) {
365+ static generateDownloadCommand ( hub , datasets , datasetMap = undefined ) {
263366 const config = this . getConfig ( ) . downloadCommand ;
264367
368+ // First line: storage estimate comment (no blank line)
369+ const storageComment = this . buildRequiredStorageComment ( datasets , datasetMap ) ;
370+
265371 // Format: robocoin-download \
266- let command = `${ config . command } ${ config . lineContinuation } ${ config . lineBreak } ` ;
372+ let command = `${ storageComment } ${ config . lineBreak } ${ config . command } ${ config . lineContinuation } ${ config . lineBreak } ` ;
267373
268374 // Format: --hub modelscope \
269375 command += `${ config . hubParam } ${ hub } ${ config . lineContinuation } ${ config . lineBreak } ` ;
0 commit comments