|
14 | 14 | from urllib.parse import quote, urlparse |
15 | 15 |
|
16 | 16 | import numpy as np |
| 17 | +# Verbosity flag for wids; set WIDS_VERBOSE=1 to enable verbose output |
| 18 | +WIDS_VERBOSE = bool(int(os.environ.get("WIDS_VERBOSE", 0))) |
17 | 19 | import torch.distributed as dist |
18 | 20 |
|
19 | 21 | from .wids_decode import default_decoder |
@@ -100,7 +102,8 @@ def group_by_key(names): |
100 | 102 | for i, fname in enumerate(names): |
101 | 103 | # Ignore files whose name contains no '.' (nothing to split into key and extension). |
102 | 104 | if "." not in fname: |
103 | | - print(f"Warning: Ignoring file {fname} (no '.')") |
| 105 | + # Warn about files without extensions; can be silenced via warnings filter |
| 106 | + warnings.warn(f"Ignoring file {fname} (no '.')") |
104 | 107 | continue |
105 | 108 | key, ext = splitname(fname) |
106 | 109 | if key != last_key: |
@@ -435,7 +438,8 @@ def __init__( |
435 | 438 | self.cache_dir = os.environ.get("WIDS_CACHE", "/tmp/_wids_cache") |
436 | 439 | self.localname = DefaultLocalname(self.cache_dir) |
437 | 440 |
|
438 | | - if True or int(os.environ.get("WIDS_VERBOSE", 0)): |
| 441 | + # Only print dataset summary if verbosity enabled |
| 442 | + if WIDS_VERBOSE: |
439 | 443 | nbytes = sum(shard.get("filesize", 0) for shard in self.shards) |
440 | 444 | nsamples = sum(shard["nsamples"] for shard in self.shards) |
441 | 445 | print( |
@@ -480,7 +484,8 @@ def check_cache_misses(self): |
480 | 484 | if accesses > 100 and misses / accesses > 0.3: |
481 | 485 | # output a warning only once |
482 | 486 | self.check_cache_misses = lambda: None |
483 | | - print("Warning: ShardListDataset has a cache miss rate of {:.1%}%".format(misses * 100.0 / accesses)) |
| 487 | + # Warn about high cache miss rate; can be silenced via warnings filter |
| 488 | + warnings.warn(f"ShardListDataset has a cache miss rate of {misses/accesses:.1%}") |
484 | 489 |
|
485 | 490 | def get_shard(self, index): |
486 | 491 | """Get the shard and index within the shard corresponding to the given index.""" |
|
0 commit comments