Setup download progress (#2289)

Qubitium · web-flow · commit b1abb5daa99e · 2025-12-17T19:24:07.000+08:00
* log download progress

* refactor

* refactor

* format
diff --git a/gptqmodel/version.py b/gptqmodel/version.py
@@ -7,4 +7,4 @@
 # even minor versions are release
 # 5.2.0 => release, 5.1.0 => devel
 # micro version (5.2.x) denotes patch fix, i.e. 5.2.1 is a patch fix release
-__version__ = "5.6.99"
+__version__ = "5.6.12"
diff --git a/setup.py b/setup.py
@@ -7,7 +7,6 @@
 import subprocess
 import sys
 import tarfile
-import urllib.request
 from pathlib import Path
 from shutil import rmtree
 
@@ -30,10 +29,11 @@ def _ensure_cutlass_source() -> Path:
 
     archive_path = deps_dir / f"cutlass-v{CUTLASS_VERSION}.tar.gz"
     if not archive_path.exists():
-        print(f"Downloading CUTLASS v{CUTLASS_VERSION} ...")
-        with urllib.request.urlopen(CUTLASS_RELEASE_URL) as response:
-            data = response.read()
-        archive_path.write_bytes(data)
+        _download_with_progress(
+            CUTLASS_RELEASE_URL,
+            str(archive_path),
+            title=f"Downloading CUTLASS v{CUTLASS_VERSION}",
+        )
 
     if cutlass_root.exists():
         rmtree(cutlass_root)
@@ -421,6 +421,86 @@ def _resolve_wheel_url(tag_name: str, wheel_name: str) -> str:
     return DEFAULT_WHEEL_URL_TEMPLATE.format(tag_name=tag_name, wheel_name=wheel_name)
 
 
+def _download_with_progress(url: str, dest_path: str, title: str = "Downloading") -> None:
+    """Download ``url`` to ``dest_path``, printing progress lines to stdout.
+
+    The payload is first written to ``dest_path + ".part"`` and only moved
+    into place once the transfer completes, so an interrupted or truncated
+    download never leaves a partial file at ``dest_path``.
+
+    Args:
+        url: Source URL handed to ``urllib.request.urlretrieve``.
+        dest_path: Final location of the downloaded file.
+        title: Prefix printed in front of every progress line.
+
+    Raises:
+        Whatever ``urllib.request.urlretrieve`` or ``os.replace`` raises; the
+        temporary ``.part`` file is removed before the error propagates.
+    """
+    import os
+    import time
+    import urllib.request as req
+
+    start_time = time.time()
+    last_draw_time = 0.0
+    last_print_percent = -1
+
+    def _format_bytes(num_bytes: float) -> str:
+        # Whole bytes below 1 KiB; otherwise one decimal in the largest unit
+        # that keeps the value under 1024 (capped at TiB).
+        value = float(max(num_bytes, 0.0))
+        if value < 1024.0:
+            return f"{int(value)}B"
+        for unit in ("KiB", "MiB", "GiB"):
+            value /= 1024.0
+            if value < 1024.0:
+                return f"{value:0.1f}{unit}"
+        return f"{value / 1024.0:0.1f}TiB"
+
+    def _reporthook(block_num: int, block_size: int, total_size: int) -> None:
+        nonlocal last_draw_time, last_print_percent
+        now = time.time()
+        known_total = bool(total_size) and total_size > 0
+        downloaded = block_num * block_size
+        if known_total:
+            # The final block is usually short, so block_num * block_size can
+            # overshoot; clamp so the line never reports more than the total.
+            downloaded = min(downloaded, total_size)
+        speed = downloaded / max(now - start_time, 1e-6)
+
+        if known_total:
+            percent = int(downloaded * 100 / total_size)
+            # One line per whole percent; the 100% line is always printed.
+            if percent == last_print_percent and percent != 100:
+                return
+            subtitle = (
+                f"{percent:3d}% ({_format_bytes(downloaded)}/{_format_bytes(total_size)}) "
+                f"{_format_bytes(speed)}/s"
+            )
+            print(f"{title} {subtitle}", flush=True)
+            last_print_percent = percent
+            last_draw_time = now
+        else:
+            # Unknown size: throttle to at most one line per second.
+            if (now - last_draw_time) < 1.0:
+                return
+            subtitle = f"{_format_bytes(downloaded)} {_format_bytes(speed)}/s"
+            print(f"{title} {subtitle}", flush=True)
+            last_draw_time = now
+
+    part_path = f"{dest_path}.part"
+    try:
+        req.urlretrieve(url, part_path, reporthook=_reporthook)
+        os.replace(part_path, dest_path)
+    except BaseException:
+        # Includes KeyboardInterrupt: best-effort cleanup, then re-raise.
+        try:
+            os.remove(part_path)
+        except OSError:
+            pass
+        raise
+
+
 # Decide HAS_CUDA_V8 / HAS_CUDA_V9 without torch
 HAS_CUDA_V8 = False
 HAS_CUDA_V9 = False
@@ -833,12 +878,12 @@ def run(self):
         print(f"Resolved wheel URL: {wheel_url}\nwheel name={wheel_filename}")
 
         try:
-            import urllib.request as req
-            req.urlretrieve(wheel_url, os.path.join(self.dist_dir, wheel_filename))
-
             if not os.path.exists(self.dist_dir):
                 os.makedirs(self.dist_dir)
 
+            wheel_path = os.path.join(self.dist_dir, wheel_filename)
+
+            _download_with_progress(wheel_url, wheel_path, title="Downloading wheel")
             print("Raw wheel path", wheel_filename)
         except BaseException:
             env_info = [f"python={python_version}", f"torch={TORCH_VERSION or 'unknown'}"]
diff --git a/tests/test_bitblas_gptq_v2.py b/tests/test_bitblas_gptq_v2.py
@@ -3,13 +3,12 @@
 import pytest
 import torch
 
+from gptqmodel import BACKEND, GPTQModel
 from gptqmodel.nn_modules.qlinear.bitblas import (
     BITBLAS_AVAILABLE,
     import_bitblas,
 )
 
-from gptqmodel import GPTQModel, BACKEND
-from gptqmodel.quantization.config import FORMAT
 
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is required for BitBLAS")
 @pytest.mark.skipif(not BITBLAS_AVAILABLE, reason="BitBLAS backend is not available")