@@ -3,8 +3,8 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 import copy
+import logging
 from typing import Any, Optional, TypeVar, Union, overload
-import warnings
 
 import torch
 from torch import Tensor, device, dtype, nn
@@ -20,6 +20,8 @@
 from bitsandbytes.optim import GlobalOptimManager
 from bitsandbytes.utils import INVERSE_LINEAR_8BIT_WEIGHTS_FORMAT_MAPPING, OutlierTracer
 
+logger = logging.getLogger(__name__)
+
 T = TypeVar("T", bound="torch.nn.Module")
 
 
@@ -443,7 +445,7 @@ def fix_4bit_weight_quant_state_from_module(module: Union["Embedding4bit", "Linear4bit"]):
         return
 
     if getattr(module, "quant_state", None) is None:
-        warnings.warn(
+        logger.warning(
             "FP4 quantization state not initialized. Please call .cuda() or .to(device) on the LinearFP4 layer first.",
         )
 
@@ -536,15 +538,13 @@ def set_compute_type(self, x):
             if self.compute_dtype in [None, torch.float32] and (x.numel() == x.shape[-1]):
                 # single batch inference with input torch.float16 and compute_dtype float32 -> slow inference when it could be fast
                 # warn the user about this
-                warnings.warn(
+                logger.warning(
                     "Input type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). This will lead to slow inference.",
                 )
-                warnings.filterwarnings("ignore", message=".*inference.")
             if self.compute_dtype in [None, torch.float32] and (x.numel() != x.shape[-1]):
-                warnings.warn(
+                logger.warning(
                     "Input type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). This will lead to slow inference or training speed.",
                 )
-                warnings.filterwarnings("ignore", message=".*inference or training")
 
     def _save_to_state_dict(self, destination, prefix, keep_vars):
         """
@@ -877,7 +877,7 @@ def __init__(
         blocksize = self.weight.blocksize
 
         if embedding_dim % blocksize != 0:
-            warnings.warn(
+            logger.warning(
                 f"Embedding size {embedding_dim} is not divisible by block size {blocksize}. "
                 "This will lead to slow inference.",
             )
@@ -1164,9 +1164,8 @@ def forward(self, x):
         if self.outlier_dim is None:
             tracer = OutlierTracer.get_instance()
             if not tracer.is_initialized():
-                print("Please use OutlierTracer.initialize(model) before using the OutlierAwareLinear layer")
+                logger.warning("Please use OutlierTracer.initialize(model) before using the OutlierAwareLinear layer")
             outlier_idx = tracer.get_outliers(self.weight)
-            # print(outlier_idx, tracer.get_hvalue(self.weight))
             self.outlier_dim = outlier_idx
 
         if not self.is_quantized:
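Because the module-level logger is created with `logging.getLogger(__name__)`, downstream code can now control these messages through the standard logging hierarchy instead of the global warning filters the removed `warnings.filterwarnings("ignore", ...)` calls used to mutate. A minimal sketch of consumer-side configuration, assuming this file is `bitsandbytes/nn/modules.py` (so the logger name below matches):

```python
import logging

# Surface warnings from bitsandbytes layers via the root handler.
logging.basicConfig(level=logging.WARNING)

# Or silence just the slow-inference hints from this module without
# touching Python's global warning filters. The logger name assumes
# the file lives at bitsandbytes/nn/modules.py.
logging.getLogger("bitsandbytes.nn.modules").setLevel(logging.ERROR)
```

One behavioral difference worth noting: `warnings.warn` deduplicates repeated messages per call site by default, while `logger.warning` emits on every call unless the consumer filters it.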