EvolvingLMMs-Lab · haosenwang1018 · Feb 25, 2026
diff --git a/aiak_megatron/megatron/core/config_logger.py b/aiak_megatron/megatron/core/config_logger.py
@@ -76,7 +76,7 @@ def default(self, o):
             return dataclasses.asdict(o)
         try:
             return super().default(o)
-        except:
+        except Exception:
             return str(o)
 
 

diff --git a/aiak_megatron/megatron/core/dist_checkpointing/dict_utils.py b/aiak_megatron/megatron/core/dist_checkpointing/dict_utils.py
@@ -148,7 +148,7 @@ def inspect_types(x: Any, prefix: Tuple = (), indent: int = 4):
         else:
             try:
                 x_str = str(x)
-            except:
+            except Exception:
                 x_str = '<no string repr>'
             if len(x_str) > 30:
                 x_str = x_str[:30] + '... (truncated)'

diff --git a/aiak_megatron/megatron/core/distributed/custom_fsdp/param_and_grad_buffer.py b/aiak_megatron/megatron/core/distributed/custom_fsdp/param_and_grad_buffer.py
@@ -24,12 +24,12 @@
 
 try:
     from transformer_engine.pytorch import fp8_model_init
-except:
+except Exception:
     pass
 
 try:
     from transformer_engine.pytorch.module.base import TransformerEngineBaseModule
-except:
+except Exception:
     pass
 
 

diff --git a/aiak_megatron/megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py b/aiak_megatron/megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py
@@ -79,7 +79,7 @@ def build_and_save_engine(
         )
         try:
             model_cls = getattr(tensorrt_llm.models, architecture)
-        except:
+        except Exception:
             raise AttributeError(f"Could not find TRTLLM model for architecture: {architecture}!")
 
         logger.set_level("info")

diff --git a/aiak_megatron/megatron/core/models/common/embeddings/rope_utils.py b/aiak_megatron/megatron/core/models/common/embeddings/rope_utils.py
@@ -25,7 +25,7 @@
 except ImportError:
     try:
         from megatron.core.extensions.transformer_engine import fused_apply_rotary_pos_emb
-    except:
+    except Exception:
         fused_apply_rotary_pos_emb = None
 
 

diff --git a/aiak_megatron/megatron/core/models/multimodal/llava_model.py b/aiak_megatron/megatron/core/models/multimodal/llava_model.py
@@ -31,9 +31,9 @@
         import transformer_engine_torch as tex
 
         HAVE_TEX = True
-    except:
+    except Exception:
         HAVE_TEX = False
-except:
+except Exception:
     HAVE_TE = False
     if get_context_parallel_world_size() > 1:
         raise RuntimeError("ContextParallelism requires TransformerEngine support, but not found.")

diff --git a/aiak_megatron/megatron/core/models/vision/clip_vit_model.py b/aiak_megatron/megatron/core/models/vision/clip_vit_model.py
@@ -18,7 +18,7 @@
     from megatron.core.extensions.transformer_engine import TENorm
 
     NORM_IMPL = TENorm
-except:
+except Exception:
     NORM_IMPL = torch.nn.LayerNorm
 
 

diff --git a/aiak_megatron/megatron/core/transformer/attention.py b/aiak_megatron/megatron/core/transformer/attention.py
@@ -25,7 +25,7 @@
 
 try:
     from flash_attn import flash_attn_with_kvcache
-except:
+except Exception:
     flash_attn_with_kvcache = None
 
 

diff --git a/aiak_megatron/megatron/core/transformer/cuda_graphs.py b/aiak_megatron/megatron/core/transformer/cuda_graphs.py
@@ -33,7 +33,7 @@
     from megatron.core.extensions.transformer_engine import TECudaRNGStatesTracker
 
     HAVE_TE_GRAPHS = True
-except:
+except Exception:
     HAVE_TE_GRAPHS = False
 
 _IS_GRAPH_CAPTURING = False

diff --git a/aiak_megatron/pretrain_gpt.py b/aiak_megatron/pretrain_gpt.py
@@ -118,7 +118,7 @@ def oom_observer(device, alloc, device_alloc, device_free):
                 # Check if fp8_model_init supports preserve_high_precision_init_val
                 if "preserve_high_precision_init_val" in inspect.signature(fp8_model_init).parameters:
                     build_model_context_args["preserve_high_precision_init_val"] = True
-            except:
+            except Exception:
                 raise RuntimeError("--fp8-param-gather requires `fp8_model_init` from TransformerEngine,but not found.")
 
         with build_model_context(**build_model_context_args):

diff --git a/aiak_megatron/tests/unit_tests/data/test_preprocess_data.py b/aiak_megatron/tests/unit_tests/data/test_preprocess_data.py
@@ -95,7 +95,7 @@ def tokens_to_string(toks):
         for option in ["decode", "detokenize"]:
             try:
                 return getattr(encoder.tokenizer, option)(toks)
-            except:
+            except Exception:
                 continue
         raise RuntimeError(f"{type(encoder.tokenizer)} tokenizer cannot decode or detokenize")
 

diff --git a/aiak_megatron/tests/unit_tests/test_optimizer_cpu_offloading.py b/aiak_megatron/tests/unit_tests/test_optimizer_cpu_offloading.py
@@ -11,7 +11,7 @@
 try:
     from transformer_engine.pytorch.optimizers import FusedAdam as GPUAdam
     from transformer_engine.pytorch.optimizers import FusedSGD as GPUSGD
-except:
+except Exception:
     # Handle environment where transformer_engine is not installed
     from torch.optim import SGD as GPUSGD
     from torch.optim import Adam as GPUAdam

diff --git a/aiak_megatron/tests/unit_tests/transformer/test_spec_customization.py b/aiak_megatron/tests/unit_tests/transformer/test_spec_customization.py
@@ -150,7 +150,7 @@ def test_sliding_window_attention(self):
             attn = DotProductAttention(
                 config, layer_number=1, attn_mask_type=AttnMaskType.causal, attention_type='self'
             )
-        except:
+        except Exception:
             threw = True
         finally:
             assert threw, 'Expected DotProductAttention to throw exception for SWA'
@@ -169,7 +169,7 @@ def test_sliding_window_attention(self):
             attn = TEDotProductAttention(
                 config, layer_number=1, attn_mask_type=AttnMaskType.causal, attention_type='self'
             )
-        except:
+        except Exception:
             threw = True
         finally:
             assert threw, "Expected TEDotProductAttention to throw for integer window-size"

diff --git a/aiak_megatron/tools/checkpoint/loader_mcore.py b/aiak_megatron/tools/checkpoint/loader_mcore.py
@@ -378,6 +378,6 @@ def queue_put(name, msg):
 def load_checkpoint(queue, args):
     try:
         _load_checkpoint(queue, args)
-    except:
+    except Exception:
         queue.put("exit")
         raise
diff --git a/aiak_training_llm/models/custom/common/local_attention.py b/aiak_training_llm/models/custom/common/local_attention.py
@@ -17,7 +17,7 @@
     from flash_attn.flash_attn_interface import flash_attn_varlen_func
     import rearrange
     HAVE_FLASH_ATTN = True
-except:
+except Exception:
     HAVE_FLASH_ATTN = False
 
 

diff --git a/aiak_training_llm/models/custom/common/local_norm.py b/aiak_training_llm/models/custom/common/local_norm.py
@@ -6,13 +6,13 @@
 try:
     from apex.normalization.fused_layer_norm import FusedRMSNorm as ApexFusedRMSNorm
     HAVE_FUSED_RMS_NORM = True
-except:
+except Exception:
     HAVE_FUSED_RMS_NORM = False
 
 try:
     from apex.normalization.fused_layer_norm import FusedLayerNorm as ApexFusedLayerNorm
     HAVE_FUSED_LAYER_NORM = True
-except:
+except Exception:
     HAVE_FUSED_LAYER_NORM = False
 
 

diff --git a/aiak_training_llm/models/qwen/qwen_provider.py b/aiak_training_llm/models/qwen/qwen_provider.py
@@ -57,7 +57,7 @@ def qwen_model_provider(
             # Check if fp8_model_init supports preserve_high_precision_init_val
             if "preserve_high_precision_init_val" in inspect.signature(fp8_model_init).parameters:
                 build_model_context_args["preserve_high_precision_init_val"] = True
-        except:
+        except Exception:
             raise RuntimeError("--fp8-param-gather requires `fp8_model_init` from TransformerEngine,but not found.")
 
     with build_model_context(**build_model_context_args):

diff --git a/examples_offline_packing/bmr_packing/s1_bmr_sft_data_proc_indcoding.py b/examples_offline_packing/bmr_packing/s1_bmr_sft_data_proc_indcoding.py
@@ -121,7 +121,7 @@ def _worker_process(job_queue, result_list, base_dir, output_dir,
     while True:
         try:
             chunk = job_queue.get_nowait()
-        except:
+        except Exception:
             break
 
         logger.info(f"进程 {os.getpid()} 处理 chunk（{len(chunk)} 条）")
-Original file line number
+Diff line change
@@ -25,7 +25,7 @@
     try:
         from flash_attn import flash_attn_with_kvcache
-    except:
+    except Exception:
         flash_attn_with_kvcache = None
@@ Expand Down @@