diff --git a/aiak_megatron/megatron/core/config_logger.py b/aiak_megatron/megatron/core/config_logger.py index 80f35c7..22bffa3 100644 --- a/aiak_megatron/megatron/core/config_logger.py +++ b/aiak_megatron/megatron/core/config_logger.py @@ -76,7 +76,7 @@ def default(self, o): return dataclasses.asdict(o) try: return super().default(o) - except: + except Exception: return str(o) diff --git a/aiak_megatron/megatron/core/dist_checkpointing/dict_utils.py b/aiak_megatron/megatron/core/dist_checkpointing/dict_utils.py index cd46134..a995e42 100644 --- a/aiak_megatron/megatron/core/dist_checkpointing/dict_utils.py +++ b/aiak_megatron/megatron/core/dist_checkpointing/dict_utils.py @@ -148,7 +148,7 @@ def inspect_types(x: Any, prefix: Tuple = (), indent: int = 4): else: try: x_str = str(x) - except: + except Exception: x_str = '' if len(x_str) > 30: x_str = x_str[:30] + '... (truncated)' diff --git a/aiak_megatron/megatron/core/distributed/custom_fsdp/param_and_grad_buffer.py b/aiak_megatron/megatron/core/distributed/custom_fsdp/param_and_grad_buffer.py index e321b19..9c6e816 100644 --- a/aiak_megatron/megatron/core/distributed/custom_fsdp/param_and_grad_buffer.py +++ b/aiak_megatron/megatron/core/distributed/custom_fsdp/param_and_grad_buffer.py @@ -24,12 +24,12 @@ try: from transformer_engine.pytorch import fp8_model_init -except: +except Exception: pass try: from transformer_engine.pytorch.module.base import TransformerEngineBaseModule -except: +except Exception: pass diff --git a/aiak_megatron/megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py b/aiak_megatron/megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py index 247d715..0e24a08 100644 --- a/aiak_megatron/megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py +++ b/aiak_megatron/megatron/core/export/trtllm/engine_builder/trtllm_engine_builder.py @@ -79,7 +79,7 @@ def build_and_save_engine( ) try: model_cls = getattr(tensorrt_llm.models, architecture) - except: + except Exception: raise AttributeError(f"Could not find TRTLLM model for architecture: {architecture}!") logger.set_level("info") diff --git a/aiak_megatron/megatron/core/models/common/embeddings/rope_utils.py b/aiak_megatron/megatron/core/models/common/embeddings/rope_utils.py index 6c37d48..f840e26 100644 --- a/aiak_megatron/megatron/core/models/common/embeddings/rope_utils.py +++ b/aiak_megatron/megatron/core/models/common/embeddings/rope_utils.py @@ -25,7 +25,7 @@ except ImportError: try: from megatron.core.extensions.transformer_engine import fused_apply_rotary_pos_emb - except: + except Exception: fused_apply_rotary_pos_emb = None diff --git a/aiak_megatron/megatron/core/models/multimodal/llava_model.py b/aiak_megatron/megatron/core/models/multimodal/llava_model.py index 376540a..95f8e81 100644 --- a/aiak_megatron/megatron/core/models/multimodal/llava_model.py +++ b/aiak_megatron/megatron/core/models/multimodal/llava_model.py @@ -31,9 +31,9 @@ import transformer_engine_torch as tex HAVE_TEX = True - except: + except Exception: HAVE_TEX = False -except: +except Exception: HAVE_TE = False if get_context_parallel_world_size() > 1: raise RuntimeError("ContextParallelism requires TransformerEngine support, but not found.") diff --git a/aiak_megatron/megatron/core/models/vision/clip_vit_model.py b/aiak_megatron/megatron/core/models/vision/clip_vit_model.py index bfe3514..2a8ab01 100644 --- a/aiak_megatron/megatron/core/models/vision/clip_vit_model.py +++ b/aiak_megatron/megatron/core/models/vision/clip_vit_model.py @@ -18,7 +18,7 @@ from megatron.core.extensions.transformer_engine import TENorm NORM_IMPL = TENorm -except: +except Exception: NORM_IMPL = torch.nn.LayerNorm diff --git a/aiak_megatron/megatron/core/transformer/attention.py b/aiak_megatron/megatron/core/transformer/attention.py index b489a35..38ba937 100644 --- a/aiak_megatron/megatron/core/transformer/attention.py +++ b/aiak_megatron/megatron/core/transformer/attention.py @@ -25,7 +25,7 @@ try: from flash_attn import flash_attn_with_kvcache -except: +except Exception: flash_attn_with_kvcache = None diff --git a/aiak_megatron/megatron/core/transformer/cuda_graphs.py b/aiak_megatron/megatron/core/transformer/cuda_graphs.py index f19c601..4f7cdbd 100644 --- a/aiak_megatron/megatron/core/transformer/cuda_graphs.py +++ b/aiak_megatron/megatron/core/transformer/cuda_graphs.py @@ -33,7 +33,7 @@ from megatron.core.extensions.transformer_engine import TECudaRNGStatesTracker HAVE_TE_GRAPHS = True -except: +except Exception: HAVE_TE_GRAPHS = False _IS_GRAPH_CAPTURING = False diff --git a/aiak_megatron/pretrain_gpt.py b/aiak_megatron/pretrain_gpt.py index fc6d112..ca8f4e3 100644 --- a/aiak_megatron/pretrain_gpt.py +++ b/aiak_megatron/pretrain_gpt.py @@ -118,7 +118,7 @@ def oom_observer(device, alloc, device_alloc, device_free): # Check if fp8_model_init supports preserve_high_precision_init_val if "preserve_high_precision_init_val" in inspect.signature(fp8_model_init).parameters: build_model_context_args["preserve_high_precision_init_val"] = True - except: + except Exception: raise RuntimeError("--fp8-param-gather requires `fp8_model_init` from TransformerEngine,but not found.") with build_model_context(**build_model_context_args): diff --git a/aiak_megatron/tests/unit_tests/data/test_preprocess_data.py b/aiak_megatron/tests/unit_tests/data/test_preprocess_data.py index 8ee4f0a..d079a0e 100644 --- a/aiak_megatron/tests/unit_tests/data/test_preprocess_data.py +++ b/aiak_megatron/tests/unit_tests/data/test_preprocess_data.py @@ -95,7 +95,7 @@ def tokens_to_string(toks): for option in ["decode", "detokenize"]: try: return getattr(encoder.tokenizer, option)(toks) - except: + except Exception: continue raise RuntimeError(f"{type(encoder.tokenizer)} tokenizer cannot decode or detokenize") diff --git a/aiak_megatron/tests/unit_tests/test_optimizer_cpu_offloading.py b/aiak_megatron/tests/unit_tests/test_optimizer_cpu_offloading.py index 1c36710..0ad0429 100644 --- a/aiak_megatron/tests/unit_tests/test_optimizer_cpu_offloading.py +++ b/aiak_megatron/tests/unit_tests/test_optimizer_cpu_offloading.py @@ -11,7 +11,7 @@ try: from transformer_engine.pytorch.optimizers import FusedAdam as GPUAdam from transformer_engine.pytorch.optimizers import FusedSGD as GPUSGD -except: +except Exception: # Handle environment where transformer_engine is not installed from torch.optim import SGD as GPUSGD from torch.optim import Adam as GPUAdam diff --git a/aiak_megatron/tests/unit_tests/transformer/test_spec_customization.py b/aiak_megatron/tests/unit_tests/transformer/test_spec_customization.py index a9a245b..2139e44 100644 --- a/aiak_megatron/tests/unit_tests/transformer/test_spec_customization.py +++ b/aiak_megatron/tests/unit_tests/transformer/test_spec_customization.py @@ -150,7 +150,7 @@ def test_sliding_window_attention(self): attn = DotProductAttention( config, layer_number=1, attn_mask_type=AttnMaskType.causal, attention_type='self' ) - except: + except Exception: threw = True finally: assert threw, 'Expected DotProductAttention to throw exception for SWA' @@ -169,7 +169,7 @@ def test_sliding_window_attention(self): attn = TEDotProductAttention( config, layer_number=1, attn_mask_type=AttnMaskType.causal, attention_type='self' ) - except: + except Exception: threw = True finally: assert threw, "Expected TEDotProductAttention to throw for integer window-size" diff --git a/aiak_megatron/tools/checkpoint/loader_mcore.py b/aiak_megatron/tools/checkpoint/loader_mcore.py index 42d0a17..4293b06 100644 --- a/aiak_megatron/tools/checkpoint/loader_mcore.py +++ b/aiak_megatron/tools/checkpoint/loader_mcore.py @@ -378,6 +378,6 @@ def queue_put(name, msg): def load_checkpoint(queue, args): try: _load_checkpoint(queue, args) - except: + except Exception: queue.put("exit") raise diff --git a/aiak_training_llm/models/custom/common/local_attention.py b/aiak_training_llm/models/custom/common/local_attention.py index 6f565c0..e0a76b1 100644 --- a/aiak_training_llm/models/custom/common/local_attention.py +++ b/aiak_training_llm/models/custom/common/local_attention.py @@ -17,7 +17,7 @@ from flash_attn.flash_attn_interface import flash_attn_varlen_func import rearrange HAVE_FLASH_ATTN = True -except: +except Exception: HAVE_FLASH_ATTN = False diff --git a/aiak_training_llm/models/custom/common/local_norm.py b/aiak_training_llm/models/custom/common/local_norm.py index d488ee2..aee70db 100644 --- a/aiak_training_llm/models/custom/common/local_norm.py +++ b/aiak_training_llm/models/custom/common/local_norm.py @@ -6,13 +6,13 @@ try: from apex.normalization.fused_layer_norm import FusedRMSNorm as ApexFusedRMSNorm HAVE_FUSED_RMS_NORM = True -except: +except Exception: HAVE_FUSED_RMS_NORM = False try: from apex.normalization.fused_layer_norm import FusedLayerNorm as ApexFusedLayerNorm HAVE_FUSED_LAYER_NORM = True -except: +except Exception: HAVE_FUSED_LAYER_NORM = False diff --git a/aiak_training_llm/models/qwen/qwen_provider.py b/aiak_training_llm/models/qwen/qwen_provider.py index 8455be1..0a59f24 100644 --- a/aiak_training_llm/models/qwen/qwen_provider.py +++ b/aiak_training_llm/models/qwen/qwen_provider.py @@ -57,7 +57,7 @@ def qwen_model_provider( # Check if fp8_model_init supports preserve_high_precision_init_val if "preserve_high_precision_init_val" in inspect.signature(fp8_model_init).parameters: build_model_context_args["preserve_high_precision_init_val"] = True - except: + except Exception: raise RuntimeError("--fp8-param-gather requires `fp8_model_init` from TransformerEngine,but not found.") with build_model_context(**build_model_context_args): diff --git a/examples_offline_packing/bmr_packing/s1_bmr_sft_data_proc_indcoding.py b/examples_offline_packing/bmr_packing/s1_bmr_sft_data_proc_indcoding.py index ac58aab..f697a6a 100644 --- a/examples_offline_packing/bmr_packing/s1_bmr_sft_data_proc_indcoding.py +++ b/examples_offline_packing/bmr_packing/s1_bmr_sft_data_proc_indcoding.py @@ -121,7 +121,7 @@ def _worker_process(job_queue, result_list, base_dir, output_dir, while True: try: chunk = job_queue.get_nowait() - except: + except Exception: break logger.info(f"进程 {os.getpid()} 处理 chunk({len(chunk)} 条)")