Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion aiak_megatron/megatron/core/config_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def default(self, o):
return dataclasses.asdict(o)
try:
return super().default(o)
except:
except Exception:
return str(o)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def inspect_types(x: Any, prefix: Tuple = (), indent: int = 4):
else:
try:
x_str = str(x)
except:
except Exception:
x_str = '<no string repr>'
if len(x_str) > 30:
x_str = x_str[:30] + '... (truncated)'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@

try:
from transformer_engine.pytorch import fp8_model_init
except:
except Exception:
pass

try:
from transformer_engine.pytorch.module.base import TransformerEngineBaseModule
except:
except Exception:
pass


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def build_and_save_engine(
)
try:
model_cls = getattr(tensorrt_llm.models, architecture)
except:
except Exception:
raise AttributeError(f"Could not find TRTLLM model for architecture: {architecture}!")

logger.set_level("info")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
except ImportError:
try:
from megatron.core.extensions.transformer_engine import fused_apply_rotary_pos_emb
except:
except Exception:
fused_apply_rotary_pos_emb = None


Expand Down
4 changes: 2 additions & 2 deletions aiak_megatron/megatron/core/models/multimodal/llava_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@
import transformer_engine_torch as tex

HAVE_TEX = True
except:
except Exception:
HAVE_TEX = False
except:
except Exception:
HAVE_TE = False
if get_context_parallel_world_size() > 1:
raise RuntimeError("ContextParallelism requires TransformerEngine support, but not found.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from megatron.core.extensions.transformer_engine import TENorm

NORM_IMPL = TENorm
except:
except Exception:
NORM_IMPL = torch.nn.LayerNorm


Expand Down
2 changes: 1 addition & 1 deletion aiak_megatron/megatron/core/transformer/attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

try:
from flash_attn import flash_attn_with_kvcache
except:
except Exception:
flash_attn_with_kvcache = None


Expand Down
2 changes: 1 addition & 1 deletion aiak_megatron/megatron/core/transformer/cuda_graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from megatron.core.extensions.transformer_engine import TECudaRNGStatesTracker

HAVE_TE_GRAPHS = True
except:
except Exception:
HAVE_TE_GRAPHS = False

_IS_GRAPH_CAPTURING = False
Expand Down
2 changes: 1 addition & 1 deletion aiak_megatron/pretrain_gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def oom_observer(device, alloc, device_alloc, device_free):
# Check if fp8_model_init supports preserve_high_precision_init_val
if "preserve_high_precision_init_val" in inspect.signature(fp8_model_init).parameters:
build_model_context_args["preserve_high_precision_init_val"] = True
except:
except Exception:
raise RuntimeError("--fp8-param-gather requires `fp8_model_init` from TransformerEngine,but not found.")

with build_model_context(**build_model_context_args):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def tokens_to_string(toks):
for option in ["decode", "detokenize"]:
try:
return getattr(encoder.tokenizer, option)(toks)
except:
except Exception:
continue
raise RuntimeError(f"{type(encoder.tokenizer)} tokenizer cannot decode or detokenize")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
try:
from transformer_engine.pytorch.optimizers import FusedAdam as GPUAdam
from transformer_engine.pytorch.optimizers import FusedSGD as GPUSGD
except:
except Exception:
# Handle environment where transformer_engine is not installed
from torch.optim import SGD as GPUSGD
from torch.optim import Adam as GPUAdam
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def test_sliding_window_attention(self):
attn = DotProductAttention(
config, layer_number=1, attn_mask_type=AttnMaskType.causal, attention_type='self'
)
except:
except Exception:
threw = True
finally:
assert threw, 'Expected DotProductAttention to throw exception for SWA'
Expand All @@ -169,7 +169,7 @@ def test_sliding_window_attention(self):
attn = TEDotProductAttention(
config, layer_number=1, attn_mask_type=AttnMaskType.causal, attention_type='self'
)
except:
except Exception:
threw = True
finally:
assert threw, "Expected TEDotProductAttention to throw for integer window-size"
Expand Down
2 changes: 1 addition & 1 deletion aiak_megatron/tools/checkpoint/loader_mcore.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,6 @@ def queue_put(name, msg):
def load_checkpoint(queue, args):
try:
_load_checkpoint(queue, args)
except:
except Exception:
queue.put("exit")
raise
2 changes: 1 addition & 1 deletion aiak_training_llm/models/custom/common/local_attention.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from flash_attn.flash_attn_interface import flash_attn_varlen_func
import rearrange
HAVE_FLASH_ATTN = True
except:
except Exception:
HAVE_FLASH_ATTN = False


Expand Down
4 changes: 2 additions & 2 deletions aiak_training_llm/models/custom/common/local_norm.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
try:
from apex.normalization.fused_layer_norm import FusedRMSNorm as ApexFusedRMSNorm
HAVE_FUSED_RMS_NORM = True
except:
except Exception:
HAVE_FUSED_RMS_NORM = False

try:
from apex.normalization.fused_layer_norm import FusedLayerNorm as ApexFusedLayerNorm
HAVE_FUSED_LAYER_NORM = True
except:
except Exception:
HAVE_FUSED_LAYER_NORM = False


Expand Down
2 changes: 1 addition & 1 deletion aiak_training_llm/models/qwen/qwen_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def qwen_model_provider(
# Check if fp8_model_init supports preserve_high_precision_init_val
if "preserve_high_precision_init_val" in inspect.signature(fp8_model_init).parameters:
build_model_context_args["preserve_high_precision_init_val"] = True
except:
except Exception:
raise RuntimeError("--fp8-param-gather requires `fp8_model_init` from TransformerEngine,but not found.")

with build_model_context(**build_model_context_args):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def _worker_process(job_queue, result_list, base_dir, output_dir,
while True:
try:
chunk = job_queue.get_nowait()
except:
except Exception:
break

logger.info(f"进程 {os.getpid()} 处理 chunk({len(chunk)} 条)")
Expand Down