Skip to content

Commit c97261d

Browse files
committed
Fine-tuning Jupyter notebook fixes
Signed-off-by: Bruno Alvisio <balvisio@nvidia.com>
1 parent 97f5c3a commit c97261d

2 files changed

Lines changed: 110 additions & 108 deletions

File tree

bionemo-recipes/recipes/evo2_megatron/examples/fine-tuning-tutorial.ipynb

Lines changed: 102 additions & 96 deletions
Large diffs are not rendered by default.

bionemo-recipes/recipes/evo2_megatron/src/bionemo/evo2/models/megatron/hyena/hyena_utils.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -783,23 +783,19 @@ def small_init_init_method(dim):
783783
Improving the Normalization of Self-Attention - Nguyen, T. & Salazar, J. (2019), using a normal distribution.
784784
"""
785785
std = math.sqrt(2 / (5 * dim))
786-
787-
def init_(tensor):
788-
res = torch.nn.init.normal_(tensor, mean=0.0, std=std)
789-
return res
790-
791-
return init_
786+
# Return functools.partial instead of a nested closure so the resulting callable has an
787+
# importable qualified name. Closures get serialized as `...<locals>.init_` in run_config.yaml
788+
# and cannot be re-instantiated during inference/checkpoint load.
789+
return partial(torch.nn.init.normal_, mean=0.0, std=std)
792790

793791

794792
def wang_init_method(n_layers, dim):
795793
"""Initialize the weights of the model using the Wang initialization method."""
796794
std = 2 / n_layers / math.sqrt(dim)
797-
798-
def init_(tensor):
799-
res = torch.nn.init.normal_(tensor, mean=0.0, std=std)
800-
return res
801-
802-
return init_
795+
# Return functools.partial instead of a nested closure so the resulting callable has an
796+
# importable qualified name. Closures get serialized as `...<locals>.init_` in run_config.yaml
797+
# and cannot be re-instantiated during inference/checkpoint load.
798+
return partial(torch.nn.init.normal_, mean=0.0, std=std)
803799

804800

805801
def get_init_method(init_method_name, num_layers, hidden_size):

0 commit comments

Comments
 (0)