You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard | Expand all lines: src/llamascopium/models/sparse_dictionary.py
+25 -2 | Lines changed: 25 additions & 2 deletions
Original file line number
Diff line number
Diff line change
@@ -132,6 +132,12 @@ class SparseDictionaryConfig(BaseModelConfig, ABC):
132
132
top_k: int=50
133
133
"""The k value to use for the topk family of activation functions. For vanilla TopK, the L0 norm of the feature activations will be exactly equal to `top_k`."""
134
134
135
+
sae_pretrained_name_or_path: str|None=None
136
+
"""Optional pretrained SAE path or identifier used to restore model weights."""
137
+
138
+
strict_loading: bool=True
139
+
"""Whether to strictly enforce an exact state_dict key match when loading pretrained weights."""
140
+
135
141
use_triton_kernel: bool=False
136
142
"""Whether to use the Triton SpMM kernel for the sparse matrix multiplication. Currently only supported for vanilla SAE."""
"""Load the config of the sparse dictionary from a pretrained name or path. Config is read from <pretrained_name_or_path>/config.json (for local storage) or <repo_id>/<name>/config.json (for HuggingFace Hub).
0 commit comments