Skip to content

Commit 59a828a

Browse files
authored
Merge pull request #1321 from TransformerLensOrg/dev
Release 3.3.0
2 parents 3ee411b + 00cf3c2 commit 59a828a

185 files changed

Lines changed: 12345 additions & 3472 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.github/workflows/checks.yml

Lines changed: 32 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
name: Checks
22

33
on:
4+
workflow_dispatch:
45
push:
56
branches:
67
- main
@@ -42,6 +43,15 @@ permissions:
4243
actions: write
4344
contents: write
4445

46+
# Cancel in-progress PR runs on new push; non-PR events (release, tags) are exempt.
47+
concurrency:
48+
group: ${{ github.workflow }}-${{ github.ref }}
49+
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
50+
51+
# Retry HF 429s in non-pytest invocations; pytest enables via tests/conftest.py.
52+
env:
53+
TRANSFORMERLENS_HF_RETRY: "1"
54+
4555
jobs:
4656
compatibility-checks:
4757
name: Compatibility Checks
@@ -54,15 +64,15 @@ jobs:
5464
- "3.11"
5565
- "3.12"
5666
steps:
57-
- uses: actions/checkout@v3
67+
- uses: actions/checkout@v4
5868
- name: Install uv
59-
uses: astral-sh/setup-uv@v6
69+
uses: astral-sh/setup-uv@v7
6070
with:
6171
python-version: ${{ matrix.python-version }}
6272
activate-environment: true
6373
enable-cache: true
6474
- name: Cache Models used with Tests
65-
uses: actions/cache@v3
75+
uses: actions/cache@v4
6676
with:
6777
path: |
6878
~/.cache/huggingface/hub/models--gpt2
@@ -113,13 +123,13 @@ jobs:
113123
steps:
114124
- uses: actions/checkout@v4
115125
- name: Install uv
116-
uses: astral-sh/setup-uv@v6
126+
uses: astral-sh/setup-uv@v7
117127
with:
118128
python-version: "3.11"
119129
activate-environment: true
120130
enable-cache: true
121131
- name: MPS Cache Models
122-
uses: actions/cache@v3
132+
uses: actions/cache@v4
123133
with:
124134
path: |
125135
~/.cache/huggingface/hub/models--roneneldan--TinyStories-1M*
@@ -178,9 +188,9 @@ jobs:
178188
runs-on: ubuntu-latest
179189
timeout-minutes: 10
180190
steps:
181-
- uses: actions/checkout@v3
191+
- uses: actions/checkout@v4
182192
- name: Install uv
183-
uses: astral-sh/setup-uv@v6
193+
uses: astral-sh/setup-uv@v7
184194
with:
185195
python-version: "3.12"
186196
activate-environment: true
@@ -197,9 +207,9 @@ jobs:
197207
runs-on: ubuntu-latest
198208
timeout-minutes: 10
199209
steps:
200-
- uses: actions/checkout@v3
210+
- uses: actions/checkout@v4
201211
- name: Install uv
202-
uses: astral-sh/setup-uv@v6
212+
uses: astral-sh/setup-uv@v7
203213
with:
204214
python-version: "3.12"
205215
activate-environment: true
@@ -216,9 +226,9 @@ jobs:
216226
runs-on: ubuntu-latest
217227
timeout-minutes: 15
218228
steps:
219-
- uses: actions/checkout@v3
229+
- uses: actions/checkout@v4
220230
- name: Install uv
221-
uses: astral-sh/setup-uv@v6
231+
uses: astral-sh/setup-uv@v7
222232
with:
223233
python-version: "3.12"
224234
activate-environment: true
@@ -235,15 +245,15 @@ jobs:
235245
runs-on: ubuntu-latest
236246
timeout-minutes: 60
237247
steps:
238-
- uses: actions/checkout@v3
248+
- uses: actions/checkout@v4
239249
- name: Install uv
240-
uses: astral-sh/setup-uv@v6
250+
uses: astral-sh/setup-uv@v7
241251
with:
242252
python-version: "3.12"
243253
activate-environment: true
244254
enable-cache: true
245255
- name: Cache Models used with Tests
246-
uses: actions/cache@v3
256+
uses: actions/cache@v4
247257
with:
248258
path: |
249259
~/.cache/huggingface/hub/models--gpt2
@@ -282,7 +292,7 @@ jobs:
282292
- name: Build check
283293
run: uv build
284294
- name: Upload Coverage Report Artifact
285-
uses: actions/upload-artifact@v4
295+
uses: actions/upload-artifact@v7
286296
with:
287297
name: test-coverage
288298
path: htmlcov
@@ -320,7 +330,7 @@ jobs:
320330
- notebook: "LLaMA2_GPU_Quantized"
321331
requires_hf_token: true
322332
steps:
323-
- uses: actions/checkout@v3
333+
- uses: actions/checkout@v4
324334
- name: Add swap space
325335
run: |
326336
sudo swapoff /swapfile 2>/dev/null || true
@@ -330,13 +340,13 @@ jobs:
330340
sudo mkswap /swapfile
331341
sudo swapon /swapfile
332342
- name: Install uv
333-
uses: astral-sh/setup-uv@v6
343+
uses: astral-sh/setup-uv@v7
334344
with:
335345
python-version: "3.11"
336346
activate-environment: true
337347
enable-cache: true
338348
- name: Re-use HuggingFace models cache
339-
uses: actions/cache/restore@v3
349+
uses: actions/cache/restore@v4
340350
with:
341351
path: ~/.cache/huggingface/hub
342352
key: ${{ runner.os }}-huggingface-models
@@ -374,7 +384,7 @@ jobs:
374384
steps:
375385
- uses: actions/checkout@v4
376386
- name: Install uv
377-
uses: astral-sh/setup-uv@v6
387+
uses: astral-sh/setup-uv@v7
378388
with:
379389
python-version: "3.11"
380390
activate-environment: true
@@ -389,7 +399,7 @@ jobs:
389399
uv lock --check
390400
uv sync
391401
- name: Download Test Coverage Artifact
392-
uses: actions/download-artifact@v4
402+
uses: actions/download-artifact@v8
393403
with:
394404
name: test-coverage
395405
path: docs/source/_static/coverage
@@ -403,7 +413,7 @@ jobs:
403413
env:
404414
HF_TOKEN: ${{ secrets.HF_TOKEN }}
405415
- name: Upload Docs Artifact
406-
uses: actions/upload-artifact@v4
416+
uses: actions/upload-artifact@v7
407417
with:
408418
name: documentation
409419
path: docs/build
@@ -417,7 +427,7 @@ jobs:
417427
steps:
418428
- uses: actions/checkout@v4
419429
- name: Download Docs Artifact
420-
uses: actions/download-artifact@v4
430+
uses: actions/download-artifact@v8
421431
with:
422432
name: documentation
423433
path: docs/build

.github/workflows/release.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
runs-on: ubuntu-latest
1818
steps:
1919
- name: Install uv
20-
uses: astral-sh/setup-uv@v6
20+
uses: astral-sh/setup-uv@v7
2121
with:
2222
python-version: "3.12"
2323
activate-environment: true
@@ -44,9 +44,9 @@ jobs:
4444
- semver-parser
4545
runs-on: ubuntu-latest
4646
steps:
47-
- uses: actions/checkout@v3
47+
- uses: actions/checkout@v4
4848
- name: Install uv
49-
uses: astral-sh/setup-uv@v6
49+
uses: astral-sh/setup-uv@v7
5050
with:
5151
python-version: "3.12"
5252
activate-environment: true

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ CD](https://github.com/TransformerLensOrg/TransformerLens/actions/workflows/chec
1010
[![Docs
1111
CD](https://github.com/TransformerLensOrg/TransformerLens/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/TransformerLensOrg/TransformerLens/actions/workflows/pages/pages-build-deployment)
1212

13-
A Library for Mechanistic Interpretability of Generative Language Models. Maintained by [Bryce Meyer](https://github.com/bryce13950) and created by [Neel Nanda](https://neelnanda.io/about)
13+
A Library for Mechanistic Interpretability of Generative Language Models. Maintained by [Bryce Meyer](https://github.com/bryce13950) and [Jonah Larson](https://github.com/jlarson4); created by [Neel Nanda](https://neelnanda.io/about)
1414

1515
[![Read the Docs
1616
Here](https://img.shields.io/badge/-Read%20the%20Docs%20Here-blue?style=for-the-badge&logo=Read-the-Docs&logoColor=white&link=https://TransformerLensOrg.github.io/TransformerLens/)](https://TransformerLensOrg.github.io/TransformerLens/)
@@ -50,6 +50,8 @@ bridge = TransformerBridge.boot_transformers("gpt2", device="cpu")
5050
logits, activations = bridge.run_with_cache("Hello World")
5151
```
5252

53+
> Gated models (Llama, Mistral, Gemma, ...) require `HF_TOKEN` to be set in your environment. See [Environment Variables](https://TransformerLensOrg.github.io/TransformerLens/content/getting_started.html#environment-variables) for the full list of environment variables.
54+
5355
`TransformerBridge` is the recommended 3.0 path and supports 50+ architectures. By default it preserves raw HuggingFace weights – logits and activations match HF, *not* legacy `HookedTransformer` (which folds LayerNorm and centers weights by default). Call `bridge.enable_compatibility_mode()` after booting for HookedTransformer-equivalent numerics. The legacy `HookedTransformer.from_pretrained` API is still available but deprecated — see the [Migrating to TransformerLens 3](https://TransformerLensOrg.github.io/TransformerLens/content/migrating_to_v3.html) guide.
5456

5557
## Key Tutorials

demos/ARENA_Content.ipynb

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -56,18 +56,9 @@
5656
},
5757
{
5858
"cell_type": "code",
59-
"execution_count": 2,
59+
"execution_count": null,
6060
"metadata": {},
61-
"outputs": [
62-
{
63-
"name": "stderr",
64-
"output_type": "stream",
65-
"text": [
66-
"`torch_dtype` is deprecated! Use `dtype` instead!\n",
67-
"The following generation flags are not valid and may be ignored: ['output_attentions']. Set `TRANSFORMERS_VERBOSITY=info` for more details.\n"
68-
]
69-
}
70-
],
61+
"outputs": [],
7162
"source": [
7263
"# NBVAL_IGNORE_OUTPUT\n",
7364
"\n",
@@ -76,10 +67,10 @@
7667
" \"gpt2\",\n",
7768
" device=device,\n",
7869
")\n",
79-
"reference_gpt2.enable_compatibility_mode(disable_warnings=True)",
80-
"\n",
70+
"reference_gpt2.enable_compatibility_mode(disable_warnings=True)\n",
8171
"reference_gpt2.set_use_split_qkv_input(True)\n",
82-
"reference_gpt2.set_use_attn_result(True)"
72+
"reference_gpt2.set_use_attn_result(True)\n",
73+
"reference_gpt2.set_use_hook_mlp_in(True)"
8374
]
8475
},
8576
{

0 commit comments

Comments
 (0)