Skip to content

Commit 3f1408c

Browse files
authored
🐛 FIX: nbfomat v4.5 cell IDs (#62)
v4.5 notebooks now contain `cell.id`. To deal with this, we always hash the notebooks as v4.4 (with ids removed). Merging cache into a notebook now also preserves the input notebook minor version, adding or removing `cell.id` where required. Additionally, tests for timeouts are improved, to be more deterministic.
1 parent b4026a2 commit 3f1408c

15 files changed

Lines changed: 276 additions & 587 deletions

File tree

.circleci/config.yml

Lines changed: 0 additions & 37 deletions
This file was deleted.

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ jobs:
2626
strategy:
2727
matrix:
2828
os: [ubuntu-latest]
29-
python-version: [3.6, 3.7, 3.8]
29+
python-version: [3.6, 3.7, 3.8, 3.9]
3030
include:
3131
- os: windows-latest
3232
python-version: 3.7

.pre-commit-config.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,13 @@ repos:
1515
- id: check-yaml
1616
- id: end-of-file-fixer
1717
- id: trailing-whitespace
18-
- id: flake8
1918

2019
- repo: https://github.com/psf/black
2120
rev: 20.8b1
2221
hooks:
2322
- id: black
23+
24+
- repo: https://gitlab.com/pycqa/flake8
25+
rev: 3.8.4
26+
hooks:
27+
- id: flake8

docs/using/api.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
"metadata": {},
3030
"source": [
3131
"```{note}\n",
32-
"The full Jupyter notebook for this page can accessed here; {jupyter-download:notebook}`api`.\n",
32+
"The full Jupyter notebook for this page can accessed here; {nb-download:notebook.ipynb}`api`.\n",
3333
"Try it for yourself!\n",
3434
"```"
3535
]

jupyter_cache/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# NOTE: never import anything here, in order to maintain CLI speed
2-
__version__ = "0.4.1"
2+
__version__ = "0.4.2a1"
33

44

55
def get_cache(path, cache_cls=None):

jupyter_cache/base.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ def match_cache_file(self, path: str) -> NbCacheRecord:
195195
196196
:raises KeyError: if no match is found
197197
"""
198-
notebook = nbf.read(path, NB_VERSION)
198+
notebook = nbf.read(path, nbf.NO_CONVERT)
199199
return self.match_cache_notebook(notebook)
200200

201201
@abstractmethod
@@ -229,7 +229,7 @@ def merge_match_into_file(
229229
:raises KeyError: if no match is found
230230
:return: pk, input notebook with cached code cells and metadata merged.
231231
"""
232-
nb = nbf.read(str(path), NB_VERSION)
232+
nb = nbf.read(str(path), nbf.NO_CONVERT)
233233
return self.merge_match_into_notebook(nb, nb_meta, cell_meta)
234234

235235
@abstractmethod
@@ -249,7 +249,7 @@ def diff_nbfile_with_cache(
249249
250250
Note: this will not diff markdown content, since it is not stored in the cache.
251251
"""
252-
nb = nbf.read(path, NB_VERSION)
252+
nb = nbf.read(path, nbf.NO_CONVERT)
253253
return self.diff_nbnode_with_cache(pk, nb, uri=path, as_str=as_str, **kwargs)
254254

255255
@abstractmethod

jupyter_cache/cache/main.py

Lines changed: 73 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -112,54 +112,63 @@ def change_cache_limit(self, size: int):
112112
assert isinstance(size, int) and size > 0
113113
Setting.set_value(CACHE_LIMIT_KEY, size, self.db)
114114

115-
def _create_hashable_nb(
115+
def create_hashed_notebook(
116116
self,
117117
nb: nbf.NotebookNode,
118-
compare_nb_meta=("kernelspec",),
119-
compare_cell_meta=None,
120-
):
121-
"""Create a notebook containing only content desired for hashing."""
118+
nb_metadata: Optional[Iterable[str]] = ("kernelspec",),
119+
cell_metadata: Optional[Iterable[str]] = None,
120+
) -> Tuple[nbf.NotebookNode, str]:
121+
"""Convert a notebook to a standard format and hash.
122+
123+
Note: we always hash notebooks as version 4.4,
124+
to allow for matching notebooks of different versions
125+
126+
:param nb_metadata: The notebook metadata keys to hash (if None, use all)
127+
:param cell_metadata: The cell metadata keys to hash (if None, use all)
128+
129+
:return: (notebook, hash)
130+
"""
131+
# copy the notebook
122132
nb = copy.deepcopy(nb)
123-
nb.metadata = nbf.from_dict(
133+
# update the notebook to consistent version 4.4
134+
nb = nbf.convert(nb, to_version=NB_VERSION)
135+
if nb.nbformat_minor > 5:
136+
raise CachingError("notebook version greater than 4.5 not yet supported")
137+
# remove non-code cells
138+
nb.cells = [cell for cell in nb.cells if cell.cell_type == "code"]
139+
# create notebook for hashing, with selected metadata
140+
hash_nb = nbf.from_dict(
124141
{
125-
k: v
126-
for k, v in nb.metadata.items()
127-
if compare_nb_meta is None or (k in compare_nb_meta)
142+
"nbformat": nb.nbformat,
143+
"nbformat_minor": 4, # v4.5 include cell ids, which we do not cache
144+
"metadata": {
145+
k: v
146+
for k, v in nb.metadata.items()
147+
if nb_metadata is None or (k in nb_metadata)
148+
},
149+
"cells": [
150+
{
151+
"cell_type": cell.cell_type,
152+
"source": cell.source,
153+
"metadata": {
154+
k: v
155+
for k, v in cell.metadata.items()
156+
if cell_metadata is None or (k in cell_metadata)
157+
},
158+
"execution_count": None,
159+
"outputs": [],
160+
}
161+
for cell in nb.cells
162+
if cell.cell_type == "code"
163+
],
128164
}
129165
)
130-
diff_cells = []
131-
for cell in nb.cells:
132-
if cell.cell_type != "code":
133-
continue
134-
diff_cell = nbf.from_dict(
135-
{
136-
"cell_type": cell.cell_type,
137-
"source": cell.source,
138-
"metadata": {
139-
k: v
140-
for k, v in cell.metadata.items()
141-
if compare_cell_meta is None or (k in compare_cell_meta)
142-
},
143-
"execution_count": None,
144-
"outputs": [],
145-
}
146-
)
147-
diff_cells.append(diff_cell)
148-
nb.cells = diff_cells
149-
return nb
150166

151-
def _hash_notebook(
152-
self,
153-
nb: nbf.NotebookNode,
154-
compare_nb_meta=("kernelspec",),
155-
compare_cell_meta=None,
156-
):
157-
"""Create a hashkey for a notebook bundle."""
158-
nb = self._create_hashable_nb(
159-
nb, compare_nb_meta=compare_nb_meta, compare_cell_meta=compare_cell_meta
160-
)
161-
string = nbf.writes(nb, NB_VERSION)
162-
return hashlib.md5(string.encode()).hexdigest()
167+
# hash notebook
168+
string = nbf.writes(hash_nb, nbf.NO_CONVERT)
169+
hash_string = hashlib.md5(string.encode()).hexdigest()
170+
171+
return (nb, hash_string)
163172

164173
def _validate_nb_bundle(self, nb_bundle: NbBundleIn):
165174
"""Validate that a notebook bundle should be cached.
@@ -182,13 +191,6 @@ def _validate_nb_bundle(self, nb_bundle: NbBundleIn):
182191
# TODO check for output exceptions?
183192
# TODO assets
184193

185-
def _prepare_nb_for_cache(self, nb: nbf.NotebookNode, deepcopy=False):
186-
"""Prepare in-place, we remove non-code cells."""
187-
if deepcopy:
188-
nb = copy.deepcopy(nb)
189-
nb.cells = [cell for cell in nb.cells if cell.cell_type == "code"]
190-
return nb
191-
192194
def cache_notebook_bundle(
193195
self,
194196
bundle: NbBundleIn,
@@ -201,7 +203,9 @@ def cache_notebook_bundle(
201203

202204
if check_validity:
203205
self._validate_nb_bundle(bundle)
204-
hashkey = self._hash_notebook(bundle.nb)
206+
207+
hashed_nb, hashkey = self.create_hashed_notebook(bundle.nb)
208+
205209
path = self._get_notebook_path_cache(hashkey)
206210
if path.exists():
207211
if not overwrite:
@@ -221,8 +225,7 @@ def cache_notebook_bundle(
221225
description=description,
222226
)
223227
path.parent.mkdir(parents=True)
224-
self._prepare_nb_for_cache(bundle.nb)
225-
path.write_text(nbf.writes(bundle.nb, NB_VERSION), encoding="utf8")
228+
path.write_text(nbf.writes(hashed_nb, nbf.NO_CONVERT), encoding="utf8")
226229

227230
# write artifacts
228231
artifact_folder = self._get_artifact_path_cache(hashkey)
@@ -260,7 +263,7 @@ def cache_notebook_file(
260263
:param overwrite: Allow overwrite of cached notebooks with matching hash
261264
:return: The primary key of the cache record
262265
"""
263-
notebook = nbf.read(str(path), NB_VERSION)
266+
notebook = nbf.read(str(path), nbf.NO_CONVERT)
264267
return self.cache_notebook_bundle(
265268
NbBundleIn(
266269
notebook,
@@ -289,7 +292,7 @@ def get_cache_bundle(self, pk: int) -> NbBundleOut:
289292
)
290293

291294
return NbBundleOut(
292-
nbf.reads(path.read_text(encoding="utf8"), NB_VERSION),
295+
nbf.reads(path.read_text(encoding="utf8"), nbf.NO_CONVERT),
293296
record=record,
294297
artifacts=NbArtifacts(
295298
[p for p in artifact_folder.glob("**/*") if p.is_file()],
@@ -325,15 +328,15 @@ def match_cache_notebook(self, nb: nbf.NotebookNode) -> NbCacheRecord:
325328
326329
:raises KeyError: if no match is found
327330
"""
328-
hashkey = self._hash_notebook(nb)
331+
_, hashkey = self.create_hashed_notebook(nb)
329332
cache_record = NbCacheRecord.record_from_hashkey(hashkey, self.db)
330333
return cache_record
331334

332335
def merge_match_into_notebook(
333336
self,
334337
nb: nbf.NotebookNode,
335-
nb_meta=("kernelspec", "language_info", "widgets"),
336-
cell_meta=None,
338+
nb_meta: Optional[Iterable[str]] = ("kernelspec", "language_info", "widgets"),
339+
cell_meta: Optional[Iterable[str]] = None,
337340
) -> Tuple[int, nbf.NotebookNode]:
338341
"""Match to an executed notebook and return a merged version
339342
@@ -342,10 +345,11 @@ def merge_match_into_notebook(
342345
:param cell_meta: cell metadata keys to merge from cached notebook (all if None)
343346
:raises KeyError: if no match is found
344347
:return: pk, input notebook with cached code cells and metadata merged.
348+
345349
"""
346350
pk = self.match_cache_notebook(nb).pk
347351
cache_nb = self.get_cache_bundle(pk).nb
348-
nb = copy.deepcopy(nb)
352+
nb = nbf.convert(copy.deepcopy(nb), NB_VERSION)
349353
if nb_meta is None:
350354
nb.metadata = cache_nb.metadata
351355
else:
@@ -355,13 +359,18 @@ def merge_match_into_notebook(
355359
for idx in range(len(nb.cells)):
356360
if nb.cells[idx].cell_type == "code":
357361
cache_cell = cache_nb.cells.pop(0)
362+
in_cell = nb.cells[idx]
358363
if cell_meta is not None:
359364
# update the input metadata with select cached notebook metadata
360365
# then add the input metadata to the cached cell
361-
nb.cells[idx].metadata.update(
366+
in_cell.metadata.update(
362367
{k: v for k, v in cache_cell.metadata.items() if k in cell_meta}
363368
)
364-
cache_cell.metadata = nb.cells[idx].metadata
369+
cache_cell.metadata = in_cell.metadata
370+
if nb.nbformat_minor >= 5:
371+
cache_cell.id = in_cell.id
372+
else:
373+
cache_cell.pop("id", None)
365374
nb.cells[idx] = cache_cell
366375
return pk, nb
367376

@@ -376,7 +385,8 @@ def diff_nbnode_with_cache(
376385
from nbdime.prettyprint import pretty_print_diff, PrettyPrintConfig
377386

378387
cached_nb = self.get_cache_bundle(pk).nb
379-
nb = self._prepare_nb_for_cache(nb, deepcopy=True)
388+
nb, _ = self.create_hashed_notebook(nb)
389+
380390
diff = nbdime.diff_notebooks(cached_nb, nb)
381391
if not as_str:
382392
return diff
@@ -430,7 +440,7 @@ def get_staged_notebook(
430440
"The URI of the staged record no longer exists: {}".format(uri_or_pk)
431441
)
432442
if converter is None:
433-
notebook = nbf.read(uri_or_pk, NB_VERSION)
443+
notebook = nbf.read(uri_or_pk, nbf.NO_CONVERT)
434444
else:
435445
notebook = converter(uri_or_pk)
436446
return NbBundleIn(notebook, uri_or_pk)
@@ -443,7 +453,7 @@ def get_cache_record_of_staged(
443453
else:
444454
record = NbStageRecord.record_from_uri(uri_or_pk, self.db)
445455
nb = self.get_staged_notebook(record.uri, converter=converter).nb
446-
hashkey = self._hash_notebook(nb)
456+
_, hashkey = self.create_hashed_notebook(nb)
447457
try:
448458
return NbCacheRecord.record_from_hashkey(hashkey, self.db)
449459
except KeyError:
@@ -456,7 +466,7 @@ def list_staged_unexecuted(
456466
records = []
457467
for record in self.list_staged_records():
458468
nb = self.get_staged_notebook(record.uri, converter).nb
459-
hashkey = self._hash_notebook(nb)
469+
_, hashkey = self.create_hashed_notebook(nb)
460470
try:
461471
NbCacheRecord.record_from_hashkey(hashkey, self.db)
462472
except KeyError:

pytest.ini

Lines changed: 0 additions & 2 deletions
This file was deleted.

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
"numpy",
5656
"sympy",
5757
"pandas",
58+
"nbformat>=5.1", # to include v4.5 notebook
5859
],
5960
"rtd": ["myst-nb~=0.7", "sphinx-copybutton", "pydata-sphinx-theme"],
6061
},

0 commit comments

Comments
 (0)