@@ -112,54 +112,63 @@ def change_cache_limit(self, size: int):
112112 assert isinstance (size , int ) and size > 0
113113 Setting .set_value (CACHE_LIMIT_KEY , size , self .db )
114114
115- def _create_hashable_nb (
115+ def create_hashed_notebook (
116116 self ,
117117 nb : nbf .NotebookNode ,
118- compare_nb_meta = ("kernelspec" ,),
119- compare_cell_meta = None ,
120- ):
121- """Create a notebook containing only content desired for hashing."""
118+ nb_metadata : Optional [Iterable [str ]] = ("kernelspec" ,),
119+ cell_metadata : Optional [Iterable [str ]] = None ,
120+ ) -> Tuple [nbf .NotebookNode , str ]:
121+ """Convert a notebook to a standard format and hash.
122+
123+ Note: we always hash notebooks as version 4.4,
124+ to allow for matching notebooks of different versions
125+
126+ :param nb_metadata: The notebook metadata keys to hash (if None, use all)
127+ :param cell_metadata: The cell metadata keys to hash (if None, use all)
128+
129+ :return: (notebook, hash)
130+ """
131+ # copy the notebook
122132 nb = copy .deepcopy (nb )
123- nb .metadata = nbf .from_dict (
133+ # update the notebook to consistent version 4.4
134+ nb = nbf .convert (nb , to_version = NB_VERSION )
135+ if nb .nbformat_minor > 5 :
136+ raise CachingError ("notebook version greater than 4.5 not yet supported" )
137+ # remove non-code cells
138+ nb .cells = [cell for cell in nb .cells if cell .cell_type == "code" ]
139+ # create notebook for hashing, with selected metadata
140+ hash_nb = nbf .from_dict (
124141 {
125- k : v
126- for k , v in nb .metadata .items ()
127- if compare_nb_meta is None or (k in compare_nb_meta )
142+ "nbformat" : nb .nbformat ,
143+ "nbformat_minor" : 4 , # v4.5 include cell ids, which we do not cache
144+ "metadata" : {
145+ k : v
146+ for k , v in nb .metadata .items ()
147+ if nb_metadata is None or (k in nb_metadata )
148+ },
149+ "cells" : [
150+ {
151+ "cell_type" : cell .cell_type ,
152+ "source" : cell .source ,
153+ "metadata" : {
154+ k : v
155+ for k , v in cell .metadata .items ()
156+ if cell_metadata is None or (k in cell_metadata )
157+ },
158+ "execution_count" : None ,
159+ "outputs" : [],
160+ }
161+ for cell in nb .cells
162+ if cell .cell_type == "code"
163+ ],
128164 }
129165 )
130- diff_cells = []
131- for cell in nb .cells :
132- if cell .cell_type != "code" :
133- continue
134- diff_cell = nbf .from_dict (
135- {
136- "cell_type" : cell .cell_type ,
137- "source" : cell .source ,
138- "metadata" : {
139- k : v
140- for k , v in cell .metadata .items ()
141- if compare_cell_meta is None or (k in compare_cell_meta )
142- },
143- "execution_count" : None ,
144- "outputs" : [],
145- }
146- )
147- diff_cells .append (diff_cell )
148- nb .cells = diff_cells
149- return nb
150166
151- def _hash_notebook (
152- self ,
153- nb : nbf .NotebookNode ,
154- compare_nb_meta = ("kernelspec" ,),
155- compare_cell_meta = None ,
156- ):
157- """Create a hashkey for a notebook bundle."""
158- nb = self ._create_hashable_nb (
159- nb , compare_nb_meta = compare_nb_meta , compare_cell_meta = compare_cell_meta
160- )
161- string = nbf .writes (nb , NB_VERSION )
162- return hashlib .md5 (string .encode ()).hexdigest ()
167+ # hash notebook
168+ string = nbf .writes (hash_nb , nbf .NO_CONVERT )
169+ hash_string = hashlib .md5 (string .encode ()).hexdigest ()
170+
171+ return (nb , hash_string )
163172
164173 def _validate_nb_bundle (self , nb_bundle : NbBundleIn ):
165174 """Validate that a notebook bundle should be cached.
@@ -182,13 +191,6 @@ def _validate_nb_bundle(self, nb_bundle: NbBundleIn):
182191 # TODO check for output exceptions?
183192 # TODO assets
184193
185- def _prepare_nb_for_cache (self , nb : nbf .NotebookNode , deepcopy = False ):
186- """Prepare in-place, we remove non-code cells."""
187- if deepcopy :
188- nb = copy .deepcopy (nb )
189- nb .cells = [cell for cell in nb .cells if cell .cell_type == "code" ]
190- return nb
191-
192194 def cache_notebook_bundle (
193195 self ,
194196 bundle : NbBundleIn ,
@@ -201,7 +203,9 @@ def cache_notebook_bundle(
201203
202204 if check_validity :
203205 self ._validate_nb_bundle (bundle )
204- hashkey = self ._hash_notebook (bundle .nb )
206+
207+ hashed_nb , hashkey = self .create_hashed_notebook (bundle .nb )
208+
205209 path = self ._get_notebook_path_cache (hashkey )
206210 if path .exists ():
207211 if not overwrite :
@@ -221,8 +225,7 @@ def cache_notebook_bundle(
221225 description = description ,
222226 )
223227 path .parent .mkdir (parents = True )
224- self ._prepare_nb_for_cache (bundle .nb )
225- path .write_text (nbf .writes (bundle .nb , NB_VERSION ), encoding = "utf8" )
228+ path .write_text (nbf .writes (hashed_nb , nbf .NO_CONVERT ), encoding = "utf8" )
226229
227230 # write artifacts
228231 artifact_folder = self ._get_artifact_path_cache (hashkey )
@@ -260,7 +263,7 @@ def cache_notebook_file(
260263 :param overwrite: Allow overwrite of cached notebooks with matching hash
261264 :return: The primary key of the cache record
262265 """
263- notebook = nbf .read (str (path ), NB_VERSION )
266+ notebook = nbf .read (str (path ), nbf . NO_CONVERT )
264267 return self .cache_notebook_bundle (
265268 NbBundleIn (
266269 notebook ,
@@ -289,7 +292,7 @@ def get_cache_bundle(self, pk: int) -> NbBundleOut:
289292 )
290293
291294 return NbBundleOut (
292- nbf .reads (path .read_text (encoding = "utf8" ), NB_VERSION ),
295+ nbf .reads (path .read_text (encoding = "utf8" ), nbf . NO_CONVERT ),
293296 record = record ,
294297 artifacts = NbArtifacts (
295298 [p for p in artifact_folder .glob ("**/*" ) if p .is_file ()],
@@ -325,15 +328,15 @@ def match_cache_notebook(self, nb: nbf.NotebookNode) -> NbCacheRecord:
325328
326329 :raises KeyError: if no match is found
327330 """
328- hashkey = self ._hash_notebook (nb )
331+ _ , hashkey = self .create_hashed_notebook (nb )
329332 cache_record = NbCacheRecord .record_from_hashkey (hashkey , self .db )
330333 return cache_record
331334
332335 def merge_match_into_notebook (
333336 self ,
334337 nb : nbf .NotebookNode ,
335- nb_meta = ("kernelspec" , "language_info" , "widgets" ),
336- cell_meta = None ,
338+ nb_meta : Optional [ Iterable [ str ]] = ("kernelspec" , "language_info" , "widgets" ),
339+ cell_meta : Optional [ Iterable [ str ]] = None ,
337340 ) -> Tuple [int , nbf .NotebookNode ]:
338341 """Match to an executed notebook and return a merged version
339342
@@ -342,10 +345,11 @@ def merge_match_into_notebook(
342345 :param cell_meta: cell metadata keys to merge from cached notebook (all if None)
343346 :raises KeyError: if no match is found
344347 :return: pk, input notebook with cached code cells and metadata merged.
348+
345349 """
346350 pk = self .match_cache_notebook (nb ).pk
347351 cache_nb = self .get_cache_bundle (pk ).nb
348- nb = copy .deepcopy (nb )
352+ nb = nbf . convert ( copy .deepcopy (nb ), NB_VERSION )
349353 if nb_meta is None :
350354 nb .metadata = cache_nb .metadata
351355 else :
@@ -355,13 +359,18 @@ def merge_match_into_notebook(
355359 for idx in range (len (nb .cells )):
356360 if nb .cells [idx ].cell_type == "code" :
357361 cache_cell = cache_nb .cells .pop (0 )
362+ in_cell = nb .cells [idx ]
358363 if cell_meta is not None :
359364 # update the input metadata with select cached notebook metadata
360365 # then add the input metadata to the cached cell
361- nb . cells [ idx ] .metadata .update (
366+ in_cell .metadata .update (
362367 {k : v for k , v in cache_cell .metadata .items () if k in cell_meta }
363368 )
364- cache_cell .metadata = nb .cells [idx ].metadata
369+ cache_cell .metadata = in_cell .metadata
370+ if nb .nbformat_minor >= 5 :
371+ cache_cell .id = in_cell .id
372+ else :
373+ cache_cell .pop ("id" , None )
365374 nb .cells [idx ] = cache_cell
366375 return pk , nb
367376
@@ -376,7 +385,8 @@ def diff_nbnode_with_cache(
376385 from nbdime .prettyprint import pretty_print_diff , PrettyPrintConfig
377386
378387 cached_nb = self .get_cache_bundle (pk ).nb
379- nb = self ._prepare_nb_for_cache (nb , deepcopy = True )
388+ nb , _ = self .create_hashed_notebook (nb )
389+
380390 diff = nbdime .diff_notebooks (cached_nb , nb )
381391 if not as_str :
382392 return diff
@@ -430,7 +440,7 @@ def get_staged_notebook(
430440 "The URI of the staged record no longer exists: {}" .format (uri_or_pk )
431441 )
432442 if converter is None :
433- notebook = nbf .read (uri_or_pk , NB_VERSION )
443+ notebook = nbf .read (uri_or_pk , nbf . NO_CONVERT )
434444 else :
435445 notebook = converter (uri_or_pk )
436446 return NbBundleIn (notebook , uri_or_pk )
@@ -443,7 +453,7 @@ def get_cache_record_of_staged(
443453 else :
444454 record = NbStageRecord .record_from_uri (uri_or_pk , self .db )
445455 nb = self .get_staged_notebook (record .uri , converter = converter ).nb
446- hashkey = self ._hash_notebook (nb )
456+ _ , hashkey = self .create_hashed_notebook (nb )
447457 try :
448458 return NbCacheRecord .record_from_hashkey (hashkey , self .db )
449459 except KeyError :
@@ -456,7 +466,7 @@ def list_staged_unexecuted(
456466 records = []
457467 for record in self .list_staged_records ():
458468 nb = self .get_staged_notebook (record .uri , converter ).nb
459- hashkey = self ._hash_notebook (nb )
469+ _ , hashkey = self .create_hashed_notebook (nb )
460470 try :
461471 NbCacheRecord .record_from_hashkey (hashkey , self .db )
462472 except KeyError :
0 commit comments