From a07fd478fa4b1d98013aaa0ad132a2b191d33f02 Mon Sep 17 00:00:00 2001 From: Garrett Bischof Date: Fri, 19 Apr 2019 14:42:22 -0400 Subject: [PATCH 1/9] Added sanitize_np function, which is faster than sanitize_doc --- event_model/__init__.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/event_model/__init__.py b/event_model/__init__.py index 5a950cf99..37aef8630 100644 --- a/event_model/__init__.py +++ b/event_model/__init__.py @@ -11,6 +11,7 @@ import uuid import warnings from ._version import get_versions +import numpy as np __all__ = ['DocumentNames', 'schemas', 'compose_run'] @@ -896,6 +897,38 @@ def verify_filled(event_page): f"`event_model.Filler`.") +def sanitize_np(doc): + '''Return a copy with any numpy objects converted to built-in Python types. + This is a faster version of sanitize_doc which only converts numpy objects. + + This function takes in an event-model document and returns a copy with any + numpy objects converted to built-in Python types. It is useful for + sanitizing documents prior to sending to any consumer that does not + recognize numpy types, such as a MongoDB database or a JSON encoder. + + Parameters + ---------- + doc : dict + The event-model document to be sanitized + + Returns + ------- + sanitized_doc : event-model document + The event-model document with numpy objects converted to built-in + Python types. + ''' + return {key: sanitize_item(value) for key, value in doc.items()} + + +def sanitize_item(val): + "Convert any numpy objects into built-in Python types." + if isinstance(val, (np.generic, np.ndarray)): + if np.isscalar(val): + return val.item() + return val.tolist() + return val + + def sanitize_doc(doc): '''Return a copy with any numpy objects converted to built-in Python types. 
@@ -936,3 +969,4 @@ def default(self, obj): return obj.item() return obj.tolist() return json.JSONEncoder.default(self, obj) + From 64920010ca5795047b0ee5a5a840cf4cc03f2110 Mon Sep 17 00:00:00 2001 From: Garrett Bischof Date: Fri, 19 Apr 2019 14:49:17 -0400 Subject: [PATCH 2/9] flake8 fix --- event_model/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/event_model/__init__.py b/event_model/__init__.py index 37aef8630..b9cdf1da9 100644 --- a/event_model/__init__.py +++ b/event_model/__init__.py @@ -969,4 +969,3 @@ def default(self, obj): return obj.item() return obj.tolist() return json.JSONEncoder.default(self, obj) - From c59d3e88d4f4931a92281b84914e182365ce3f94 Mon Sep 17 00:00:00 2001 From: Garrett Bischof Date: Wed, 24 Apr 2019 17:29:14 -0400 Subject: [PATCH 3/9] recursive iteration for sanitize_np --- event_model/__init__.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/event_model/__init__.py b/event_model/__init__.py index b9cdf1da9..c97773b89 100644 --- a/event_model/__init__.py +++ b/event_model/__init__.py @@ -917,7 +917,16 @@ def sanitize_np(doc): The event-model document with numpy objects converted to built-in Python types. 
''' - return {key: sanitize_item(value) for key, value in doc.items()} + def iterate_sanitize(doc): + for key, value in doc.items(): + if isinstance(value, dict): + iterate_sanitize(value) + else: + value = sanitize_item(value) + + iterate_sanitize(doc) + return doc + def sanitize_item(val): From 42fbbc283eb1f2cffc711577d2045ddd9ebf9b08 Mon Sep 17 00:00:00 2001 From: Garrett Bischof Date: Wed, 24 Apr 2019 17:34:22 -0400 Subject: [PATCH 4/9] flake8 fix --- event_model/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/event_model/__init__.py b/event_model/__init__.py index c97773b89..a8e78bed4 100644 --- a/event_model/__init__.py +++ b/event_model/__init__.py @@ -928,7 +928,6 @@ def iterate_sanitize(doc): return doc - def sanitize_item(val): "Convert any numpy objects into built-in Python types." if isinstance(val, (np.generic, np.ndarray)): From 6170f540e075f243c9ed97003de92e1c213a478a Mon Sep 17 00:00:00 2001 From: Garrett Bischof Date: Thu, 25 Apr 2019 11:18:27 -0400 Subject: [PATCH 5/9] sanitize_np lists --- event_model/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/event_model/__init__.py b/event_model/__init__.py index a8e78bed4..68dc46a10 100644 --- a/event_model/__init__.py +++ b/event_model/__init__.py @@ -921,6 +921,8 @@ def iterate_sanitize(doc): for key, value in doc.items(): if isinstance(value, dict): iterate_sanitize(value) + elif isinstance(value, list): + value = [sanitize_item(item) for item in value] else: value = sanitize_item(value) From 9c52b0fe6d85737b3676fb0bfe648cb320d26b94 Mon Sep 17 00:00:00 2001 From: Garrett Bischof Date: Thu, 25 Apr 2019 12:00:59 -0400 Subject: [PATCH 6/9] update to iteration --- event_model/__init__.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/event_model/__init__.py b/event_model/__init__.py index 68dc46a10..86a74ef58 100644 --- a/event_model/__init__.py +++ b/event_model/__init__.py @@ -12,6 +12,7 @@ import warnings from ._version import get_versions 
import numpy as np +import collections __all__ = ['DocumentNames', 'schemas', 'compose_run'] @@ -918,13 +919,15 @@ def sanitize_np(doc): Python types. ''' def iterate_sanitize(doc): - for key, value in doc.items(): - if isinstance(value, dict): + if hasattr(doc, 'items'): + for value in doc.values(): iterate_sanitize(value) - elif isinstance(value, list): - value = [sanitize_item(item) for item in value] - else: - value = sanitize_item(value) + elif isinstance(doc, collections.abc.Iterable): + doc = list(doc) # Change tuples to lists + for value in doc: + iterate_sanitize(value) + else: + doc = sanitize_item(doc) iterate_sanitize(doc) return doc From a543ec96cded0fc7f5f9f954f2bcb480ac7e3a98 Mon Sep 17 00:00:00 2001 From: Garrett Bischof Date: Thu, 25 Apr 2019 13:35:42 -0400 Subject: [PATCH 7/9] Update to recursion --- event_model/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/event_model/__init__.py b/event_model/__init__.py index 86a74ef58..a9a968aef 100644 --- a/event_model/__init__.py +++ b/event_model/__init__.py @@ -918,13 +918,14 @@ def sanitize_np(doc): The event-model document with numpy objects converted to built-in Python types. 
''' + def iterate_sanitize(doc): if hasattr(doc, 'items'): for value in doc.values(): iterate_sanitize(value) - elif isinstance(doc, collections.abc.Iterable): - doc = list(doc) # Change tuples to lists - for value in doc: + elif isinstance(doc, collections.abc.Iterable) and not isinstance(str): + doc = list(doc) # Change tuples to lists + for value in doc: iterate_sanitize(value) else: doc = sanitize_item(doc) From 0d72bebeeece2e692358b961b8fa9b8087acb3dd Mon Sep 17 00:00:00 2001 From: Garrett Bischof Date: Thu, 25 Apr 2019 13:47:56 -0400 Subject: [PATCH 8/9] Updated so that it doesn't mutate incoming object --- event_model/__init__.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/event_model/__init__.py b/event_model/__init__.py index a9a968aef..3a05ad232 100644 --- a/event_model/__init__.py +++ b/event_model/__init__.py @@ -918,19 +918,13 @@ def sanitize_np(doc): The event-model document with numpy objects converted to built-in Python types. ''' + if hasattr(doc, 'items'): + return {key: sanitize_np(value) for key, value in doc.items()} + elif isinstance(doc, collections.abc.Iterable) and not isinstance(doc, str): + return [sanitize_np(item) for item in doc] + else: + return sanitize_item(doc) - def iterate_sanitize(doc): - if hasattr(doc, 'items'): - for value in doc.values(): - iterate_sanitize(value) - elif isinstance(doc, collections.abc.Iterable) and not isinstance(str): - doc = list(doc) # Change tuples to lists - for value in doc: - iterate_sanitize(value) - else: - doc = sanitize_item(doc) - - iterate_sanitize(doc) return doc From 74035491df66312544b1dcd2f5555d8db8adedc5 Mon Sep 17 00:00:00 2001 From: Garrett Bischof Date: Thu, 25 Apr 2019 14:01:45 -0400 Subject: [PATCH 9/9] fix for return statement --- event_model/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/event_model/__init__.py b/event_model/__init__.py index 3a05ad232..c91e2c1a4 100644 --- a/event_model/__init__.py +++ 
b/event_model/__init__.py @@ -925,7 +925,7 @@ def sanitize_np(doc): else: return sanitize_item(doc) - return doc + return sanitize_np(doc) def sanitize_item(val):