diff --git a/build-scripts/ubuntu-2004/build-3rd-parties.sh b/build-scripts/ubuntu-2004/build-3rd-parties.sh index b8aef2358f..97f4f07e1b 100755 --- a/build-scripts/ubuntu-2004/build-3rd-parties.sh +++ b/build-scripts/ubuntu-2004/build-3rd-parties.sh @@ -126,4 +126,3 @@ build_from_pypi semver 2.13.0 build_from_pypi sha3 build_from_pypi six build_from_pypi sortedcontainers 1.5.7 -build_from_pypi ujson 1.33 diff --git a/build-scripts/ubuntu-2004/prepare-package.sh b/build-scripts/ubuntu-2004/prepare-package.sh index 7579ccf81d..44eacc88a4 100755 --- a/build-scripts/ubuntu-2004/prepare-package.sh +++ b/build-scripts/ubuntu-2004/prepare-package.sh @@ -29,7 +29,6 @@ if [ "$distro_packages" = "debian-packages" ]; then # Update the package names to match the versions that are pre-installed on the os. echo -e "\nAdapt the dependencies for the Canonical archive" #### ToDo adjust packages for the Cannonical archive for Ubuntu 20.04 (focal) - # sed -i "s~ujson==1.33~ujson==1.33-1build1~" setup.py # sed -i "s~prompt_toolkit==0.57~prompt_toolkit==0.57-1~" setup.py # sed -i "s~msgpack-python==0.4.6~msgpack==0.4.6-1build1~" setup.py elif [ "$distro_packages" = "python-packages" ]; then diff --git a/common/serializers/json_serializer.py b/common/serializers/json_serializer.py index a012dbe0cd..71fa03da9b 100644 --- a/common/serializers/json_serializer.py +++ b/common/serializers/json_serializer.py @@ -2,45 +2,27 @@ import base64 +import json from typing import Dict from common.serializers.mapping_serializer import MappingSerializer -try: - import ujson as json - from ujson import encode as uencode - # Older versions of ujson's encode do not support `sort_keys`, if that - # is the case default to using json - uencode({'xx': '123', 'aa': 90}, sort_keys=True) +class OrderedJsonEncoder(json.JSONEncoder): + def __init__(self, *args, **kwargs): + kwargs['ensure_ascii'] = False + kwargs['sort_keys'] = True + kwargs['separators'] = (',', ':') + super().__init__(*args, **kwargs) - class 
UJsonEncoder: - @staticmethod - def encode(o): - if isinstance(o, (bytes, bytearray)): - return '"{}"'.format(base64.b64encode(o).decode("utf-8")) - else: - return uencode(o, sort_keys=True) + def encode(self, o): + if isinstance(o, (bytes, bytearray)): + return '"{}"'.format(base64.b64encode(o).decode("utf-8")) + else: + return super().encode(o) - JsonEncoder = UJsonEncoder() -except (ImportError, TypeError): - import json - - class OrderedJsonEncoder(json.JSONEncoder): - def __init__(self, *args, **kwargs): - kwargs['ensure_ascii'] = False - kwargs['sort_keys'] = True - kwargs['separators'] = (',', ':') - super().__init__(*args, **kwargs) - - def encode(self, o): - if isinstance(o, (bytes, bytearray)): - return '"{}"'.format(base64.b64encode(o).decode("utf-8")) - else: - return super().encode(o) - - JsonEncoder = OrderedJsonEncoder() +JsonEncoder = OrderedJsonEncoder() class JsonSerializer(MappingSerializer): diff --git a/common/serializers/signing_serializer.py b/common/serializers/signing_serializer.py index 4888cb5f46..6ab526f8e3 100644 --- a/common/serializers/signing_serializer.py +++ b/common/serializers/signing_serializer.py @@ -90,6 +90,3 @@ def serialize(self, obj, level=0, objname=None, topLevelKeysToIgnore=None, return res return res.encode('utf-8') - - # topLevelKeysToIgnore = topLevelKeysToIgnore or [] - # return ujson.dumps({k:obj[k] for k in obj.keys() if k not in topLevelKeysToIgnore}, sort_keys=True) diff --git a/common/test/test_json_serializer_ujson_compat.py b/common/test/test_json_serializer_ujson_compat.py new file mode 100644 index 0000000000..2a5eb8cacb --- /dev/null +++ b/common/test/test_json_serializer_ujson_compat.py @@ -0,0 +1,73 @@ +""" +Regression tests added when the ``ujson`` dependency was removed: pin the +exact byte output of ``JsonSerializer`` (sorted keys, compact separators, +raw UTF-8), since it feeds signing/hashing and must stay byte-stable. 
+""" +import pytest + +from common.serializers.json_serializer import JsonSerializer + + +@pytest.fixture +def sz(): + return JsonSerializer() + + +def test_keys_sorted_and_compact(sz): + # Insertion order must not affect output; keys are sorted, no whitespace. + assert sz.serialize({'b': 2, 'a': 1, 'c': 3}, toBytes=False) == '{"a":1,"b":2,"c":3}' + + +def test_non_ascii_kept_raw_utf8(sz): + # ensure_ascii=False: characters are emitted as raw UTF-8, not \\uXXXX. + assert sz.serialize({'name': 'héllo', 'kr': '한글', 'emoji': '🚀'}, + toBytes=False) == '{"emoji":"🚀","kr":"한글","name":"héllo"}' + # And the bytes form is the UTF-8 encoding of that string. + assert sz.serialize({'name': 'héllo'}) == '{"name":"héllo"}'.encode('utf-8') + + +def test_float_repr_pinned(sz): + assert sz.serialize({'a': 14.8639, 'b': -97.466179, 'c': 1.0, 'd': 1e20}, + toBytes=False) == '{"a":14.8639,"b":-97.466179,"c":1.0,"d":1e+20}' + + +def test_int_keys_become_strings(sz): + # json sorts the original (int) keys first, then coerces them to strings. + assert sz.serialize({3: 'c', 1: 'a', 2: 'b'}, toBytes=False) == '{"1":"a","2":"b","3":"c"}' + + +def test_bool_and_none(sz): + assert sz.serialize({'t': True, 'f': False, 'n': None}, + toBytes=False) == '{"f":false,"n":null,"t":true}' + + +def test_empty_dict(sz): + assert sz.serialize({}, toBytes=False) == '{}' + + +def test_top_level_bytes_base64(sz): + # The OrderedJsonEncoder.encode override base64-encodes a top-level + # bytes/bytearray value (b'raw' -> base64 'cmF3'). 
+ assert sz.serialize(b'raw', toBytes=False) == '"cmF3"' + assert sz.serialize(bytearray(b'raw'), toBytes=False) == '"cmF3"' + + +def test_round_trip(sz): + data = {'name': 'Alice', 'n': 1, 'f': 1.5, 'b': True, 'list': [1, 'two', None]} + assert sz.deserialize(sz.serialize(data)) == data + assert sz.deserialize(sz.serialize(data, toBytes=False)) == data + + +@pytest.mark.parametrize('value', [ + {'z': b'raw'}, # bytes nested in a dict value + [b'raw'], # bytes nested in a list + {'z': bytearray(b'raw')}, +]) +def test_nested_bytes_raise_typeerror(sz, value): + """ + The bytes special-case in ``OrderedJsonEncoder.encode`` only fires for a + top-level value; bytes nested in a container raise TypeError. A fix, if + ever needed, belongs in ``OrderedJsonEncoder.default``. + """ + with pytest.raises(TypeError): + sz.serialize(value) diff --git a/dev-setup/ubuntu/ubuntu-2004/SetupVMTest.txt b/dev-setup/ubuntu/ubuntu-2004/SetupVMTest.txt index b905bd140e..0eca41813f 100644 --- a/dev-setup/ubuntu/ubuntu-2004/SetupVMTest.txt +++ b/dev-setup/ubuntu/ubuntu-2004/SetupVMTest.txt @@ -82,7 +82,6 @@ sortedcontainers==1.5.7 \ timeout-decorator==0.5.0 \ toml==0.10.2 \ - ujson==1.33 \ wcwidth==0.2.5 \ wheel==0.34.2 \ zipp==1.2.0 diff --git a/ledger/test/helper.py b/ledger/test/helper.py index a1a2194e55..548a9a6ae6 100644 --- a/ledger/test/helper.py +++ b/ledger/test/helper.py @@ -108,7 +108,6 @@ def create_ledger_rocksdb_storage(txn_serializer, hash_serializer, tempdir, init def create_ledger_chunked_file_storage(txn_serializer, hash_serializer, tempdir, init_genesis_txn_file=None): - chunk_creator = None db_name = 'transactions' if isinstance(txn_serializer, MsgPackSerializer): # TODO: fix chunk_creator support @@ -119,6 +118,8 @@ def chunk_creator(name): isLineNoKey=True, storeContentHash=False, ensureDurability=False) + else: + chunk_creator = None store = ChunkedFileStore(tempdir, db_name, isLineNoKey=True, diff --git a/plenum/common/channel.py b/plenum/common/channel.py index 
de782ca6aa..70eb36f842 100644 --- a/plenum/common/channel.py +++ b/plenum/common/channel.py @@ -156,7 +156,7 @@ def _process_sync(self, msg: Any): # This is done so that messages can include additional metadata # isinstance is not used here because it returns true for NamedTuple # as well. - if type(msg) != tuple: + if type(msg) is not tuple: msg = (msg,) handler = self._find_handler(msg[0]) if handler is None: diff --git a/plenum/common/util.py b/plenum/common/util.py index ea44ef9206..8f6fd3a082 100644 --- a/plenum/common/util.py +++ b/plenum/common/util.py @@ -342,6 +342,14 @@ def z85_to_friendly(z): return z +def json_default_bytes_to_str(obj): + """``json.dumps`` default hook decoding bytes/bytearray as UTF-8.""" + if isinstance(obj, (bytes, bytearray)): + return obj.decode() + raise TypeError('Object of type {} is not JSON ' + 'serializable'.format(type(obj).__name__)) + + def runWithLoop(loop, callback, *args, **kwargs): if loop.is_running(): loop.call_soon(asyncio.ensure_future, callback(*args, **kwargs)) diff --git a/plenum/recorder/recorder.py b/plenum/recorder/recorder.py index ca897d9f33..1003caae66 100644 --- a/plenum/recorder/recorder.py +++ b/plenum/recorder/recorder.py @@ -1,14 +1,11 @@ import os import time +import json from typing import Callable +from plenum.common.util import json_default_bytes_to_str from storage.kv_store_rocksdb_int_keys import KeyValueStorageRocksdbIntKeys -try: - import ujson as json -except ImportError: - import json - class Recorder: INCOMING_FLAG = 0 @@ -50,7 +47,8 @@ def add_to_store(self, key, val): existing = json.loads(existing) except KeyError: existing = [] - self.store.put(key, json.dumps([*existing, val])) + self.store.put(key, json.dumps([*existing, val], + default=json_default_bytes_to_str)) def register_replay_target(self, id, target: Callable): assert id not in self.replay_targets diff --git a/plenum/server/plugin/stats_consumer/plugin_firebase_stats_consumer.py 
b/plenum/server/plugin/stats_consumer/plugin_firebase_stats_consumer.py index 1fab98b30f..6bf2f4a45e 100644 --- a/plenum/server/plugin/stats_consumer/plugin_firebase_stats_consumer.py +++ b/plenum/server/plugin/stats_consumer/plugin_firebase_stats_consumer.py @@ -6,11 +6,11 @@ from plenum.common.types import EVENT_PERIODIC_STATS_THROUGHPUT, \ EVENT_NODE_STARTED, EVENT_REQ_ORDERED, EVENT_PERIODIC_STATS_LATENCIES, \ PLUGIN_TYPE_STATS_CONSUMER, EVENT_VIEW_CHANGE, EVENT_PERIODIC_STATS_NODES, \ - EVENT_PERIODIC_STATS_TOTAL_REQUESTS, EVENT_PERIODIC_STATS_NODE_INFO,\ + EVENT_PERIODIC_STATS_TOTAL_REQUESTS, EVENT_PERIODIC_STATS_NODE_INFO, \ EVENT_PERIODIC_STATS_SYSTEM_PERFORMANCE_INFO from stp_core.common.log import getlogger from plenum.config import STATS_SERVER_IP, STATS_SERVER_PORT, STATS_SERVER_MESSAGE_BUFFER_MAX_SIZE -from plenum.server.plugin.stats_consumer.stats_publisher import StatsPublisher,\ +from plenum.server.plugin.stats_consumer.stats_publisher import StatsPublisher, \ Topic from plenum.server.plugin_loader import HasDynamicallyImportedModules from plenum.server.stats_consumer import StatsConsumer diff --git a/plenum/test/recorder/test_recorder.py b/plenum/test/recorder/test_recorder.py index 29d85aa26f..5eef608377 100644 --- a/plenum/test/recorder/test_recorder.py +++ b/plenum/test/recorder/test_recorder.py @@ -1,14 +1,10 @@ import random import time +import json from collections import OrderedDict from plenum.common.util import randomString -try: - import ujson as json -except ImportError: - import json - import pytest from plenum.recorder.recorder import Recorder @@ -60,6 +56,19 @@ def test_add_to_recorder(recorder): i += 1 +def test_add_to_recorder_with_bytes(recorder): + msg1, frm1 = b'{"msg": "m1"}', b'f1' + msg2, to1 = b'{"msg": "m2"}', 't1' + recorder.add_incoming(msg1, frm1) + recorder.add_outgoing(msg2, to1) + + all_msgs = [] + for _, v in recorder.store.iterator(include_value=True): + all_msgs.extend(Recorder.get_parsed(v)) + assert 
Recorder.filter_incoming(all_msgs) == [['{"msg": "m1"}', 'f1']] + assert Recorder.filter_outgoing(all_msgs) == [['{"msg": "m2"}', 't1']] + + def test_get_list_from_recorder(recorder): msg1, frm1 = 'm1', 'f1' msg2, frm2 = 'm2', 'f2' diff --git a/scripts/test_zmq/test_zmq/zstack.py b/scripts/test_zmq/test_zmq/zstack.py index f80b818235..73ed6ccd64 100644 --- a/scripts/test_zmq/test_zmq/zstack.py +++ b/scripts/test_zmq/test_zmq/zstack.py @@ -1,10 +1,5 @@ from test_zmq.authenticator import MultiZapAuthenticator - -try: - import ujson as json -except ImportError: - import json - +import json import os import shutil import sys @@ -561,7 +556,7 @@ def transmit(self, msg, uid, timeout=None, serialized=False, is_batch=False): @staticmethod def serializeMsg(msg): if isinstance(msg, Mapping): - msg = json.dumps(msg) + msg = json.dumps(msg, separators=(',', ':')) if isinstance(msg, str): msg = msg.encode() assert isinstance(msg, bytes) diff --git a/setup.py b/setup.py index 87925e7837..035f345447 100644 --- a/setup.py +++ b/setup.py @@ -131,8 +131,6 @@ def run(self): 'six', ### Tests fail without version pin (GHA run: https://github.com/udosson/indy-plenum/actions/runs/1078741118) 'sortedcontainers==1.5.7', - ### Tests fail without version pin (GHA run: https://github.com/udosson/indy-plenum/actions/runs/1078741118) - 'ujson==1.33', ], setup_requires=['pytest-runner==5.3.0'], diff --git a/stp_zmq/zstack.py b/stp_zmq/zstack.py index 32a2a07f60..19df6bd444 100644 --- a/stp_zmq/zstack.py +++ b/stp_zmq/zstack.py @@ -1,4 +1,5 @@ import inspect +import json from plenum.common.constants import OP_FIELD_NAME, BATCH from plenum.common.metrics_collector import NullMetricsCollector @@ -8,10 +9,6 @@ from stp_core.common.constants import CONNECTION_PREFIX, ZMQ_NETWORK_PROTOCOL from stp_zmq.client_message_provider import ClientMessageProvider -try: - import ujson as json -except ImportError: - import json import os import shutil @@ -871,7 +868,7 @@ def transmit(self, msg, uid, timeout=None, 
serialized=False, is_batch=False): @staticmethod def serializeMsg(msg): if isinstance(msg, Mapping): - msg = json.dumps(msg) + msg = json.dumps(msg, separators=(',', ':')) if isinstance(msg, str): msg = msg.encode() assert isinstance(msg, bytes)