diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 08c1fd0d..a8332c4c 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -34,13 +34,14 @@ typedef struct JSON_Generator_StateStruct { bool ascii_only; bool script_safe; bool strict; + VALUE sort_keys; } JSON_Generator_State; static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8; static ID i_to_s, i_to_json, i_new, i_encode; static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key, - sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json; + sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json, sym_sort_keys; #define GET_STATE_TO(self, state) \ @@ -709,6 +710,7 @@ static void State_mark(void *ptr) rb_gc_mark_movable(state->object_nl); rb_gc_mark_movable(state->array_nl); rb_gc_mark_movable(state->as_json); + rb_gc_mark_movable(state->sort_keys); } static void State_compact(void *ptr) @@ -720,6 +722,7 @@ static void State_compact(void *ptr) state->object_nl = rb_gc_location(state->object_nl); state->array_nl = rb_gc_location(state->array_nl); state->as_json = rb_gc_location(state->as_json); + state->sort_keys = rb_gc_location(state->sort_keys); } static size_t State_memsize(const void *ptr) @@ -769,6 +772,7 @@ static void vstate_spill(struct generate_json_data *data) RB_OBJ_WRITTEN(vstate, Qundef, state->object_nl); RB_OBJ_WRITTEN(vstate, Qundef, state->array_nl); RB_OBJ_WRITTEN(vstate, Qundef, state->as_json); + RB_OBJ_WRITTEN(vstate, Qundef, state->sort_keys); } static inline VALUE json_call_to_json(struct generate_json_data *data, VALUE obj) @@ -1050,6 +1054,17 @@ static inline long increase_depth(struct generate_json_data *data) static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, 
VALUE obj) { + if (RB_UNLIKELY(RTEST(data->state->sort_keys)) && RHASH_SIZE(obj) > 1) { /* fewer than two pairs are already in order: skip the sort/to_h calls and their allocations */ + VALUE sort_keys = data->state->sort_keys; + VALUE sorted_array; + if (rb_obj_is_proc(sort_keys)) { + sorted_array = rb_funcall_with_block(obj, rb_intern("sort"), 0, NULL, sort_keys); + } else { + sorted_array = rb_funcall(obj, rb_intern("sort"), 0); + } + obj = rb_funcall(sorted_array, rb_intern("to_h"), 0); + } + long depth = increase_depth(data); if (RHASH_SIZE(obj) == 0) { @@ -1376,6 +1391,7 @@ static VALUE cState_init_copy(VALUE obj, VALUE orig) RB_OBJ_WRITTEN(obj, Qundef, objState->object_nl); RB_OBJ_WRITTEN(obj, Qundef, objState->array_nl); RB_OBJ_WRITTEN(obj, Qundef, objState->as_json); + RB_OBJ_WRITTEN(obj, Qundef, objState->sort_keys); return obj; } @@ -1722,6 +1738,35 @@ static VALUE cState_ascii_only_set(VALUE self, VALUE enable) return Qnil; } +/* + * call-seq: sort_keys + * + * Get the value of sort_keys. + */ +static VALUE cState_sort_keys_p(VALUE self) +{ + GET_STATE(self); + return state->sort_keys; +} + +/* + * call-seq: sort_keys=(value) + * + * value is a boolean or proc. If the value is true (any truthy non-proc), + * object keys will be sorted in ascending <=> order (via Hash#sort). + * + * If the value is a proc, it must be a comparator. It will receive two + * [key, value] pairs to allow for arbitrary sorting. + */ +static VALUE cState_sort_keys_set(VALUE self, VALUE value) +{ + rb_check_frozen(self); + GET_STATE(self); + VALUE sort_keys = rb_obj_is_proc(value) ? value : (RTEST(value) ? Qtrue : Qfalse); + RB_OBJ_WRITE(self, &state->sort_keys, sort_keys); + return Qnil; +} + static VALUE cState_allow_duplicate_key_p(VALUE self) { GET_STATE(self); @@ -1832,6 +1877,10 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg) state->as_json_single_arg = proc && rb_proc_arity(proc) == 1; state_write_value(data, &state->as_json, proc); } + else if (key == sym_sort_keys) { + VALUE sort_keys = rb_obj_is_proc(val) ? val : (RTEST(val) ? 
Qtrue : Qfalse); + state_write_value(data, &state->sort_keys, sort_keys); + } return ST_CONTINUE; } @@ -1957,6 +2006,8 @@ void Init_generator(void) rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1); rb_define_method(cState, "generate", cState_generate, -1); rb_define_method(cState, "_generate_no_fallback", cState_generate_no_fallback, -1); + rb_define_method(cState, "sort_keys", cState_sort_keys_p, 0); + rb_define_method(cState, "sort_keys=", cState_sort_keys_set, 1); rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0); @@ -1986,6 +2037,7 @@ void Init_generator(void) sym_strict = ID2SYM(rb_intern("strict")); sym_as_json = ID2SYM(rb_intern("as_json")); sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key")); + sym_sort_keys = ID2SYM(rb_intern("sort_keys")); usascii_encindex = rb_usascii_encindex(); utf8_encindex = rb_utf8_encindex(); diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index ea9e6d08..b918895d 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -17,6 +17,7 @@ import org.jruby.RubyFloat; import org.jruby.RubyHash; import org.jruby.RubyIO; +import org.jruby.RubyProc; import org.jruby.RubyString; import org.jruby.RubySymbol; import org.jruby.runtime.Helpers; @@ -573,6 +574,14 @@ static void generateHash(ThreadContext context, Session session, RubyHash object return; } + if (state.sortKeys()) { + RubyProc comparator = state.getSortKeysProc(); + RubyArray sortedPairs = comparator != null + ? 
(RubyArray) Helpers.invoke(context, object, "sort", comparator.getBlock()) + : (RubyArray) object.callMethod(context, "sort"); + object = (RubyHash) sortedPairs.callMethod(context, "to_h"); + } + final ByteList objectNl = state.getObjectNl(); byte[] objectNLBytes = objectNl.unsafeBytes(); final byte[] indent = Utils.repeat(state.getIndent(), depth); diff --git a/java/src/json/ext/GeneratorState.java b/java/src/json/ext/GeneratorState.java index 11d55559..a51f8ff3 100644 --- a/java/src/json/ext/GeneratorState.java +++ b/java/src/json/ext/GeneratorState.java @@ -104,6 +104,13 @@ public class GeneratorState extends RubyObject { private int bufferInitialLength = DEFAULT_BUFFER_INITIAL_LENGTH; static final int DEFAULT_BUFFER_INITIAL_LENGTH = 1024; + /** + * Controls key sorting when generating JSON. null means keys + * are emitted in insertion order; a true value sorts keys lexicographically; + * a {@link RubyProc} is used as a comparator receiving two [key, value] pairs. + */ + private IRubyObject sortKeys; + /** * The current depth (inside a #to_json call) */ @@ -222,6 +229,7 @@ public IRubyObject initialize_copy(ThreadContext context, IRubyObject vOrig) { this.allowDuplicateKey = orig.allowDuplicateKey; this.deprecateDuplicateKey = orig.deprecateDuplicateKey; + this.sortKeys = orig.sortKeys; return this; } @@ -431,6 +439,27 @@ public boolean strict() { return strict; } + /** + * Returns true if object keys should be sorted (either lexicographically + * or with a custom comparator). + */ + public boolean sortKeys() { + return sortKeys != null && sortKeys.isTrue(); + } + + /** + * Returns the comparator proc used to sort keys, or null if + * keys should be sorted lexicographically. + */ + public RubyProc getSortKeysProc() { + return sortKeys instanceof RubyProc ? 
(RubyProc) sortKeys : null; + } + + private static IRubyObject normalizeSortKeys(ThreadContext context, IRubyObject value) { + if (value instanceof RubyProc) return value; + return (value != null && value.isTrue()) ? context.getRuntime().getTrue() : null; + } + @JRubyMethod(name={"strict","strict?"}) public RubyBoolean strict_get(ThreadContext context) { return RubyBoolean.newBoolean(context, strict); @@ -474,6 +503,18 @@ public IRubyObject buffer_initial_length_set(IRubyObject buffer_initial_length) return buffer_initial_length; } + @JRubyMethod(name="sort_keys") + public IRubyObject sort_keys_get(ThreadContext context) { + return sortKeys == null ? context.getRuntime().getFalse() : sortKeys; + } + + @JRubyMethod(name="sort_keys=") + public IRubyObject sort_keys_set(ThreadContext context, IRubyObject sortKeys) { + checkFrozen(); + this.sortKeys = normalizeSortKeys(context, sortKeys); + return sortKeys; + } + public int getDepth() { return depth; } @@ -568,6 +609,9 @@ public IRubyObject _configure(ThreadContext context, IRubyObject vOpts) { this.allowDuplicateKey = opts.getBool("allow_duplicate_key", false); this.deprecateDuplicateKey = false; } + + sortKeys = normalizeSortKeys(context, opts.get("sort_keys")); + return this; } @@ -596,6 +640,7 @@ public RubyHash to_h(ThreadContext context) { result.op_aset(context, runtime.newSymbol("strict"), strict_get(context)); result.op_aset(context, runtime.newSymbol("depth"), depth_get(context)); result.op_aset(context, runtime.newSymbol("buffer_initial_length"), buffer_initial_length_get(context)); + result.op_aset(context, runtime.newSymbol("sort_keys"), sort_keys_get(context)); if (this.allowDuplicateKey) { if (!this.deprecateDuplicateKey) { diff --git a/lib/json.rb b/lib/json.rb index f8dc4ccc..a5c065f0 100644 --- a/lib/json.rb +++ b/lib/json.rb @@ -408,7 +408,6 @@ # to be inserted after each \JSON object; defaults to the empty \String, ''. 
# - Option +indent+ (\String) specifies the string (usually spaces) to be # used for indentation; defaults to the empty \String, ''; -# defaults to the empty \String, ''; # has no effect unless options +array_nl+ or +object_nl+ specify newlines. # - Option +space+ (\String) specifies a string (usually a space) to be # inserted after the colon in each \JSON object's pair; @@ -416,6 +415,11 @@ # - Option +space_before+ (\String) specifies a string (usually a space) to be # inserted before the colon in each \JSON object's pair; # defaults to the empty \String, ''. +# - Option +sort_keys+ (boolean or \Proc) controls whether and how the keys of a +# hash are sorted when generating the output; defaults to false. +# When +true+, keys are sorted lexicographically. When a \Proc, +# it is used as a comparator and receives two [key, value] pairs, +# allowing for arbitrary sort orders. # # In this example, +obj+ is used first to generate the shortest # \JSON data (no whitespace), then again with all formatting options diff --git a/lib/json/ext/generator/state.rb b/lib/json/ext/generator/state.rb index e4f425af..3c1d2fb9 100644 --- a/lib/json/ext/generator/state.rb +++ b/lib/json/ext/generator/state.rb @@ -54,6 +54,7 @@ def to_h strict: strict?, depth: depth, buffer_initial_length: buffer_initial_length, + sort_keys: sort_keys } allow_duplicate_key = allow_duplicate_key? diff --git a/lib/json/truffle_ruby/generator.rb b/lib/json/truffle_ruby/generator.rb index 4fb1885e..1d5ff102 100644 --- a/lib/json/truffle_ruby/generator.rb +++ b/lib/json/truffle_ruby/generator.rb @@ -164,6 +164,7 @@ def initialize(opts = nil) @script_safe = false @strict = false @max_nesting = 100 + @sort_keys = false configure(opts) if opts end @@ -199,6 +200,11 @@ def initialize(opts = nil) # supported by the JSON spec will raise a JSON::GeneratorError attr_accessor :strict + # Controls key sorting in the generated JSON. If set to +true+, object + # keys are sorted by key lexicographically. 
If set to a Proc, it is + # used as a comparator receiving two [key, value] pairs. + attr_accessor :sort_keys + # :stopdoc: attr_reader :buffer_initial_length @@ -285,6 +291,7 @@ def configure(opts) @allow_nan = !!opts[:allow_nan] if opts.key?(:allow_nan) @as_json = opts[:as_json].to_proc if opts[:as_json] @ascii_only = opts[:ascii_only] if opts.key?(:ascii_only) + @sort_keys = (Proc === opts[:sort_keys] ? opts[:sort_keys] : !!opts[:sort_keys]) if opts.key?(:sort_keys) @depth = opts[:depth] || 0 @buffer_initial_length ||= opts[:buffer_initial_length] @@ -349,9 +356,25 @@ def generate(obj, anIO = nil) depth = @depth if @indent.empty? and @space.empty? and @space_before.empty? and @object_nl.empty? and @array_nl.empty? and - !@ascii_only and !@script_safe and @max_nesting == 0 and (!@strict || Symbol === obj) + !@ascii_only and !@script_safe and @max_nesting == 0 and (!@strict || Symbol === obj) and !@sort_keys result = generate_json(obj, ''.dup) else + if @sort_keys + # Sort Hashes at every nesting level, mirroring the per-object sorting in the + # C and Java generators; other values pass through so they still generate. + sort_nested = lambda do |value| + if Hash === value + pairs = Proc === @sort_keys ? value.sort(&@sort_keys) : value.sort + pairs.each { |pair| pair[1] = sort_nested.call(pair[1]) }.to_h + elsif value.instance_of?(Array) + value.map { |element| sort_nested.call(element) } + else + value + end + end + obj = sort_nested.call(obj) + end + result = obj.to_json(self) end JSON::TruffleRuby::Generator.valid_utf8?(result) or raise GeneratorError.new( diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index 753ee0fb..c011aee7 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -190,6 +190,55 @@ def test_generate_pretty_custom JSON end + def test_generate_sort_keys + json = generate({2=>"a", 1=>"b", 3=>"c"}, sort_keys: true) + assert_equal('{"1":"b","2":"a","3":"c"}', json) + + json = generate({2=>"a", 1=>"b", 3=>"c"}, sort_keys: false) + assert_equal('{"2":"a","1":"b","3":"c"}', json) + + json = pretty_generate({2=>"a", 1=>"b", 3=>"c"}, sort_keys: true) + assert_equal(<<~'JSON'.chomp, json) + { + "1": "b", + "2": "a", + "3": "c" + } + JSON + + json = pretty_generate({2=>"a", 1=>"b", 3=>"c"}, sort_keys: false) + assert_equal(<<~'JSON'.chomp, json) + { + "2": "a", + "1": "b", + "3": "c" + } + JSON + + json = pretty_generate({2=>"a", 1=>"b", 3=>"c"}) + 
assert_equal(<<~'JSON'.chomp, json) + { + "2": "a", + "1": "b", + "3": "c" + } + JSON + end + + def test_generate_sort_keys_with_proc + reverse = ->(a, b) { b[0] <=> a[0] } + json = generate({2=>"a", 1=>"b", 3=>"c"}, sort_keys: reverse) + assert_equal('{"3":"c","2":"a","1":"b"}', json) + + by_value = ->(a, b) { a[1] <=> b[1] } + json = generate({2=>"c", 1=>"a", 3=>"b"}, sort_keys: by_value) + assert_equal('{"1":"a","3":"b","2":"c"}', json) + + state = State.new(sort_keys: reverse) + assert_same reverse, state.to_h[:sort_keys] + assert_equal('{"3":"c","2":"a","1":"b"}', state.generate({2=>"a", 1=>"b", 3=>"c"})) + end + def test_generate_custom state = State.new(:space_before => " ", :space => " ", :indent => "", :object_nl => "\n", :array_nl => "") json = generate({1=>{2=>3,4=>[5,6]}}, state) @@ -289,6 +338,7 @@ def test_state_defaults :object_nl => "", :space => "", :space_before => "", + :sort_keys => false, }.sort_by { |n,| n.to_s }, state.to_h.sort_by { |n,| n.to_s }) state = JSON::State.new(allow_duplicate_key: true) @@ -307,6 +357,7 @@ def test_state_defaults :object_nl => "", :space => "", :space_before => "", + :sort_keys => false, }.sort_by { |n,| n.to_s }, state.to_h.sort_by { |n,| n.to_s }) end