Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,17 @@

### Unreleased

* Add new `allow_duplicate_key` generator options. By default a warning is now emitted when a duplicated key is encountered.
In `json 3.0` an error will be raised.
```ruby
>> Warning[:deprecated] = true
>> puts JSON.generate({ foo: 1, "foo" => 2 })
(irb):2: warning: detected duplicate key "foo" in {foo: 1, "foo" => 2}.
This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`
{"foo":1,"foo":2}
>> JSON.generate({ foo: 1, "foo" => 2 }, allow_duplicate_key: false)
detected duplicate key "foo" in {foo: 1, "foo" => 2} (JSON::GeneratorError)
```
* Fix `JSON.generate` `strict: true` mode to also restrict hash keys.
* Fix `JSON::Coder` to also invoke block for hash keys that aren't strings nor symbols.

Expand Down
8 changes: 8 additions & 0 deletions ext/json/ext/fbuffer/fbuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ typedef unsigned char _Bool;
#endif
#endif

#ifndef NOINLINE
#if defined(__has_attribute) && __has_attribute(noinline)
#define NOINLINE() __attribute__((noinline))
#else
#define NOINLINE()
#endif
#endif

#ifndef RB_UNLIKELY
#define RB_UNLIKELY(expr) expr
#endif
Expand Down
73 changes: 67 additions & 6 deletions ext/json/ext/generator/generator.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@

/* ruby api and some helpers */

enum duplicate_key_action {
JSON_DEPRECATED = 0,
JSON_IGNORE,
JSON_RAISE,
};

typedef struct JSON_Generator_StateStruct {
VALUE indent;
VALUE space;
Expand All @@ -21,6 +27,8 @@ typedef struct JSON_Generator_StateStruct {
long depth;
long buffer_initial_length;

enum duplicate_key_action on_duplicate_key;

bool allow_nan;
bool ascii_only;
bool script_safe;
Expand All @@ -34,7 +42,7 @@ typedef struct JSON_Generator_StateStruct {
static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8;

static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key,
sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;


Expand Down Expand Up @@ -987,8 +995,11 @@ static inline VALUE vstate_get(struct generate_json_data *data)
}

struct hash_foreach_arg {
VALUE hash;
struct generate_json_data *data;
int iter;
int first_key_type;
bool first;
bool mixed_keys_encountered;
};

static VALUE
Expand All @@ -1006,6 +1017,22 @@ convert_string_subclass(VALUE key)
return key_to_s;
}

NOINLINE()
static void
json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
{
if (arg->mixed_keys_encountered) {
return;
}
arg->mixed_keys_encountered = true;

JSON_Generator_State *state = arg->data->state;
if (state->on_duplicate_key != JSON_IGNORE) {
VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse;
rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise);
}
}

static int
json_object_i(VALUE key, VALUE val, VALUE _arg)
{
Expand All @@ -1016,8 +1043,16 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
JSON_Generator_State *state = data->state;

long depth = state->depth;
int key_type = rb_type(key);

if (arg->first) {
arg->first = false;
arg->first_key_type = key_type;
}
else {
fbuffer_append_char(buffer, ',');
}

if (arg->iter > 0) fbuffer_append_char(buffer, ',');
if (RB_UNLIKELY(data->state->object_nl)) {
fbuffer_append_str(buffer, data->state->object_nl);
}
Expand All @@ -1029,21 +1064,30 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
bool as_json_called = false;

start:
switch (rb_type(key)) {
switch (key_type) {
case T_STRING:
if (RB_UNLIKELY(arg->first_key_type != T_STRING)) {
json_inspect_hash_with_mixed_keys(arg);
}

if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
key_to_s = key;
} else {
key_to_s = convert_string_subclass(key);
}
break;
case T_SYMBOL:
if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) {
json_inspect_hash_with_mixed_keys(arg);
}

key_to_s = rb_sym2str(key);
break;
default:
if (data->state->strict) {
if (RTEST(data->state->as_json) && !as_json_called) {
key = rb_proc_call_with_block(data->state->as_json, 1, &key, Qnil);
key_type = rb_type(key);
as_json_called = true;
goto start;
} else {
Expand All @@ -1064,7 +1108,6 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
generate_json(buffer, data, val);

arg->iter++;
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Getting rid of this increment does offset the small overhead:

== Encoding activitypub.json (52595 bytes)
ruby 3.4.2 (2025-02-15 revision d2930f8e7a) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
               after     3.019k i/100ms
Calculating -------------------------------------
               after     30.785k (± 2.5%) i/s   (32.48 μs/i) -    153.969k in   5.004680s

Comparison:
              before:    30570.1 i/s
               after:    30784.6 i/s - same-ish: difference falls within error


== Encoding citm_catalog.json (500298 bytes)
ruby 3.4.2 (2025-02-15 revision d2930f8e7a) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
               after   159.000 i/100ms
Calculating -------------------------------------
               after      1.630k (± 0.6%) i/s  (613.58 μs/i) -      8.268k in   5.073274s

Comparison:
              before:     1603.6 i/s
               after:     1629.8 i/s - 1.02x  faster


== Encoding twitter.json (466906 bytes)
ruby 3.4.2 (2025-02-15 revision d2930f8e7a) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
               after   308.000 i/100ms
Calculating -------------------------------------
               after      3.088k (± 0.6%) i/s  (323.86 μs/i) -     15.708k in   5.087390s

Comparison:
              before:     3041.1 i/s
               after:     3087.8 i/s - 1.02x  faster

return ST_CONTINUE;
}

Expand All @@ -1091,8 +1134,9 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
fbuffer_append_char(buffer, '{');

struct hash_foreach_arg arg = {
.hash = obj,
.data = data,
.iter = 0,
.first = true,
};
rb_hash_foreach(obj, json_object_i, (VALUE)&arg);

Expand Down Expand Up @@ -1794,6 +1838,19 @@ static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
return Qnil;
}

static VALUE cState_allow_duplicate_key_p(VALUE self)
{
GET_STATE(self);
switch (state->on_duplicate_key) {
case JSON_IGNORE:
return Qtrue;
case JSON_DEPRECATED:
return Qnil;
case JSON_RAISE:
return Qfalse;
}
}

/*
* call-seq: depth
*
Expand Down Expand Up @@ -1883,6 +1940,7 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
else if (key == sym_strict) { state->strict = RTEST(val); }
else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
else if (key == sym_as_json) {
VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;
state_write_value(data, &state->as_json, proc);
Expand Down Expand Up @@ -2008,6 +2066,8 @@ void Init_generator(void)
rb_define_method(cState, "generate", cState_generate, -1);
rb_define_alias(cState, "generate_new", "generate"); // :nodoc:

rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0);

rb_define_singleton_method(cState, "generate", cState_m_generate, 3);

VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
Expand Down Expand Up @@ -2072,6 +2132,7 @@ void Init_generator(void)
sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
sym_strict = ID2SYM(rb_intern("strict"));
sym_as_json = ID2SYM(rb_intern("as_json"));
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));

usascii_encindex = rb_usascii_encindex();
utf8_encindex = rb_utf8_encindex();
Expand Down
2 changes: 1 addition & 1 deletion ext/json/ext/parser/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -1314,7 +1314,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
else if (key == sym_freeze) { config->freeze = RTEST(val); }
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
else if (key == sym_decimal_class) {
if (RTEST(val)) {
if (rb_respond_to(val, i_try_convert)) {
Expand Down
66 changes: 66 additions & 0 deletions java/src/json/ext/Generator.java
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,66 @@ void generate(ThreadContext context, final Session session, RubyHash object, fin
}
}

private static class HashKeyTracker {
enum KeyType {
UNKNOWN,
STRING,
SYMBOL,
};

private KeyType keyType;
private boolean done;
private RubyHash hash;

private HashKeyTracker(RubyHash hash) {
this.hash = hash;
this.done = false;
this.keyType = KeyType.UNKNOWN;
}

public void trackFirst(ThreadContext context, Session session, IRubyObject key) {
if (key instanceof RubyString) {
this.keyType = KeyType.STRING;
} else if (key.getType() == context.runtime.getSymbol()) {
this.keyType = KeyType.SYMBOL;
} else {
this.done = true;
report(context, session);
}
}

public void track(ThreadContext context, Session session, IRubyObject key) {
if (!done) {
if (keyType == KeyType.STRING) {
if (!(key instanceof RubyString)) {
this.report(context, session);
}
} else {
if (!(key.getType() == context.runtime.getSymbol())) {
this.report(context, session);
}
}
}
}

private void report(ThreadContext context, Session session) {
this.done = true;

final RuntimeInfo info = session.getInfo(context);
final GeneratorState state = session.getState(context);

if (!state.getAllowDuplicateKey()) {
if (state.getDeprecateDuplicateKey()) {
IRubyObject args[] = new IRubyObject[]{hash, context.getRuntime().getFalse()};
info.jsonModule.get().callMethod(context, "on_mixed_keys_hash", args);
} else {
IRubyObject args[] = new IRubyObject[]{hash, context.getRuntime().getTrue()};
info.jsonModule.get().callMethod(context, "on_mixed_keys_hash", args);
}
}
}
}

static void generateHash(ThreadContext context, Session session, RubyHash object, OutputStream buffer) throws IOException {
final GeneratorState state = session.getState(context);
final int depth = state.increaseDepth(context);
Expand All @@ -508,7 +568,13 @@ static void generateHash(ThreadContext context, Session session, RubyHash object
buffer.write(objectNLBytes);

boolean firstPair = true;
HashKeyTracker tracker = new HashKeyTracker(object);
for (RubyHash.RubyHashEntry entry : (Set<RubyHash.RubyHashEntry>) object.directEntrySet()) {
if (firstPair) {
tracker.trackFirst(context, session, (IRubyObject)entry.getKey());
} else {
tracker.track(context, session, (IRubyObject)entry.getKey());
}
processEntry(context, session, buffer, entry, firstPair, objectNl, indent, spaceBefore, space);
firstPair = false;
}
Expand Down
39 changes: 39 additions & 0 deletions java/src/json/ext/GeneratorState.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@
* @author mernen
*/
public class GeneratorState extends RubyObject {
private boolean allowDuplicateKey = false;
private boolean deprecateDuplicateKey = true;

/**
* The indenting unit string. Will be repeated several times for larger
* indenting levels.
Expand Down Expand Up @@ -216,6 +219,10 @@ public IRubyObject initialize_copy(ThreadContext context, IRubyObject vOrig) {
this.strict = orig.strict;
this.bufferInitialLength = orig.bufferInitialLength;
this.depth = orig.depth;

this.allowDuplicateKey = orig.allowDuplicateKey;
this.deprecateDuplicateKey = orig.deprecateDuplicateKey;

return this;
}

Expand Down Expand Up @@ -479,6 +486,24 @@ private ByteList prepareByteList(ThreadContext context, IRubyObject value) {
return str.getByteList().dup();
}

@JRubyMethod(name="allow_duplicate_key?", visibility=Visibility.PRIVATE)
public IRubyObject allow_duplicate_key_p(ThreadContext context) {
if (allowDuplicateKey) {
return context.runtime.getTrue();
} else if (deprecateDuplicateKey) {
return context.runtime.getNil();
}
return context.runtime.getFalse();
}

public boolean getAllowDuplicateKey() {
return allowDuplicateKey;
}

public boolean getDeprecateDuplicateKey() {
return deprecateDuplicateKey;
}

/**
* <code>State#configure(opts)</code>
*
Expand Down Expand Up @@ -520,6 +545,10 @@ public IRubyObject _configure(ThreadContext context, IRubyObject vOpts) {

depth = opts.getInt("depth", 0);

if (opts.hasKey("allow_duplicate_key")) {
this.allowDuplicateKey = opts.getBool("allow_duplicate_key", false);
this.deprecateDuplicateKey = false;
}
return this;
}

Expand Down Expand Up @@ -548,6 +577,16 @@ public RubyHash to_h(ThreadContext context) {
result.op_aset(context, runtime.newSymbol("strict"), strict_get(context));
result.op_aset(context, runtime.newSymbol("depth"), depth_get(context));
result.op_aset(context, runtime.newSymbol("buffer_initial_length"), buffer_initial_length_get(context));

if (this.allowDuplicateKey) {
if (!this.deprecateDuplicateKey) {
result.op_aset(context, runtime.newSymbol("allow_duplicate_key"), runtime.getTrue());
}
}
else {
result.op_aset(context, runtime.newSymbol("allow_duplicate_key"), runtime.getFalse());
}

for (String name: getInstanceVariableNameList()) {
result.op_aset(context, runtime.newSymbol(name.substring(1)), getInstanceVariables().getInstanceVariable(name));
}
Expand Down
Loading
Loading