Skip to content
4 changes: 2 additions & 2 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,11 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'
file JRUBY_GENERATOR_JAR => :compile do
cd 'java/src' do
generator_classes = FileList[
"json/ext/ByteList*.class",
"json/ext/*ByteList*.class",
"json/ext/OptionsReader*.class",
"json/ext/Generator*.class",
"json/ext/RuntimeInfo*.class",
"json/ext/StringEncoder*.class",
"json/ext/*StringEncoder*.class",
"json/ext/Utils*.class"
]
sh 'jar', 'cf', File.basename(JRUBY_GENERATOR_JAR), *generator_classes
Expand Down
31 changes: 31 additions & 0 deletions java/src/json/ext/AbstractByteListDirectOutputStream.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package json.ext;

import java.io.OutputStream;

import org.jcodings.Encoding;
import org.jruby.util.ByteList;

/**
 * Common base of the output streams the generator writes into. Besides the
 * {@link OutputStream} contract, implementations can hand their accumulated
 * bytes over as a {@link ByteList}.
 *
 * <p>Which implementation {@link #create(int)} returns is decided once, when
 * this class is initialized, from the {@code jruby.json.useSegmentedOutputStream}
 * system property (treated as {@code "true"} when unset).
 */
abstract class AbstractByteListDirectOutputStream extends OutputStream {

    private static final String SEGMENTED_PROPERTY = "jruby.json.useSegmentedOutputStream";
    private static final String SEGMENTED_PROPERTY_FALLBACK = "true";

    // XXX Is there a logger we can use to report which implementation was selected?
    private static final boolean SEGMENTED =
            Boolean.parseBoolean(System.getProperty(SEGMENTED_PROPERTY, SEGMENTED_PROPERTY_FALLBACK));

    /**
     * Builds the stream implementation selected by the system property.
     *
     * @param estimatedSize size hint forwarded unchanged to the chosen implementation
     * @return a segmented stream when the property is enabled, otherwise a
     *         {@code ByteListDirectOutputStream}
     */
    public static AbstractByteListDirectOutputStream create(int estimatedSize) {
        return SEGMENTED
                ? new SegmentedByteListDirectOutputStream(estimatedSize)
                : new ByteListDirectOutputStream(estimatedSize);
    }

    /**
     * Exposes the bytes written so far as a {@link ByteList}.
     *
     * @param encoding encoding to associate with the returned list
     * @return the written content
     */
    public abstract ByteList toByteListDirect(Encoding encoding);
}
3 changes: 1 addition & 2 deletions java/src/json/ext/ByteListDirectOutputStream.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@
import org.jruby.util.ByteList;

import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;

public class ByteListDirectOutputStream extends OutputStream {
public class ByteListDirectOutputStream extends AbstractByteListDirectOutputStream {
private byte[] buffer;
private int length;

Expand Down
6 changes: 4 additions & 2 deletions java/src/json/ext/Generator.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
import org.jruby.util.IOOutputStream;
import org.jruby.util.TypeConverter;

import json.ext.ByteListDirectOutputStream;

import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.OutputStream;
Expand Down Expand Up @@ -232,7 +234,7 @@ public StringEncoder getStringEncoder(ThreadContext context) {
GeneratorState state = getState(context);
stringEncoder = state.asciiOnly() ?
new StringEncoderAsciiOnly(state.scriptSafe()) :
new StringEncoder(state.scriptSafe());
(state.scriptSafe()) ? new StringEncoder(state.scriptSafe()) : StringEncoder.createBasicEncoder();
}
return stringEncoder;
}
Expand All @@ -252,7 +254,7 @@ int guessSize(ThreadContext context, Session session, T object) {
}

RubyString generateNew(ThreadContext context, Session session, T object) {
ByteListDirectOutputStream buffer = new ByteListDirectOutputStream(guessSize(context, session, object));
AbstractByteListDirectOutputStream buffer = AbstractByteListDirectOutputStream.create(guessSize(context, session, object));
generateToBuffer(context, session, object, buffer);
return RubyString.newString(context.runtime, buffer.toByteListDirect(UTF8Encoding.INSTANCE));
}
Expand Down
86 changes: 86 additions & 0 deletions java/src/json/ext/SWARBasicStringEncoder.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package json.ext;

import java.io.IOException;
import java.nio.ByteBuffer;

import org.jruby.util.ByteList;

/**
 * A {@link StringEncoder} over {@code ESCAPE_TABLE} that scans its input eight
 * bytes (and then, once, four bytes) at a time. A whole chunk is stepped over
 * when a bitwise test proves that none of its bytes is an ASCII byte below
 * 0x20, a double quote (0x22) or a backslash (0x5C); every other chunk is
 * handled byte by byte through {@code ESCAPE_TABLE}.
 */
public class SWARBasicStringEncoder extends StringEncoder {

    public SWARBasicStringEncoder() {
        super(ESCAPE_TABLE);
    }

    /**
     * Encodes the bytes of {@code src}, escaping every byte for which
     * {@code ESCAPE_TABLE} holds a positive value.
     *
     * @param src the bytes to encode; only the window starting at
     *            {@code src.begin()} and spanning {@code src.realSize()} bytes is read
     * @throws IOException declared by the inherited output helpers
     */
    @Override
    void encode(ByteList src) throws IOException {
        byte[] hexdig = HEX;
        byte[] scratch = aux;

        byte[] ptrBytes = src.unsafeBytes();
        int ptr = src.begin();
        int len = src.realSize();

        // beg and pos are offsets relative to ptr: beg marks the start of the
        // not-yet-written run, pos is the scan position.
        int beg = 0;
        int pos = 0;

        // The absolute getLong/getInt calls below are indexed from the start of
        // the backing array and are bounds-checked against the buffer's limit.
        // The limit therefore has to be ptr + len; wrapping (0, len) made reads
        // throw IndexOutOfBoundsException whenever the ByteList began at a
        // non-zero offset.
        ByteBuffer bb = ByteBuffer.wrap(ptrBytes, ptr, len);
        while (pos + 8 <= len) {
            long x = bb.getLong(ptr + pos);
            if (skipChunk(x)) {
                pos += 8;
                continue;
            }
            // Something in this chunk may need escaping: walk its 8 bytes one by one.
            int chunkEnd = pos + 8;
            while (pos < chunkEnd) {
                int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
                int ch_len = ESCAPE_TABLE[ch];
                if (ch_len > 0) {
                    // NOTE(review): flushPos/escapeAscii are inherited and not visible
                    // here; presumably flushPos writes the pending run and returns the
                    // offset just past the escaped byte - confirm in the superclass.
                    beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
                    escapeAscii(ch, scratch, hexdig);
                } else {
                    pos++;
                }
            }
        }

        // At most one 4-byte chunk can be skipped in the tail (fewer than 8 bytes remain).
        if (pos + 4 <= len) {
            int x = bb.getInt(ptr + pos);
            if (skipChunk(x)) {
                pos += 4;
            }
        }

        // Remaining bytes (and a 4-byte chunk that could not be skipped) go byte by byte.
        while (pos < len) {
            int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
            int ch_len = ESCAPE_TABLE[ch];
            if (ch_len > 0) {
                beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
                escapeAscii(ch, scratch, hexdig);
            } else {
                pos++;
            }
        }

        // Emit whatever unescaped run is still pending.
        if (beg < len) {
            append(ptrBytes, ptr + beg, len - beg);
        }
    }

    /**
     * Tests eight bytes at once.
     *
     * <p>Per byte lane: {@code is_ascii} has its top bit set when the byte is
     * below 0x80; {@code (x ^ 0x02) - 0x21} has its top bit set for ASCII bytes
     * below 0x20 and for 0x22; {@code (x ^ 0x5C) - 0x01} has its top bit set for
     * 0x5C. A borrow from a lower lane can flag a neighbouring lane spuriously,
     * which only sends the chunk to the byte-wise path.
     *
     * @param x eight input bytes packed into a long
     * @return {@code true} when no lane was flagged, i.e. the chunk can be skipped
     */
    private boolean skipChunk(long x) {
        long is_ascii = 0x8080808080808080L & ~x;
        long xor2 = x ^ 0x0202020202020202L;
        long lt32_or_eq34 = xor2 - 0x2121212121212121L;
        long sub92 = x ^ 0x5C5C5C5C5C5C5C5CL;
        long eq92 = (sub92 - 0x0101010101010101L);
        return ((lt32_or_eq34 | eq92) & is_ascii) == 0;
    }

    /**
     * Four-byte variant of {@link #skipChunk(long)}; same lane-wise test.
     *
     * @param x four input bytes packed into an int
     * @return {@code true} when no lane was flagged, i.e. the chunk can be skipped
     */
    private boolean skipChunk(int x) {
        int is_ascii = 0x80808080 & ~x;
        int xor2 = x ^ 0x02020202;
        int lt32_or_eq34 = xor2 - 0x21212121;
        int sub92 = x ^ 0x5C5C5C5C;
        int eq92 = (sub92 - 0x01010101);
        return ((lt32_or_eq34 | eq92) & is_ascii) == 0;
    }
}
84 changes: 84 additions & 0 deletions java/src/json/ext/SegmentedByteListDirectOutputStream.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package json.ext;

import org.jcodings.Encoding;
import org.jruby.util.ByteList;

import java.io.IOException;

public class SegmentedByteListDirectOutputStream extends AbstractByteListDirectOutputStream {
private static final int DEFAULT_CAPACITY = 1024;

private int totalLength;
// Why 21? The minimum segment size is 1024 bytes. If we double the segment size each time
// we need a new segment, we only need 21 segments to reach the maximum array size in Java.
private byte[][] segments = new byte[21][];
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why 21? The minimum segment size is 1024 for the first segment. The code doubles the segment size for each additional segment. Based on this doubling, we only need 21 segments before we hit Integer.MAX_VALUE.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. 👏

Maybe a comment or well-named constant so nobody else asks that question in the future?

private int currentSegmentIndex;
private int currentSegmentLength;
private byte[] currentSegment;

SegmentedByteListDirectOutputStream(int size) {
currentSegment = new byte[Math.max(size, DEFAULT_CAPACITY)];
segments[0] = currentSegment;
}

/**
 * Joins all segments into one freshly allocated array of exactly
 * {@code totalLength} bytes and wraps it in a {@link ByteList}.
 *
 * @param encoding encoding passed to the {@code ByteList} constructor
 * @return a {@code ByteList} over the joined bytes
 */
@Override
public ByteList toByteListDirect(Encoding encoding) {
    byte[] buffer = new byte[totalLength];
    int pos = 0;
    // The write methods only open a new segment once the current one is full,
    // so every segment before the current one is copied in its entirety.
    for (int i = 0; i < currentSegmentIndex; i++) {
        byte[] segment = segments[i];
        System.arraycopy(segment, 0, buffer, pos, segment.length);
        pos += segment.length;
    }
    // The current segment is handled separately: only its used prefix is copied.
    System.arraycopy(currentSegment, 0, buffer, pos, currentSegmentLength);
    // NOTE(review): the trailing 'false' is presumably ByteList's "copy" flag,
    // i.e. the new array is adopted rather than duplicated - confirm against ByteList.
    return new ByteList(buffer, 0, totalLength, encoding, false);
}

/**
 * Appends a single byte, opening a new segment first when the current one is full.
 *
 * @param b the byte to append; only the low eight bits are stored
 * @throws IOException if a new segment is needed and the total length would
 *                     overflow an {@code int}
 */
@Override
public void write(int b) throws IOException {
    final byte[] filled = currentSegment;
    if (currentSegmentLength == filled.length) {
        if (totalLength + 1 < 0) {
            throw new IOException("Total length exceeds maximum length of an array.");
        }
        // Each new segment is twice as large as its predecessor; if doubling
        // overflows, fall back to the default capacity.
        int nextCapacity = filled.length * 2;
        if (nextCapacity < 0) {
            nextCapacity = DEFAULT_CAPACITY;
        }
        currentSegmentIndex++;
        currentSegment = new byte[nextCapacity];
        currentSegmentLength = 0;
        segments[currentSegmentIndex] = currentSegment;
    }
    currentSegment[currentSegmentLength] = (byte) b;
    currentSegmentLength++;
    totalLength++;
}

/**
 * Appends {@code length} bytes of {@code bytes}, beginning at index {@code start}.
 * The current segment is filled first; if more bytes remain, a new segment is
 * opened that is at least large enough to hold all of them.
 *
 * <p>{@code totalLength} is advanced as each slice is copied, so it always
 * matches the number of bytes actually stored, even if the call fails part-way,
 * and the overflow guard sees the bytes this call has already written.
 * A non-positive {@code length} writes nothing and leaves the stream unchanged.
 *
 * @param bytes  source array
 * @param start  index of the first byte to copy
 * @param length number of bytes to copy
 * @throws IOException if a new segment is needed and the total length would
 *                     overflow an {@code int}
 */
@Override
public void write(byte[] bytes, int start, int length) throws IOException {
    int remaining = length;

    while (remaining > 0) {
        if (currentSegmentLength == currentSegment.length) {
            // totalLength already includes what earlier iterations copied, so
            // totalLength + remaining is the final total of this call.
            if (totalLength + remaining < 0) {
                throw new IOException("Total length exceeds maximum length of an array.");
            }
            currentSegmentIndex++;
            // Double the previous segment size; fall back to the default if that
            // overflows, and never allocate less than what is still to be written.
            int capacity = currentSegment.length << 1;
            capacity = (capacity < 0) ? DEFAULT_CAPACITY : capacity;
            capacity = (capacity < remaining) ? remaining : capacity;
            currentSegment = new byte[capacity];
            currentSegmentLength = 0;
            segments[currentSegmentIndex] = currentSegment;
        }
        int toWrite = Math.min(remaining, currentSegment.length - currentSegmentLength);
        System.arraycopy(bytes, start, currentSegment, currentSegmentLength, toWrite);
        currentSegmentLength += toWrite;
        totalLength += toWrite;
        start += toWrite;
        remaining -= toWrite;
    }
}

/**
 * Appends the whole array by delegating to {@link #write(byte[], int, int)}.
 *
 * @param bytes the bytes to append
 * @throws IOException propagated from the delegate
 */
@Override
public void write(byte[] bytes) throws IOException {
    final int count = bytes.length;
    write(bytes, 0, count);
}
}
59 changes: 55 additions & 4 deletions java/src/json/ext/StringEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@
*/
package json.ext;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.jcodings.Encoding;
import org.jcodings.specific.ASCIIEncoding;
import org.jcodings.specific.USASCIIEncoding;
Expand All @@ -17,10 +21,6 @@
import org.jruby.util.ByteList;
import org.jruby.util.StringSupport;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

/**
* An encoder that reads from the given source and outputs its representation
* to another ByteList. The source string is fully checked for UTF-8 validity,
Expand Down Expand Up @@ -114,6 +114,17 @@ class StringEncoder extends ByteListTranscoder {

protected final byte[] escapeTable;

private static final String USE_SWAR_BASIC_ENCODER_PROP = "jruby.json.useSWARBasicEncoder";
private static final String USE_SWAR_BASIC_ENCODER_DEFAULT = "true";
private static final boolean USE_BASIC_SWAR_ENCODER;

static {
USE_BASIC_SWAR_ENCODER = Boolean.parseBoolean(
System.getProperty(USE_SWAR_BASIC_ENCODER_PROP, USE_SWAR_BASIC_ENCODER_DEFAULT));
// XXX Is there a logger we can use here?
// System.out.println("Using SWAR basic encoder: " + USE_BASIC_SWAR_ENCODER);
}

OutputStream out;

// Escaped characters will reuse this array, to avoid new allocations
Expand All @@ -138,6 +149,14 @@ class StringEncoder extends ByteListTranscoder {
this.escapeTable = escapeTable;
}

/**
 * Creates the encoder used when no special escaping mode is requested.
 *
 * @return a {@code SWARBasicStringEncoder} when the SWAR property is enabled,
 *         otherwise a {@code StringEncoder} constructed with {@code false}
 */
static StringEncoder createBasicEncoder() {
    return USE_BASIC_SWAR_ENCODER
            ? new SWARBasicStringEncoder()
            : new StringEncoder(false);
}

// C: generate_json_string
void generate(ThreadContext context, RubyString object, OutputStream buffer) throws IOException {
object = ensureValidEncoding(context, object);
Expand Down Expand Up @@ -198,8 +217,40 @@ private static RubyString tryWeirdEncodings(ThreadContext context, RubyString st
return str;
}

/**
 * Byte-by-byte encoder for {@code ESCAPE_TABLE}: walks the window of
 * {@code src} and escapes every byte whose table entry is positive, copying
 * the runs in between unchanged.
 *
 * @param src the bytes to encode; read from {@code src.begin()} for
 *            {@code src.realSize()} bytes
 * @throws IOException declared by the output helpers this method calls
 */
void encodeBasic(ByteList src) throws IOException {
    final byte[] hex = HEX;
    final byte[] escapeBuf = aux;

    final byte[] bytes = src.unsafeBytes();
    final int offset = src.begin();
    final int size = src.realSize();

    // Both counters are relative to offset: pending is where the unwritten
    // run starts, cursor is the scan position.
    int pending = 0;
    int cursor = 0;

    while (cursor < size) {
        final int b = Byte.toUnsignedInt(bytes[offset + cursor]);
        if (ESCAPE_TABLE[b] <= 0) {
            // Nothing to escape: keep extending the pending run.
            cursor++;
            continue;
        }
        // NOTE(review): flushPos/escapeAscii are defined outside this view;
        // presumably flushPos writes the pending run and returns the offset
        // just past the escaped byte - confirm.
        cursor = flushPos(cursor, pending, bytes, offset, 1);
        pending = cursor;
        escapeAscii(b, escapeBuf, hex);
    }

    // Write out the trailing run that needed no escaping.
    if (pending < size) {
        append(bytes, offset + pending, size - pending);
    }
}

// C: convert_UTF8_to_JSON
void encode(ByteList src) throws IOException {
if (escapeTable == ESCAPE_TABLE) {
encodeBasic(src);
return;
}

byte[] hexdig = HEX;
byte[] scratch = aux;
byte[] escapeTable = this.escapeTable;
Expand Down
12 changes: 12 additions & 0 deletions test/json/json_generator_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -504,6 +504,18 @@ def test_backslash
json = '["\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\""]'
assert_equal json, generate(data)
#
data = '"""""'
json = '"\"\"\"\"\""'
assert_equal json, generate(data)
#
data = "abc\n"
json = '"abc\\n"'
assert_equal json, generate(data)
#
data = "\nabc"
json = '"\\nabc"'
assert_equal json, generate(data)
#
data = ["'"]
json = '["\\\'"]'
assert_equal '["\'"]', generate(data)
Expand Down
Loading