ruby · byroot · Aug 28, 2025 · Aug 12, 2025 · Aug 13, 2025 · Aug 14, 2025
diff --git a/Rakefile b/Rakefile
@@ -116,11 +116,11 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'
   file JRUBY_GENERATOR_JAR => :compile do
     cd 'java/src' do
       generator_classes = FileList[
-        "json/ext/ByteList*.class",
+        "json/ext/*ByteList*.class",
         "json/ext/OptionsReader*.class",
         "json/ext/Generator*.class",
         "json/ext/RuntimeInfo*.class",
-        "json/ext/StringEncoder*.class",
+        "json/ext/*StringEncoder*.class",
         "json/ext/Utils*.class"
       ]
       sh 'jar', 'cf', File.basename(JRUBY_GENERATOR_JAR), *generator_classes

diff --git a/java/src/json/ext/AbstractByteListDirectOutputStream.java b/java/src/json/ext/AbstractByteListDirectOutputStream.java
@@ -0,0 +1,31 @@
+package json.ext;
+
+import java.io.OutputStream;
+
+import org.jcodings.Encoding;
+import org.jruby.util.ByteList;
+
+/** Base class of the in-memory output buffers; {@link #create(int)} picks the implementation. */
+abstract class AbstractByteListDirectOutputStream extends OutputStream {
+    // System property selecting the implementation; the segmented one is used by default.
+    private static final String PROP_SEGMENTED_BUFFER = "jruby.json.useSegmentedOutputStream";
+    private static final String PROP_SEGMENTED_BUFFER_DEFAULT = "true";
+
+    // Evaluated once, when this class is initialized.
+    private static final boolean USE_SEGMENTED_BUFFER =
+        Boolean.parseBoolean(System.getProperty(PROP_SEGMENTED_BUFFER, PROP_SEGMENTED_BUFFER_DEFAULT));
+
+    /**
+     * Creates the configured buffer implementation.
+     *
+     * @param estimatedSize size estimate handed to the implementation's constructor
+     */
+    public static AbstractByteListDirectOutputStream create(int estimatedSize) {
+        return USE_SEGMENTED_BUFFER
+            ? new SegmentedByteListDirectOutputStream(estimatedSize)
+            : new ByteListDirectOutputStream(estimatedSize);
+    }
+
+    /** Returns the buffered bytes as a {@link ByteList} carrying the given encoding. */
+    public abstract ByteList toByteListDirect(Encoding encoding);
+}
diff --git a/java/src/json/ext/ByteListDirectOutputStream.java b/java/src/json/ext/ByteListDirectOutputStream.java
@@ -4,10 +4,9 @@
 import org.jruby.util.ByteList;
 
 import java.io.IOException;
-import java.io.OutputStream;
 import java.util.Arrays;
 
-public class ByteListDirectOutputStream extends OutputStream {
+public class ByteListDirectOutputStream extends AbstractByteListDirectOutputStream {
     private byte[] buffer;
     private int length;
 

diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java
@@ -26,6 +26,8 @@
 import org.jruby.util.IOOutputStream;
 import org.jruby.util.TypeConverter;
 
+import json.ext.ByteListDirectOutputStream;
+
 import java.io.BufferedOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
@@ -232,7 +234,7 @@ public StringEncoder getStringEncoder(ThreadContext context) {
                 GeneratorState state = getState(context);
                 stringEncoder = state.asciiOnly() ?
                         new StringEncoderAsciiOnly(state.scriptSafe()) :
-                        new StringEncoder(state.scriptSafe());
+                        (state.scriptSafe()) ? new StringEncoder(state.scriptSafe()) : StringEncoder.createBasicEncoder();
             }
             return stringEncoder;
         }
@@ -252,7 +254,7 @@ int guessSize(ThreadContext context, Session session, T object) {
         }
 
         RubyString generateNew(ThreadContext context, Session session, T object) {
-            ByteListDirectOutputStream buffer = new ByteListDirectOutputStream(guessSize(context, session, object));
+            AbstractByteListDirectOutputStream buffer = AbstractByteListDirectOutputStream.create(guessSize(context, session, object));
             generateToBuffer(context, session, object, buffer);
             return RubyString.newString(context.runtime, buffer.toByteListDirect(UTF8Encoding.INSTANCE));
         }

diff --git a/java/src/json/ext/SWARBasicStringEncoder.java b/java/src/json/ext/SWARBasicStringEncoder.java
@@ -0,0 +1,86 @@
+package json.ext;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.jruby.util.ByteList;
+
+/**
+ * String encoder for {@code ESCAPE_TABLE} that screens the input eight (then four) bytes
+ * at a time and only inspects individual bytes of chunks that may need escaping.
+ */
+public class SWARBasicStringEncoder extends StringEncoder {
+
+    public SWARBasicStringEncoder() {
+        super(ESCAPE_TABLE);
+    }
+
+    /**
+     * Writes {@code src} with escapes applied, skipping whole chunks that need none.
+     * Reads {@code realSize()} bytes starting at {@code begin()} of the backing array.
+     */
+    @Override
+    void encode(ByteList src) throws IOException {
+        byte[] hexdig = HEX;
+        byte[] scratch = aux;
+        byte[] ptrBytes = src.unsafeBytes();
+        int ptr = src.begin();
+        int len = src.realSize();
+        int beg = 0;
+        int pos = 0;
+
+        // Wrap the whole backing array: the absolute reads below index it with ptr + pos.
+        // Capping the limit at len made them throw IndexOutOfBoundsException when begin() > 0.
+        ByteBuffer bb = ByteBuffer.wrap(ptrBytes);
+        while (pos + 8 <= len) {
+            long x = bb.getLong(ptr + pos);
+            if (skipChunk(x)) {
+                pos += 8;
+                continue;
+            }
+            int chunkEnd = pos + 8;
+            while (pos < chunkEnd) {
+                int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
+                if (ESCAPE_TABLE[ch] > 0) {
+                    beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
+                    escapeAscii(ch, scratch, hexdig);
+                } else {
+                    pos++;
+                }
+            }
+        }
+        // One 4-byte probe before the byte-by-byte tail.
+        if (pos + 4 <= len && skipChunk(bb.getInt(ptr + pos))) {
+            pos += 4;
+        }
+        while (pos < len) {
+            int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
+            if (ESCAPE_TABLE[ch] > 0) {
+                beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
+                escapeAscii(ch, scratch, hexdig);
+            } else {
+                pos++;
+            }
+        }
+        if (beg < len) {
+            append(ptrBytes, ptr + beg, len - beg);
+        }
+    }
+
+    // Returns true only if x holds no ASCII byte below 32, no '"' (34) and no backslash (92).
+    // A borrow between byte lanes can yield a spurious false, which the scalar loop resolves.
+    private boolean skipChunk(long x) {
+        long is_ascii = 0x8080808080808080L & ~x;
+        long lt32_or_eq34 = (x ^ 0x0202020202020202L) - 0x2121212121212121L;
+        long eq92 = (x ^ 0x5C5C5C5C5C5C5C5CL) - 0x0101010101010101L;
+        return ((lt32_or_eq34 | eq92) & is_ascii) == 0;
+    }
+
+    // Four-byte version of the same test.
+    private boolean skipChunk(int x) {
+        int is_ascii = 0x80808080 & ~x;
+        int lt32_or_eq34 = (x ^ 0x02020202) - 0x21212121;
+        int eq92 = (x ^ 0x5C5C5C5C) - 0x01010101;
+        return ((lt32_or_eq34 | eq92) & is_ascii) == 0;
+    }
+}
diff --git a/java/src/json/ext/SegmentedByteListDirectOutputStream.java b/java/src/json/ext/SegmentedByteListDirectOutputStream.java
@@ -0,0 +1,84 @@
+package json.ext;
+
+import org.jcodings.Encoding;
+import org.jruby.util.ByteList;
+
+import java.io.IOException;
+
+/** Output stream that grows by adding segments and joins them only in {@link #toByteListDirect}. */
+public class SegmentedByteListDirectOutputStream extends AbstractByteListDirectOutputStream {
+    private static final int DEFAULT_CAPACITY = 1024;
+
+    private int totalLength;
+    // Segments start at 1024 bytes or more and double in size, so 21 slots normally suffice;
+    // startSegment() grows this table in the rare case that one more is needed.
+    private byte[][] segments = new byte[21][];
+    private int currentSegmentIndex;
+    private int currentSegmentLength;
+    private byte[] currentSegment;
+
+    SegmentedByteListDirectOutputStream(int size) {
+        currentSegment = new byte[Math.max(size, DEFAULT_CAPACITY)];
+        segments[0] = currentSegment;
+    }
+
+    /** Copies all segments into one array of exactly the written length, tagged with {@code encoding}. */
+    @Override
+    public ByteList toByteListDirect(Encoding encoding) {
+        byte[] buffer = new byte[totalLength];
+        int pos = 0;
+        // Earlier segments are always full; only the current one can be partially filled.
+        for (int i = 0; i < currentSegmentIndex; i++) {
+            byte[] segment = segments[i];
+            System.arraycopy(segment, 0, buffer, pos, segment.length);
+            pos += segment.length;
+        }
+        System.arraycopy(currentSegment, 0, buffer, pos, currentSegmentLength);
+        return new ByteList(buffer, 0, totalLength, encoding, false);
+    }
+
+    @Override
+    public void write(int b) throws IOException {
+        if (currentSegmentLength == currentSegment.length) {
+            startSegment(1);
+        }
+        currentSegment[currentSegmentLength++] = (byte) b;
+        totalLength++;
+    }
+
+    @Override
+    public void write(byte[] bytes, int start, int length) throws IOException {
+        int remaining = length;
+        while (remaining > 0) {
+            if (currentSegmentLength == currentSegment.length) {
+                startSegment(remaining);
+            }
+            int toWrite = Math.min(remaining, currentSegment.length - currentSegmentLength);
+            System.arraycopy(bytes, start, currentSegment, currentSegmentLength, toWrite);
+            currentSegmentLength += toWrite;
+            totalLength += toWrite;
+            start += toWrite;
+            remaining -= toWrite;
+        }
+    }
+
+    @Override
+    public void write(byte[] bytes) throws IOException {
+        write(bytes, 0, bytes.length);
+    }
+
+    // Starts a new segment with room for at least `pending` bytes. totalLength is kept current
+    // by both write methods so this overflow check also counts bytes copied earlier in the call.
+    private void startSegment(int pending) throws IOException {
+        if (totalLength + pending < 0) {
+            throw new IOException("Total length exceeds maximum length of an array.");
+        }
+        if (++currentSegmentIndex == segments.length) {
+            segments = java.util.Arrays.copyOf(segments, segments.length * 2);
+        }
+        int doubled = currentSegment.length << 1;
+        currentSegment = new byte[Math.max(doubled < 0 ? DEFAULT_CAPACITY : doubled, pending)];
+        currentSegmentLength = 0;
+        segments[currentSegmentIndex] = currentSegment;
+    }
+}
diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java
@@ -5,6 +5,10 @@
  */
 package json.ext;
 
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.charset.StandardCharsets;
+
 import org.jcodings.Encoding;
 import org.jcodings.specific.ASCIIEncoding;
 import org.jcodings.specific.USASCIIEncoding;
@@ -17,10 +21,6 @@
 import org.jruby.util.ByteList;
 import org.jruby.util.StringSupport;
 
-import java.io.IOException;
-import java.io.OutputStream;
-import java.nio.charset.StandardCharsets;
-
 /**
  * An encoder that reads from the given source and outputs its representation
  * to another ByteList. The source string is fully checked for UTF-8 validity,
@@ -114,6 +114,17 @@ class StringEncoder extends ByteListTranscoder {
 
     protected final byte[] escapeTable;
 
+    // System property that switches the SWAR-based basic encoder on or off ("true" if unset).
+    private static final String USE_SWAR_BASIC_ENCODER_PROP = "jruby.json.useSWARBasicEncoder";
+    private static final String USE_SWAR_BASIC_ENCODER_DEFAULT = "true";
+    private static final boolean USE_BASIC_SWAR_ENCODER;
+
+    static {
+        // XXX Is there a logger we can use here to report which encoder was selected?
+        String configured = System.getProperty(USE_SWAR_BASIC_ENCODER_PROP, USE_SWAR_BASIC_ENCODER_DEFAULT);
+        USE_BASIC_SWAR_ENCODER = Boolean.parseBoolean(configured);
+    }
+
     OutputStream out;
 
     // Escaped characters will reuse this array, to avoid new allocations
@@ -138,6 +149,14 @@ class StringEncoder extends ByteListTranscoder {
         this.escapeTable = escapeTable;
     }
 
+    // Returns the SWAR-based encoder when USE_BASIC_SWAR_ENCODER is set, otherwise a plain
+    // StringEncoder constructed with scriptSafe disabled.
+    static StringEncoder createBasicEncoder() {
+        return USE_BASIC_SWAR_ENCODER
+            ? new SWARBasicStringEncoder()
+            : new StringEncoder(false);
+    }
+
     // C: generate_json_string
     void generate(ThreadContext context, RubyString object, OutputStream buffer) throws IOException {
         object = ensureValidEncoding(context, object);
@@ -198,8 +217,40 @@ private static RubyString tryWeirdEncodings(ThreadContext context, RubyString st
         return str;
     }
 
+    // Byte-by-byte escaping against ESCAPE_TABLE; encode() delegates here for that table.
+    void encodeBasic(ByteList src) throws IOException {
+        final byte[] bytes = src.unsafeBytes();
+        final int offset = src.begin();
+        final int size = src.realSize();
+        final byte[] hex = HEX;
+        final byte[] escapeBuf = aux;
+
+        // pending: start of the run handed to flushPos/append; cursor: byte being examined.
+        int pending = 0;
+        int cursor = 0;
+        while (cursor < size) {
+            int c = Byte.toUnsignedInt(bytes[offset + cursor]);
+            if (ESCAPE_TABLE[c] <= 0) {
+                cursor++;
+                continue;
+            }
+            // NOTE(review): flushPos presumably emits the pending run and skips the escaped byte.
+            pending = cursor = flushPos(cursor, pending, bytes, offset, 1);
+            escapeAscii(c, escapeBuf, hex);
+        }
+        // Pass whatever follows the last flush point to append().
+        if (pending < size) {
+            append(bytes, offset + pending, size - pending);
+        }
+    }
+
     // C: convert_UTF8_to_JSON
     void encode(ByteList src) throws IOException {
+        if (escapeTable == ESCAPE_TABLE) {
+            encodeBasic(src);
+            return;
+        }
+
         byte[] hexdig = HEX;
         byte[] scratch = aux;
         byte[] escapeTable = this.escapeTable;

diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb
@@ -504,6 +504,18 @@ def test_backslash
     json = '["\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\""]'
     assert_equal json, generate(data)
     #
+    data = '"""""'
+    json = '"\"\"\"\"\""'
+    assert_equal json, generate(data)
+    #
+    data = "abc\n"
+    json = '"abc\\n"'
+    assert_equal json, generate(data)
+    #
+    data = "\nabc"
+    json = '"\\nabc"'
+    assert_equal json, generate(data)
+    #
     data = ["'"]
     json = '["\\\'"]'
     assert_equal '["\'"]', generate(data)