From 25850923dbdd708a671cff7b0e1480287224f027 Mon Sep 17 00:00:00 2001
From: Armin Braun <me@obrown.io>
Date: Wed, 9 Oct 2024 09:09:00 +0200
Subject: [PATCH] Reduce allocations in ByteBuffersDataOutput.writeString
 (#13863)

There's no need to allocate a byte array when serializing to heap
buffers and the string fits the remaining capacity without further bounds checks.
If it doesn't fit we could technically do better than the current
`writeLongString` and avoid one round of copying by chunking the string
but that might not be worth the complexity.
In either case we can calculate the utf8 length up-front.
While this costs extra cycles (in the small case) for iterating the string twice it saves
creating an oftentimes 3x oversized byte array, a `BytesRef`, field
reads from the `BytesRef`, copying from it to the buffer and the associated GC with cleaning it up.
Theory and some quick benchmarking suggests this version is likely faster for any string
length than the existing code.
---
 .../lucene/store/ByteBuffersDataOutput.java   | 20 ++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataOutput.java b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataOutput.java
index eaa0929848d..1c6bcd63629 100644
--- a/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataOutput.java
+++ b/lucene/core/src/java/org/apache/lucene/store/ByteBuffersDataOutput.java
@@ -31,7 +31,6 @@ import java.util.function.Consumer;
 import java.util.function.IntFunction;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.BitUtil;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.UnicodeUtil;
 
@@ -415,12 +414,17 @@ public final class ByteBuffersDataOutput extends DataOutput implements Accountab
   @Override
   public void writeString(String v) {
     try {
-      if (v.length() <= MAX_CHARS_PER_WINDOW) {
-        final BytesRef utf8 = new BytesRef(v);
-        writeVInt(utf8.length);
-        writeBytes(utf8.bytes, utf8.offset, utf8.length);
+      final int charCount = v.length();
+      final int byteLen = UnicodeUtil.calcUTF16toUTF8Length(v, 0, charCount);
+      writeVInt(byteLen);
+      ByteBuffer currentBlock = this.currentBlock;
+      if (currentBlock.hasArray() && currentBlock.remaining() >= byteLen) {
+        int startingPos = currentBlock.position();
+        UnicodeUtil.UTF16toUTF8(
+            v, 0, charCount, currentBlock.array(), currentBlock.arrayOffset() + startingPos);
+        currentBlock.position(startingPos + byteLen);
       } else {
-        writeLongString(v);
+        writeLongString(byteLen, v);
       }
     } catch (IOException e) {
       throw new UncheckedIOException(e);
@@ -541,9 +545,7 @@ public final class ByteBuffersDataOutput extends DataOutput implements Accountab
   }
 
   /** Writes a long string in chunks */
-  private void writeLongString(final String s) throws IOException {
-    final int byteLen = UnicodeUtil.calcUTF16toUTF8Length(s, 0, s.length());
-    writeVInt(byteLen);
+  private void writeLongString(int byteLen, final String s) throws IOException {
     final byte[] buf =
         new byte[Math.min(byteLen, UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * MAX_CHARS_PER_WINDOW)];
     for (int i = 0, end = s.length(); i < end; ) {