Avoid allocating some zero-length byte arrays (#13608)

I found this in a heap dump: when there are large numbers of `FieldReader` instances whose minimum term is an empty string, we allocate MBs worth of empty `byte[]` in ES. I think it is worth adding the conditional here.
2024-07-24 21:53:30 +02:00 · 2024-07-24 21:53:30 +02:00 · 4c1d50d8e8
parent acbd714140
commit 4c1d50d8e8
2 changed files with 3 additions and 4 deletions
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java
@ -270,9 +270,8 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
      throw new CorruptIndexException("invalid bytes length: " + numBytes, in);
    }

-    BytesRef bytes = new BytesRef();
+    BytesRef bytes = new BytesRef(numBytes);
    bytes.length = numBytes;
-    bytes.bytes = new byte[numBytes];
    in.readBytes(bytes.bytes, 0, numBytes);

    return bytes;
--- a/lucene/core/src/java/org/apache/lucene/util/BytesRef.java
+++ b/lucene/core/src/java/org/apache/lucene/util/BytesRef.java
@ -68,7 +68,7 @@ public final class BytesRef implements Comparable<BytesRef>, Cloneable {
   * both be zero.
   */
  public BytesRef(int capacity) {
-    this.bytes = new byte[capacity];
+    this.bytes = capacity == 0 ? EMPTY_BYTES : new byte[capacity];
  }

  /**
@ -77,7 +77,7 @@ public final class BytesRef implements Comparable<BytesRef>, Cloneable {
   * @param text This must be well-formed unicode text, with no unpaired surrogates.
   */
  public BytesRef(CharSequence text) {
-    this(new byte[UnicodeUtil.maxUTF8Length(text.length())]);
+    this(UnicodeUtil.maxUTF8Length(text.length()));
    length = UnicodeUtil.UTF16toUTF8(text, 0, text.length(), bytes);
  }