mirror of https://github.com/apache/lucene.git
Reduce allocations in ByteBuffersDataOutput.writeString (#13863)
There's no need to allocate a byte array when serializing to heap buffers and the string fits the remaining capacity without further bounds checks. If it doesn't fit we could technically do better than the current `writeLongString` and avoid one round of copying by chunking the string but that might not be worth the complexity. In either case we can calculate the utf8 length up-front. While this costs extra cycles (in the small case) for iterating the string twice it saves creating an oftentimes 3x oversized byte array, a `BytesRef`, field reads from the `BytesRef`, copying from it to the buffer and the associated GC with cleaning it up. Theory and some quick benchmarking suggests this version is likely faster for any string length than the existing code.
This commit is contained in:
parent
e4092396d5
commit
25850923db
|
@ -31,7 +31,6 @@ import java.util.function.Consumer;
|
|||
import java.util.function.IntFunction;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.apache.lucene.util.BitUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
|
||||
|
@ -415,12 +414,17 @@ public final class ByteBuffersDataOutput extends DataOutput implements Accountab
|
|||
@Override
|
||||
public void writeString(String v) {
|
||||
try {
|
||||
if (v.length() <= MAX_CHARS_PER_WINDOW) {
|
||||
final BytesRef utf8 = new BytesRef(v);
|
||||
writeVInt(utf8.length);
|
||||
writeBytes(utf8.bytes, utf8.offset, utf8.length);
|
||||
final int charCount = v.length();
|
||||
final int byteLen = UnicodeUtil.calcUTF16toUTF8Length(v, 0, charCount);
|
||||
writeVInt(byteLen);
|
||||
ByteBuffer currentBlock = this.currentBlock;
|
||||
if (currentBlock.hasArray() && currentBlock.remaining() >= byteLen) {
|
||||
int startingPos = currentBlock.position();
|
||||
UnicodeUtil.UTF16toUTF8(
|
||||
v, 0, charCount, currentBlock.array(), currentBlock.arrayOffset() + startingPos);
|
||||
currentBlock.position(startingPos + byteLen);
|
||||
} else {
|
||||
writeLongString(v);
|
||||
writeLongString(byteLen, v);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new UncheckedIOException(e);
|
||||
|
@ -541,9 +545,7 @@ public final class ByteBuffersDataOutput extends DataOutput implements Accountab
|
|||
}
|
||||
|
||||
/** Writes a long string in chunks */
|
||||
private void writeLongString(final String s) throws IOException {
|
||||
final int byteLen = UnicodeUtil.calcUTF16toUTF8Length(s, 0, s.length());
|
||||
writeVInt(byteLen);
|
||||
private void writeLongString(int byteLen, final String s) throws IOException {
|
||||
final byte[] buf =
|
||||
new byte[Math.min(byteLen, UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * MAX_CHARS_PER_WINDOW)];
|
||||
for (int i = 0, end = s.length(); i < end; ) {
|
||||
|
|
Loading…
Reference in New Issue