From 0ac137a9a11766c41cedb0832f3f336342358c13 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 22 Jul 2019 07:09:32 +0200 Subject: [PATCH] Optimize some StreamOutput Operations (#44660) (#44668) * Optimize some StreamOutput Operations * Writing numbers byte by byte adds a lot of unnecessary bounds checks to serialization * Serializing to a threadlocal `byte[]` instead and bulk writing gives about a 50% speedup on `long` and `vlong` (for large numbers) writes and 30% for `int`, `vint` on Linux on an i9 * Using a threadlocal of the maximum string buffer size we used to allocate before also removes allocations when writing strings in general since we now never have to allocate a `byte[]` for that * And don't have to GC one either resolving the TODO removed here --- .../common/io/stream/StreamOutput.java | 63 +++++++++++-------- 1 file changed, 38 insertions(+), 25 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java b/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java index a1e7cd9afff..f07cc46a3c1 100644 --- a/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java +++ b/server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java @@ -24,7 +24,6 @@ import org.apache.lucene.index.IndexFormatTooNewException; import org.apache.lucene.index.IndexFormatTooOldException; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.LockObtainFailedException; -import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BitUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; @@ -232,19 +231,25 @@ public abstract class StreamOutput extends OutputStream { write(bytes.bytes, bytes.offset, bytes.length); } + private static final ThreadLocal scratch = ThreadLocal.withInitial(() -> new byte[1024]); + public final void writeShort(short v) throws IOException { - writeByte((byte) (v >> 8)); - writeByte((byte) v); + final byte[] buffer = scratch.get(); + buffer[0] = (byte) (v >> 8); + buffer[1] = (byte) v; + writeBytes(buffer, 0, 2); } /** * Writes an int as four bytes. */ public void writeInt(int i) throws IOException { - writeByte((byte) (i >> 24)); - writeByte((byte) (i >> 16)); - writeByte((byte) (i >> 8)); - writeByte((byte) i); + final byte[] buffer = scratch.get(); + buffer[0] = (byte) (i >> 24); + buffer[1] = (byte) (i >> 16); + buffer[2] = (byte) (i >> 8); + buffer[3] = (byte) i; + writeBytes(buffer, 0, 4); } /** @@ -254,19 +259,30 @@ public abstract class StreamOutput extends OutputStream { * using {@link #writeInt} */ public void writeVInt(int i) throws IOException { + final byte[] buffer = scratch.get(); + int index = 0; while ((i & ~0x7F) != 0) { - writeByte((byte) ((i & 0x7f) | 0x80)); + buffer[index++] = ((byte) ((i & 0x7f) | 0x80)); i >>>= 7; } - writeByte((byte) i); + buffer[index++] = ((byte) i); + writeBytes(buffer, 0, index); } /** * Writes a long as eight bytes. */ public void writeLong(long i) throws IOException { - writeInt((int) (i >> 32)); - writeInt((int) i); + final byte[] buffer = scratch.get(); + buffer[0] = (byte) (i >> 56); + buffer[1] = (byte) (i >> 48); + buffer[2] = (byte) (i >> 40); + buffer[3] = (byte) (i >> 32); + buffer[4] = (byte) (i >> 24); + buffer[5] = (byte) (i >> 16); + buffer[6] = (byte) (i >> 8); + buffer[7] = (byte) i; + writeBytes(buffer, 0, 8); } /** @@ -286,11 +302,14 @@ public abstract class StreamOutput extends OutputStream { * {@link #writeVLong(long)} instead. */ void writeVLongNoCheck(long i) throws IOException { + final byte[] buffer = scratch.get(); + int index = 0; while ((i & ~0x7F) != 0) { - writeByte((byte) ((i & 0x7f) | 0x80)); + buffer[index++] = ((byte) ((i & 0x7f) | 0x80)); i >>>= 7; } - writeByte((byte) i); + buffer[index++] = ((byte) i); + writeBytes(buffer, 0, index); } /** @@ -301,13 +320,16 @@ public abstract class StreamOutput extends OutputStream { * If the numbers are known to be non-negative, use {@link #writeVLong(long)} */ public void writeZLong(long i) throws IOException { + final byte[] buffer = scratch.get(); + int index = 0; // zig-zag encoding cf. https://developers.google.com/protocol-buffers/docs/encoding?hl=en long value = BitUtil.zigZagEncode(i); while ((value & 0xFFFFFFFFFFFFFF80L) != 0L) { - writeByte((byte)((value & 0x7F) | 0x80)); + buffer[index++] = (byte) ((value & 0x7F) | 0x80); value >>>= 7; } - writeByte((byte) (value & 0x7F)); + buffer[index++] = (byte) (value & 0x7F); + writeBytes(buffer, 0, index); } public void writeOptionalLong(@Nullable Long l) throws IOException { @@ -394,18 +416,9 @@ public abstract class StreamOutput extends OutputStream { } } - // we use a small buffer to convert strings to bytes since we want to prevent calling writeByte - // for every byte in the string (see #21660 for details). - // This buffer will never be the oversized limit of 1024 bytes and will not be shared across streams - private byte[] convertStringBuffer = BytesRef.EMPTY_BYTES; // TODO should we reduce it to 0 bytes once the stream is closed? - public void writeString(String str) throws IOException { final int charCount = str.length(); - final int bufferSize = Math.min(3 * charCount, 1024); // at most 3 bytes per character is needed here - if (convertStringBuffer.length < bufferSize) { // we don't use ArrayUtils.grow since copying the bytes is unnecessary - convertStringBuffer = new byte[ArrayUtil.oversize(bufferSize, Byte.BYTES)]; - } - byte[] buffer = convertStringBuffer; + byte[] buffer = scratch.get(); int offset = 0; writeVInt(charCount); for (int i = 0; i < charCount; i++) {