LUCENE-10499: reduce unnecessary copy data overhead when growing array size (#786)

Co-authored-by: xiaoping.wjp <xiaoping.wjp@alibaba-inc.com>
This commit is contained in:
xiaoping 2022-04-26 21:35:56 +08:00 committed by GitHub
parent 2966228fae
commit ebe2d7b4fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 47 additions and 13 deletions

View File

@@ -204,7 +204,7 @@ public abstract class CompressionMode {
// pad with extra "dummy byte": see javadocs for using Inflater(true)
// we do it for compliance, but it's unnecessary for years in zlib.
final int paddedLength = compressedLength + 1;
compressed = ArrayUtil.grow(compressed, paddedLength);
compressed = ArrayUtil.growNoCopy(compressed, paddedLength);
in.readBytes(compressed, 0, compressedLength);
compressed[compressedLength] = 0; // explicitly set dummy byte to 0
@@ -214,7 +214,7 @@ public abstract class CompressionMode {
decompressor.setInput(compressed, 0, paddedLength);
bytes.offset = bytes.length = 0;
bytes.bytes = ArrayUtil.grow(bytes.bytes, originalLength);
bytes.bytes = ArrayUtil.growNoCopy(bytes.bytes, originalLength);
try {
bytes.length = decompressor.inflate(bytes.bytes, bytes.length, originalLength);
} catch (DataFormatException e) {

View File

@@ -81,7 +81,7 @@ public final class DeflateWithPresetDictCompressionMode extends CompressionMode
// pad with extra "dummy byte": see javadocs for using Inflater(true)
// we do it for compliance, but it's unnecessary for years in zlib.
final int paddedLength = compressedLength + 1;
compressed = ArrayUtil.grow(compressed, paddedLength);
compressed = ArrayUtil.growNoCopy(compressed, paddedLength);
in.readBytes(compressed, 0, compressedLength);
compressed[compressedLength] = 0; // explicitly set dummy byte to 0
@@ -113,7 +113,7 @@ public final class DeflateWithPresetDictCompressionMode extends CompressionMode
}
final int dictLength = in.readVInt();
final int blockLength = in.readVInt();
bytes.bytes = ArrayUtil.grow(bytes.bytes, dictLength);
bytes.bytes = ArrayUtil.growNoCopy(bytes.bytes, dictLength);
bytes.offset = bytes.length = 0;
final Inflater decompressor = new Inflater(true);

View File

@@ -74,8 +74,9 @@ public final class LZ4WithPresetDictCompressionMode extends CompressionMode {
in.readVInt(); // compressed length of the dictionary, unused
int totalLength = dictLength;
int i = 0;
compressedLengths = ArrayUtil.growNoCopy(compressedLengths, originalLength / blockLength + 1);
while (totalLength < originalLength) {
compressedLengths = ArrayUtil.grow(compressedLengths, i + 1);
compressedLengths[i++] = in.readVInt();
totalLength += blockLength;
}
@@ -97,7 +98,7 @@ public final class LZ4WithPresetDictCompressionMode {
final int numBlocks = readCompressedLengths(in, originalLength, dictLength, blockLength);
buffer = ArrayUtil.grow(buffer, dictLength + blockLength);
buffer = ArrayUtil.growNoCopy(buffer, dictLength + blockLength);
bytes.length = 0;
// Read the dictionary
if (LZ4.decompress(in, dictLength, buffer, 0) != dictLength) {
@@ -120,7 +121,7 @@ public final class LZ4WithPresetDictCompressionMode {
in.skipBytes(numBytesToSkip);
} else {
// The dictionary contains some bytes we need, copy its content to the BytesRef
bytes.bytes = ArrayUtil.grow(bytes.bytes, dictLength);
bytes.bytes = ArrayUtil.growNoCopy(bytes.bytes, dictLength);
System.arraycopy(buffer, 0, bytes.bytes, 0, dictLength);
bytes.length = dictLength;
}
@@ -169,7 +170,7 @@ public final class LZ4WithPresetDictCompressionMode {
public void compress(byte[] bytes, int off, int len, DataOutput out) throws IOException {
final int dictLength = len / (NUM_SUB_BLOCKS * DICT_SIZE_FACTOR);
final int blockLength = (len - dictLength + NUM_SUB_BLOCKS - 1) / NUM_SUB_BLOCKS;
buffer = ArrayUtil.grow(buffer, dictLength + blockLength);
buffer = ArrayUtil.growNoCopy(buffer, dictLength + blockLength);
out.writeVInt(dictLength);
out.writeVInt(blockLength);
final int end = off + len;

View File

@@ -814,7 +814,7 @@ public final class Lucene90PostingsReader extends PostingsReaderBase {
int numBytes = payIn.readVInt();
if (numBytes > payloadBytes.length) {
payloadBytes = ArrayUtil.grow(payloadBytes, numBytes);
payloadBytes = ArrayUtil.growNoCopy(payloadBytes, numBytes);
}
payIn.readBytes(payloadBytes, 0, numBytes);
} else {
@@ -1799,7 +1799,7 @@ public final class Lucene90PostingsReader extends PostingsReaderBase {
int numBytes = payIn.readVInt();
if (numBytes > payloadBytes.length) {
payloadBytes = ArrayUtil.grow(payloadBytes, numBytes);
payloadBytes = ArrayUtil.growNoCopy(payloadBytes, numBytes);
}
payIn.readBytes(payloadBytes, 0, numBytes);
} else {

View File

@@ -969,7 +969,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
// Write suffix lengths
final int numSuffixBytes = Math.toIntExact(suffixLengthsWriter.size());
spareBytes = ArrayUtil.grow(spareBytes, numSuffixBytes);
spareBytes = ArrayUtil.growNoCopy(spareBytes, numSuffixBytes);
suffixLengthsWriter.copyTo(new ByteArrayDataOutput(spareBytes));
suffixLengthsWriter.reset();
if (allEqual(spareBytes, 1, numSuffixBytes, spareBytes[0])) {

View File

@@ -454,8 +454,8 @@ public final class Lucene90CompressingStoredFieldsReader extends StoredFieldsRea
sliced = (token & 1) != 0;
offsets = ArrayUtil.grow(offsets, chunkDocs + 1);
numStoredFields = ArrayUtil.grow(numStoredFields, chunkDocs);
offsets = ArrayUtil.growNoCopy(offsets, chunkDocs + 1);
numStoredFields = ArrayUtil.growNoCopy(numStoredFields, chunkDocs);
if (chunkDocs == 1) {
numStoredFields[0] = fieldsStream.readVInt();

View File

@@ -337,6 +337,17 @@ public final class ArrayUtil {
} else return array;
}
/**
 * Returns an array whose size is at least {@code minSize}, generally over-allocating
 * exponentially.
 *
 * <p>Unlike {@link #grow(int[], int)}, the original content of {@code array} is <b>not</b>
 * copied into the returned array, so use this only when the existing data does not need to be
 * preserved (e.g. the buffer is about to be fully overwritten).
 */
public static int[] growNoCopy(int[] array, int minSize) {
  // minSize < 0 usually means a caller's length computation overflowed int range.
  assert minSize >= 0 : "size must be positive (got " + minSize + "): likely integer overflow?";
  if (array.length < minSize) {
    // oversize() chooses an exponential growth target to amortize future regrowth.
    return new int[oversize(minSize, Integer.BYTES)];
  } else return array;
}
/** Returns a larger array, generally over-allocating exponentially */
public static int[] grow(int[] array) {
return grow(array, 1 + array.length);
@@ -362,6 +373,17 @@ public final class ArrayUtil {
} else return array;
}
/**
 * Returns an array whose size is at least {@code minSize}, generally over-allocating
 * exponentially.
 *
 * <p>Unlike {@link #grow(long[], int)}, the original content of {@code array} is <b>not</b>
 * copied into the returned array, so use this only when the existing data does not need to be
 * preserved (e.g. the buffer is about to be fully overwritten).
 */
public static long[] growNoCopy(long[] array, int minSize) {
  // minSize < 0 usually means a caller's length computation overflowed int range.
  assert minSize >= 0 : "size must be positive (got " + minSize + "): likely integer overflow?";
  if (array.length < minSize) {
    // oversize() chooses an exponential growth target to amortize future regrowth.
    return new long[oversize(minSize, Long.BYTES)];
  } else return array;
}
/** Returns a larger array, generally over-allocating exponentially */
public static long[] grow(long[] array) {
return grow(array, 1 + array.length);
@@ -387,6 +409,17 @@ public final class ArrayUtil {
} else return array;
}
/**
 * Returns an array whose size is at least {@code minSize}, generally over-allocating
 * exponentially.
 *
 * <p>Unlike {@link #grow(byte[], int)}, the original content of {@code array} is <b>not</b>
 * copied into the returned array, so use this only when the existing data does not need to be
 * preserved (e.g. the buffer is about to be fully overwritten).
 */
public static byte[] growNoCopy(byte[] array, int minSize) {
  // minSize < 0 usually means a caller's length computation overflowed int range.
  assert minSize >= 0 : "size must be positive (got " + minSize + "): likely integer overflow?";
  if (array.length < minSize) {
    // oversize() chooses an exponential growth target to amortize future regrowth.
    return new byte[oversize(minSize, Byte.BYTES)];
  } else return array;
}
/** Returns a larger array, generally over-allocating exponentially */
public static byte[] grow(byte[] array) {
return grow(array, 1 + array.length);