HADOOP-17905. Modify Text.ensureCapacity() to efficiently max out the… (#3423)

This commit is contained in:
pbacsko 2021-09-30 02:25:29 +02:00 committed by GitHub
parent 2fda61fac6
commit 7097e5b793
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 13 additions and 3 deletions

View File

@ -34,7 +34,6 @@ import java.text.StringCharacterIterator;
import java.util.Arrays; import java.util.Arrays;
import org.apache.avro.reflect.Stringable; import org.apache.avro.reflect.Stringable;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability;
@ -73,6 +72,10 @@ public class Text extends BinaryComparable
} }
}; };
// max size of the byte array, seems to be a safe choice for multiple JVMs
// (see ArrayList.MAX_ARRAY_SIZE)
private static final int ARRAY_MAX_SIZE = Integer.MAX_VALUE - 8;
private static final byte[] EMPTY_BYTES = new byte[0]; private static final byte[] EMPTY_BYTES = new byte[0];
private byte[] bytes = EMPTY_BYTES; private byte[] bytes = EMPTY_BYTES;
@ -302,8 +305,15 @@ public class Text extends BinaryComparable
private boolean ensureCapacity(final int capacity) { private boolean ensureCapacity(final int capacity) {
if (bytes.length < capacity) { if (bytes.length < capacity) {
// Try to expand the backing array by a factor of 1.5x // Try to expand the backing array by a factor of 1.5x
// (by taking the current size + half of it) // (by taking the current size + half of it).
int targetSize = Math.max(capacity, bytes.length + (bytes.length >> 1)); //
// If the calculated value is beyond the size
// limit, we cap it to ARRAY_MAX_SIZE
long targetSizeLong = bytes.length + (bytes.length >> 1);
int targetSize = (int)Math.min(targetSizeLong, ARRAY_MAX_SIZE);
targetSize = Math.max(capacity, targetSize);
bytes = new byte[targetSize]; bytes = new byte[targetSize];
return true; return true;
} }