LUCENE-2217: cutover more places where arrays grow to ArrayUtil.oversize/grow

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@904044 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2010-01-28 11:40:16 +00:00
parent c745b824ae
commit ff24afcae7
11 changed files with 84 additions and 64 deletions

View File

@ -44,7 +44,12 @@ package org.apache.lucene.analysis;
*/
import java.io.*;
import java.io.IOException;
import java.io.InputStream;
import java.io.FileInputStream;
import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_CHAR;
import org.apache.lucene.util.ArrayUtil;
/**
*
@ -61,11 +66,10 @@ class PorterStemmer
private int i, /* offset into b */
j, k, k0;
private boolean dirty = false;
private static final int INC = 50; /* unit of size whereby b is increased */
private static final int EXTRA = 1;
private static final int INITIAL_SIZE = 50;
public PorterStemmer() {
b = new char[INC];
b = new char[INITIAL_SIZE];
i = 0;
}
@ -81,10 +85,8 @@ class PorterStemmer
* adding characters, you can call stem(void) to process the word.
*/
public void add(char ch) {
if (b.length <= i + EXTRA) {
char[] new_b = new char[b.length+INC];
System.arraycopy(b, 0, new_b, 0, b.length);
b = new_b;
if (b.length <= i) {
b = ArrayUtil.grow(b, i+1);
}
b[i++] = ch;
}
@ -451,8 +453,7 @@ class PorterStemmer
public boolean stem(char[] wordBuffer, int offset, int wordLen) {
reset();
if (b.length < wordLen) {
char[] new_b = new char[wordLen + EXTRA];
b = new_b;
b = new char[ArrayUtil.oversize(wordLen, NUM_BYTES_CHAR)];
}
System.arraycopy(wordBuffer, offset, b, 0, wordLen);
i = wordLen;

View File

@ -34,6 +34,9 @@ package org.apache.lucene.index;
* hit a non-zero byte. */
import java.util.Arrays;
import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;
import org.apache.lucene.util.ArrayUtil;
final class ByteBlockPool {
@ -83,7 +86,8 @@ final class ByteBlockPool {
public void nextBuffer() {
if (1+bufferUpto == buffers.length) {
byte[][] newBuffers = new byte[(int) (buffers.length*1.5)][];
byte[][] newBuffers = new byte[ArrayUtil.oversize(buffers.length+1,
NUM_BYTES_OBJECT_REF)][];
System.arraycopy(buffers, 0, newBuffers, 0, buffers.length);
buffers = newBuffers;
}

View File

@ -17,6 +17,9 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;
import org.apache.lucene.util.ArrayUtil;
final class CharBlockPool {
public char[][] buffers = new char[10][];
@ -42,7 +45,8 @@ final class CharBlockPool {
public void nextBuffer() {
if (1+bufferUpto == buffers.length) {
char[][] newBuffers = new char[(int) (buffers.length*1.5)][];
char[][] newBuffers = new char[ArrayUtil.oversize(buffers.length+1,
NUM_BYTES_OBJECT_REF)][];
System.arraycopy(buffers, 0, newBuffers, 0, buffers.length);
buffers = newBuffers;
}

View File

@ -17,14 +17,14 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import org.apache.lucene.util.PriorityQueue;
import java.io.IOException;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.ArrayUtil;
/**
* Allows you to iterate over the {@link TermPositions} for multiple {@link Term}s as
* a single {@link TermPositions}.
@ -83,10 +83,8 @@ public class MultipleTermPositions implements TermPositions {
}
private void growArray() {
int[] newArray = new int[_arraySize * 2];
System.arraycopy(_array, 0, newArray, 0, _arraySize);
_array = newArray;
_arraySize *= 2;
_array = ArrayUtil.grow(_array, _arraySize+1);
_arraySize = _array.length;
}
}

View File

@ -22,6 +22,8 @@ import java.io.IOException;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.ArrayUtil;
/** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
Directory. A TermInfos can be written once, in order. */
@ -207,9 +209,7 @@ final class TermInfosWriter {
output.writeBytes(termBytes, start, length); // write delta bytes
output.writeVInt(fieldNumber); // write field num
if (lastTermBytes.length < termBytesLength) {
byte[] newArray = new byte[(int) (termBytesLength*1.5)];
System.arraycopy(lastTermBytes, 0, newArray, 0, start);
lastTermBytes = newArray;
lastTermBytes = ArrayUtil.grow(lastTermBytes, termBytesLength);
}
System.arraycopy(termBytes, start, lastTermBytes, start, length);
lastTermBytesLength = termBytesLength;

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import java.io.IOException;
import java.util.Arrays;
@ -460,18 +461,14 @@ class TermVectorsReader implements Cloneable {
if (preUTF8) {
// Term stored as java chars
if (charBuffer.length < totalLength) {
char[] newCharBuffer = new char[(int) (1.5*totalLength)];
System.arraycopy(charBuffer, 0, newCharBuffer, 0, start);
charBuffer = newCharBuffer;
charBuffer = ArrayUtil.grow(charBuffer, totalLength);
}
tvf.readChars(charBuffer, start, deltaLength);
term = new String(charBuffer, 0, totalLength);
} else {
// Term stored as utf8 bytes
if (byteBuffer.length < totalLength) {
byte[] newByteBuffer = new byte[(int) (1.5*totalLength)];
System.arraycopy(byteBuffer, 0, newByteBuffer, 0, start);
byteBuffer = newByteBuffer;
byteBuffer = ArrayUtil.grow(byteBuffer, totalLength);
}
tvf.readBytes(byteBuffer, start, deltaLength);
term = new String(byteBuffer, 0, totalLength, "UTF-8");

View File

@ -22,6 +22,9 @@ import java.io.Closeable;
import java.util.Map;
import java.util.HashMap;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
/** Abstract base class for input from a file in a {@link Directory}. A
* random-access input stream. Used for all Lucene index input operations.
* @see Directory
@ -122,16 +125,18 @@ public abstract class IndexInput implements Cloneable,Closeable {
if (preUTF8Strings)
return readModifiedUTF8String();
int length = readVInt();
if (bytes == null || length > bytes.length)
bytes = new byte[(int) (length*1.25)];
if (bytes == null || length > bytes.length) {
bytes = new byte[ArrayUtil.oversize(length, 1)];
}
readBytes(bytes, 0, length);
return new String(bytes, 0, length, "UTF-8");
}
private String readModifiedUTF8String() throws IOException {
int length = readVInt();
if (chars == null || length > chars.length)
chars = new char[length];
if (chars == null || length > chars.length) {
chars = new char[ArrayUtil.oversize(length, RamUsageEstimator.NUM_BYTES_CHAR)];
}
readChars(chars, 0, length);
return new String(chars, 0, length);
}
@ -157,10 +162,11 @@ public abstract class IndexInput implements Cloneable,Closeable {
else if ((b & 0xE0) != 0xE0) {
buffer[i] = (char)(((b & 0x1F) << 6)
| (readByte() & 0x3F));
} else
} else {
buffer[i] = (char)(((b & 0x0F) << 12)
| ((readByte() & 0x3F) << 6)
| (readByte() & 0x3F));
}
}
}
@ -181,10 +187,9 @@ public abstract class IndexInput implements Cloneable,Closeable {
byte b = readByte();
if ((b & 0x80) == 0){
//do nothing, we only need one byte
}
else if ((b & 0xE0) != 0xE0) {
} else if ((b & 0xE0) != 0xE0) {
readByte();//read an additional byte
} else{
} else {
//read two additional bytes.
readByte();
readByte();

View File

@ -289,6 +289,29 @@ public final class ArrayUtil {
return array;
}
public static char[] grow(char[] array, int minSize) {
if (array.length < minSize) {
char[] newArray = new char[oversize(minSize, RamUsageEstimator.NUM_BYTES_CHAR)];
System.arraycopy(array, 0, newArray, 0, array.length);
return newArray;
} else
return array;
}
public static char[] grow(char[] array) {
return grow(array, 1 + array.length);
}
public static char[] shrink(char[] array, int targetSize) {
final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_CHAR);
if (newSize != array.length) {
char[] newArray = new char[newSize];
System.arraycopy(array, 0, newArray, 0, newSize);
return newArray;
} else
return array;
}
/**
* Returns hash of chars in range start (inclusive) to
* end (inclusive)

View File

@ -128,8 +128,8 @@ public class SortedVIntList extends DocIdSet {
}
if ((lastBytePos + MAX_BYTES_PER_INT) > bytes.length) {
// biggest possible int does not fit
resizeBytes((bytes.length * 2) + MAX_BYTES_PER_INT);
// Biggest possible int does not fit.
resizeBytes(ArrayUtil.oversize(lastBytePos + MAX_BYTES_PER_INT, 1));
}
// See org.apache.lucene.store.IndexOutput.writeVInt()

View File

@ -78,9 +78,7 @@ final public class UnicodeUtil {
public void setLength(int newLength) {
if (result.length < newLength) {
byte[] newArray = new byte[(int) (1.5*newLength)];
System.arraycopy(result, 0, newArray, 0, length);
result = newArray;
result = ArrayUtil.grow(result, newLength);
}
length = newLength;
}
@ -93,9 +91,7 @@ final public class UnicodeUtil {
public void setLength(int newLength) {
if (result.length < newLength) {
char[] newArray = new char[(int) (1.5*newLength)];
System.arraycopy(result, 0, newArray, 0, length);
result = newArray;
result = ArrayUtil.grow(result, newLength);
}
length = newLength;
}
@ -120,10 +116,7 @@ final public class UnicodeUtil {
final int code = (int) source[i++];
if (upto+4 > out.length) {
byte[] newOut = new byte[2*out.length];
assert newOut.length >= upto+4;
System.arraycopy(out, 0, newOut, 0, upto);
result.result = out = newOut;
out = result.result = ArrayUtil.grow(out, upto+4);
}
if (code < 0x80)
out[upto++] = (byte) code;
@ -179,10 +172,7 @@ final public class UnicodeUtil {
final int code = (int) source[i++];
if (upto+4 > out.length) {
byte[] newOut = new byte[2*out.length];
assert newOut.length >= upto+4;
System.arraycopy(out, 0, newOut, 0, upto);
result.result = out = newOut;
out = result.result = ArrayUtil.grow(out, upto+4);
}
if (code < 0x80)
out[upto++] = (byte) code;
@ -233,10 +223,7 @@ final public class UnicodeUtil {
final int code = (int) s.charAt(i);
if (upto+4 > out.length) {
byte[] newOut = new byte[2*out.length];
assert newOut.length >= upto+4;
System.arraycopy(out, 0, newOut, 0, upto);
result.result = out = newOut;
out = result.result = ArrayUtil.grow(out, upto+4);
}
if (code < 0x80)
out[upto++] = (byte) code;
@ -283,9 +270,7 @@ final public class UnicodeUtil {
final int end = offset + length;
char[] out = result.result;
if (result.offsets.length <= end) {
int[] newOffsets = new int[2*end];
System.arraycopy(result.offsets, 0, newOffsets, 0, result.offsets.length);
result.offsets = newOffsets;
result.offsets = ArrayUtil.grow(result.offsets, end+1);
}
final int[] offsets = result.offsets;
@ -299,9 +284,7 @@ final public class UnicodeUtil {
// Pre-allocate for worst case 1-for-1
if (outUpto+length >= out.length) {
char[] newOut = new char[2*(outUpto+length)];
System.arraycopy(out, 0, newOut, 0, outUpto);
result.result = out = newOut;
out = result.result = ArrayUtil.grow(out, outUpto+length+1);
}
while (upto < end) {

View File

@ -20,8 +20,6 @@ package org.apache.lucene.util;
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.util.LuceneTestCase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;
@ -193,4 +191,11 @@ public class TestSortedVIntList extends LuceneTestCase {
public void test12() {
tstIllegalArgExc(new int[] {0,1,1,2,3,5,8,0});
}
public void test13Allocation() throws Exception {
int [] a = new int[2000]; // SortedVIntList initial byte size is 128
for (int i = 0; i < a.length; i++) {
a[i] = (107 + i) * i;
}
tstIterator(new SortedVIntList(a), a);
}
}