LUCENE-2217: cutover more places where arrays grow to ArrayUtil.oversize/grow

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@904044 13f79535-47bb-0310-9956-ffa450edef68
2025-03-04 07:19:18 +00:00 · 2010-01-28 11:40:16 +00:00 · 2010-01-28 11:40:16 +00:00 · ff24afcae7
commit ff24afcae7
parent c745b824ae
11 changed files with 84 additions and 64 deletions
--- a/src/java/org/apache/lucene/analysis/PorterStemmer.java
+++ b/src/java/org/apache/lucene/analysis/PorterStemmer.java
@ -44,7 +44,12 @@ package org.apache.lucene.analysis;
 */


-import java.io.*;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.FileInputStream;
+
+import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_CHAR;
+import org.apache.lucene.util.ArrayUtil;

 /**
 *
@ -61,11 +66,10 @@ class PorterStemmer
  private int i,    /* offset into b */
    j, k, k0;
  private boolean dirty = false;
-  private static final int INC = 50; /* unit of size whereby b is increased */
-  private static final int EXTRA = 1;
+  private static final int INITIAL_SIZE = 50;

  public PorterStemmer() {
-    b = new char[INC];
+    b = new char[INITIAL_SIZE];
    i = 0;
  }

@ -81,10 +85,8 @@ class PorterStemmer
   * adding characters, you can call stem(void) to process the word.
   */
  public void add(char ch) {
-    if (b.length <= i + EXTRA) {
-      char[] new_b = new char[b.length+INC];
-      System.arraycopy(b, 0, new_b, 0, b.length);
-      b = new_b;
+    if (b.length <= i) {
+      b = ArrayUtil.grow(b, i+1);
    }
    b[i++] = ch;
  }
@ -451,8 +453,7 @@ class PorterStemmer
  public boolean stem(char[] wordBuffer, int offset, int wordLen) {
    reset();
    if (b.length < wordLen) {
-      char[] new_b = new char[wordLen + EXTRA];
-      b = new_b;
+      b = new char[ArrayUtil.oversize(wordLen, NUM_BYTES_CHAR)];
    }
    System.arraycopy(wordBuffer, offset, b, 0, wordLen);
    i = wordLen;
--- a/src/java/org/apache/lucene/index/ByteBlockPool.java
+++ b/src/java/org/apache/lucene/index/ByteBlockPool.java
@ -34,6 +34,9 @@ package org.apache.lucene.index;
 * hit a non-zero byte. */

 import java.util.Arrays;
+import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+import org.apache.lucene.util.ArrayUtil;
+

 final class ByteBlockPool {

@ -83,7 +86,8 @@ final class ByteBlockPool {

  public void nextBuffer() {
    if (1+bufferUpto == buffers.length) {
-      byte[][] newBuffers = new byte[(int) (buffers.length*1.5)][];
+      byte[][] newBuffers = new byte[ArrayUtil.oversize(buffers.length+1,
+                                                        NUM_BYTES_OBJECT_REF)][];
      System.arraycopy(buffers, 0, newBuffers, 0, buffers.length);
      buffers = newBuffers;
    }
--- a/src/java/org/apache/lucene/index/CharBlockPool.java
+++ b/src/java/org/apache/lucene/index/CharBlockPool.java
@ -17,6 +17,9 @@ package org.apache.lucene.index;
 * limitations under the License.
 */

+import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+import org.apache.lucene.util.ArrayUtil;
+
 final class CharBlockPool {

  public char[][] buffers = new char[10][];
@ -42,7 +45,8 @@ final class CharBlockPool {

  public void nextBuffer() {
    if (1+bufferUpto == buffers.length) {
-      char[][] newBuffers = new char[(int) (buffers.length*1.5)][];
+      char[][] newBuffers = new char[ArrayUtil.oversize(buffers.length+1,
+                                                        NUM_BYTES_OBJECT_REF)][];
      System.arraycopy(buffers, 0, newBuffers, 0, buffers.length);
      buffers = newBuffers;
    }
--- a/src/java/org/apache/lucene/index/MultipleTermPositions.java
+++ b/src/java/org/apache/lucene/index/MultipleTermPositions.java
@ -17,14 +17,14 @@ package org.apache.lucene.index;
 * limitations under the License.
 */

-import org.apache.lucene.util.PriorityQueue;
-
 import java.io.IOException;
 import java.util.Arrays;
-
 import java.util.LinkedList;
 import java.util.List;

+import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.ArrayUtil;
+
 /**
 * Allows you to iterate over the {@link TermPositions} for multiple {@link Term}s as
 * a single {@link TermPositions}.
@ -83,10 +83,8 @@ public class MultipleTermPositions implements TermPositions {
    }

    private void growArray() {
-      int[] newArray = new int[_arraySize * 2];
-      System.arraycopy(_array, 0, newArray, 0, _arraySize);
-      _array = newArray;
-      _arraySize *= 2;
+      _array = ArrayUtil.grow(_array, _arraySize+1);
+      _arraySize = _array.length;
    }
  }

--- a/src/java/org/apache/lucene/index/TermInfosWriter.java
+++ b/src/java/org/apache/lucene/index/TermInfosWriter.java
@ -22,6 +22,8 @@ import java.io.IOException;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.ArrayUtil;
+

 /** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
  Directory.  A TermInfos can be written once, in order.  */
@ -207,9 +209,7 @@ final class TermInfosWriter {
    output.writeBytes(termBytes, start, length);  // write delta bytes
    output.writeVInt(fieldNumber); // write field num
    if (lastTermBytes.length < termBytesLength) {
-      byte[] newArray = new byte[(int) (termBytesLength*1.5)];
-      System.arraycopy(lastTermBytes, 0, newArray, 0, start);
-      lastTermBytes = newArray;
+      lastTermBytes = ArrayUtil.grow(lastTermBytes, termBytesLength);
    }
    System.arraycopy(termBytes, start, lastTermBytes, start, length);
    lastTermBytesLength = termBytesLength;
--- a/src/java/org/apache/lucene/index/TermVectorsReader.java
+++ b/src/java/org/apache/lucene/index/TermVectorsReader.java
@ -20,6 +20,7 @@ package org.apache.lucene.index;
 import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.ArrayUtil;

 import java.io.IOException;
 import java.util.Arrays;
@ -460,18 +461,14 @@ class TermVectorsReader implements Cloneable {
      if (preUTF8) {
        // Term stored as java chars
        if (charBuffer.length < totalLength) {
-          char[] newCharBuffer = new char[(int) (1.5*totalLength)];
-          System.arraycopy(charBuffer, 0, newCharBuffer, 0, start);
-          charBuffer = newCharBuffer;
+          charBuffer = ArrayUtil.grow(charBuffer, totalLength);
        }
        tvf.readChars(charBuffer, start, deltaLength);
        term = new String(charBuffer, 0, totalLength);
      } else {
        // Term stored as utf8 bytes
        if (byteBuffer.length < totalLength) {
-          byte[] newByteBuffer = new byte[(int) (1.5*totalLength)];
-          System.arraycopy(byteBuffer, 0, newByteBuffer, 0, start);
-          byteBuffer = newByteBuffer;
+          byteBuffer = ArrayUtil.grow(byteBuffer, totalLength);
        }
        tvf.readBytes(byteBuffer, start, deltaLength);
        term = new String(byteBuffer, 0, totalLength, "UTF-8");
--- a/src/java/org/apache/lucene/store/IndexInput.java
+++ b/src/java/org/apache/lucene/store/IndexInput.java
@ -22,6 +22,9 @@ import java.io.Closeable;
 import java.util.Map;
 import java.util.HashMap;

+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.RamUsageEstimator;
+
 /** Abstract base class for input from a file in a {@link Directory}.  A
 * random-access input stream.  Used for all Lucene index input operations.
 * @see Directory
@ -122,16 +125,18 @@ public abstract class IndexInput implements Cloneable,Closeable {
    if (preUTF8Strings)
      return readModifiedUTF8String();
    int length = readVInt();
-    if (bytes == null || length > bytes.length)
-      bytes = new byte[(int) (length*1.25)];
+    if (bytes == null || length > bytes.length) {
+      bytes = new byte[ArrayUtil.oversize(length, 1)];
+    }
    readBytes(bytes, 0, length);
    return new String(bytes, 0, length, "UTF-8");
  }

  private String readModifiedUTF8String() throws IOException {
    int length = readVInt();
-    if (chars == null || length > chars.length)
-      chars = new char[length];
+    if (chars == null || length > chars.length) {
+      chars = new char[ArrayUtil.oversize(length, RamUsageEstimator.NUM_BYTES_CHAR)];
+    }
    readChars(chars, 0, length);
    return new String(chars, 0, length);
  }
@ -157,10 +162,11 @@ public abstract class IndexInput implements Cloneable,Closeable {
      else if ((b & 0xE0) != 0xE0) {
 	buffer[i] = (char)(((b & 0x1F) << 6)
 		 | (readByte() & 0x3F));
-      } else
+      } else {
 	buffer[i] = (char)(((b & 0x0F) << 12)
 		| ((readByte() & 0x3F) << 6)
 	        |  (readByte() & 0x3F));
+      }
    }
  }

@ -181,10 +187,9 @@ public abstract class IndexInput implements Cloneable,Closeable {
      byte b = readByte();
      if ((b & 0x80) == 0){
        //do nothing, we only need one byte
-      }
-      else if ((b & 0xE0) != 0xE0) {
+      } else if ((b & 0xE0) != 0xE0) {
        readByte();//read an additional byte
-      } else{      
+      } else {      
        //read two additional bytes.
        readByte();
        readByte();
--- a/src/java/org/apache/lucene/util/ArrayUtil.java
+++ b/src/java/org/apache/lucene/util/ArrayUtil.java
@ -289,6 +289,29 @@ public final class ArrayUtil {
      return array;
  }

+  public static char[] grow(char[] array, int minSize) {
+    if (array.length < minSize) {
+      char[] newArray = new char[oversize(minSize, RamUsageEstimator.NUM_BYTES_CHAR)];
+      System.arraycopy(array, 0, newArray, 0, array.length);
+      return newArray;
+    } else
+      return array;
+  }
+
+  public static char[] grow(char[] array) {
+    return grow(array, 1 + array.length);
+  }
+
+  public static char[] shrink(char[] array, int targetSize) {
+    final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_CHAR);
+    if (newSize != array.length) {
+      char[] newArray = new char[newSize];
+      System.arraycopy(array, 0, newArray, 0, newSize);
+      return newArray;
+    } else
+      return array;
+  }
+
  /**
   * Returns hash of chars in range start (inclusive) to
   * end (inclusive)
--- a/src/java/org/apache/lucene/util/SortedVIntList.java
+++ b/src/java/org/apache/lucene/util/SortedVIntList.java
@ -128,8 +128,8 @@ public class SortedVIntList extends DocIdSet {
      }
  
      if ((lastBytePos + MAX_BYTES_PER_INT) > bytes.length) {
-        // biggest possible int does not fit
-        resizeBytes((bytes.length * 2) + MAX_BYTES_PER_INT);
+        // Biggest possible int does not fit.
+        resizeBytes(ArrayUtil.oversize(lastBytePos + MAX_BYTES_PER_INT, 1));
      }
  
      // See org.apache.lucene.store.IndexOutput.writeVInt()
--- a/src/java/org/apache/lucene/util/UnicodeUtil.java
+++ b/src/java/org/apache/lucene/util/UnicodeUtil.java
@ -78,9 +78,7 @@ final public class UnicodeUtil {

    public void setLength(int newLength) {
      if (result.length < newLength) {
-        byte[] newArray = new byte[(int) (1.5*newLength)];
-        System.arraycopy(result, 0, newArray, 0, length);
-        result = newArray;
+        result = ArrayUtil.grow(result, newLength);
      }
      length = newLength;
    }
@ -93,9 +91,7 @@ final public class UnicodeUtil {

    public void setLength(int newLength) {
      if (result.length < newLength) {
-        char[] newArray = new char[(int) (1.5*newLength)];
-        System.arraycopy(result, 0, newArray, 0, length);
-        result = newArray;
+        result = ArrayUtil.grow(result, newLength);
      }
      length = newLength;
    }
@ -120,10 +116,7 @@ final public class UnicodeUtil {
      final int code = (int) source[i++];

      if (upto+4 > out.length) {
-        byte[] newOut = new byte[2*out.length];
-        assert newOut.length >= upto+4;
-        System.arraycopy(out, 0, newOut, 0, upto);
-        result.result = out = newOut;
+        out = result.result = ArrayUtil.grow(out, upto+4);
      }
      if (code < 0x80)
        out[upto++] = (byte) code;
@ -179,10 +172,7 @@ final public class UnicodeUtil {
      final int code = (int) source[i++];

      if (upto+4 > out.length) {
-        byte[] newOut = new byte[2*out.length];
-        assert newOut.length >= upto+4;
-        System.arraycopy(out, 0, newOut, 0, upto);
-        result.result = out = newOut;
+        out = result.result = ArrayUtil.grow(out, upto+4);
      }
      if (code < 0x80)
        out[upto++] = (byte) code;
@ -233,10 +223,7 @@ final public class UnicodeUtil {
      final int code = (int) s.charAt(i);

      if (upto+4 > out.length) {
-        byte[] newOut = new byte[2*out.length];
-        assert newOut.length >= upto+4;
-        System.arraycopy(out, 0, newOut, 0, upto);
-        result.result = out = newOut;
+        out = result.result = ArrayUtil.grow(out, upto+4);
      }
      if (code < 0x80)
        out[upto++] = (byte) code;
@ -283,9 +270,7 @@ final public class UnicodeUtil {
    final int end = offset + length;
    char[] out = result.result;
    if (result.offsets.length <= end) {
-      int[] newOffsets = new int[2*end];
-      System.arraycopy(result.offsets, 0, newOffsets, 0, result.offsets.length);
-      result.offsets  = newOffsets;
+      result.offsets = ArrayUtil.grow(result.offsets, end+1);
    }
    final int[] offsets = result.offsets;

@ -299,9 +284,7 @@ final public class UnicodeUtil {

    // Pre-allocate for worst case 1-for-1
    if (outUpto+length >= out.length) {
-      char[] newOut = new char[2*(outUpto+length)];
-      System.arraycopy(out, 0, newOut, 0, outUpto);
-      result.result = out = newOut;
+      out = result.result = ArrayUtil.grow(out, outUpto+length+1);
    }

    while (upto < end) {
--- a/src/test/org/apache/lucene/util/TestSortedVIntList.java
+++ b/src/test/org/apache/lucene/util/TestSortedVIntList.java
@ -20,8 +20,6 @@ package org.apache.lucene.util;
 import java.io.IOException;
 import java.util.BitSet;

-import org.apache.lucene.util.LuceneTestCase;
-
 import junit.framework.TestSuite;
 import junit.textui.TestRunner;

@ -193,4 +191,11 @@ public class TestSortedVIntList extends LuceneTestCase {
  public void test12() {
   tstIllegalArgExc(new int[] {0,1,1,2,3,5,8,0});
  }
+  public void test13Allocation() throws Exception {
+    int [] a = new int[2000]; // SortedVIntList initial byte size is 128
+    for (int i = 0; i < a.length; i++) {
+      a[i] = (107 + i) * i;
+    }
+    tstIterator(new SortedVIntList(a), a);
+  }
 }