SOLR-1220: use doubling strategy for keeping track of the number of each term in UnInvertedField

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@785258 13f79535-47bb-0310-9956-ffa450edef68
2009-06-16 15:34:07 +00:00 · 2009-06-16 15:34:07 +00:00 · 409f39e8bd
parent c0f6146779
commit 409f39e8bd
1 changed files with 11 additions and 3 deletions
--- a/src/java/org/apache/solr/request/UnInvertedField.java
+++ b/src/java/org/apache/solr/request/UnInvertedField.java
@@ -223,9 +223,9 @@ public class UnInvertedField {
      int termNum = te.getTermNumber();
      if (termNum >= maxTermCounts.length) {
-        // resize, but conserve memory by not doubling
+        // resize by doubling - for very large number of unique terms, expanding
-        // resize at end??? we waste a maximum of 16K (average of 8K)
+        // by 4K and resultant GC will dominate uninvert times.  Resize at end if material
-        int[] newMaxTermCounts = new int[maxTermCounts.length+4096];
+        int[] newMaxTermCounts = new int[maxTermCounts.length*2];
        System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, termNum);
        maxTermCounts = newMaxTermCounts;
      }
@@ -332,6 +332,14 @@ public class UnInvertedField {
    numTermsInField = te.getTermNumber();
    te.close();
    // free space if outrageously wasteful (tradeoff memory/cpu) 
    if ((maxTermCounts.length - numTermsInField) > 1024) { // too much waste!
      int[] newMaxTermCounts = new int[numTermsInField];
      System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, numTermsInField);
      maxTermCounts = newMaxTermCounts;
   }
    long midPoint = System.currentTimeMillis();
    if (termInstances == 0) {