SOLR-1220: use a doubling strategy to grow the array that keeps track of the count for each term in UnInvertedField

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@785258 13f79535-47bb-0310-9956-ffa450edef68
2009-06-16 15:34:07 +00:00 · 2009-06-16 15:34:07 +00:00 · 409f39e8bd
parent c0f6146779
commit 409f39e8bd
1 changed files with 11 additions and 3 deletions
--- a/src/java/org/apache/solr/request/UnInvertedField.java
+++ b/src/java/org/apache/solr/request/UnInvertedField.java
@@ -223,9 +223,9 @@ public class UnInvertedField {
      int termNum = te.getTermNumber();

      if (termNum >= maxTermCounts.length) {
-        // resize, but conserve memory by not doubling
-        // resize at end??? we waste a maximum of 16K (average of 8K)
-        int[] newMaxTermCounts = new int[maxTermCounts.length+4096];
+        // resize by doubling - for a very large number of unique terms, growing by
+        // 4K at a time (and the resulting GC) dominates uninvert time.  Trimmed at the end if the waste is material
+        int[] newMaxTermCounts = new int[maxTermCounts.length*2];
        System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, termNum);
        maxTermCounts = newMaxTermCounts;
      }
@@ -332,6 +332,14 @@ public class UnInvertedField {
    numTermsInField = te.getTermNumber();
    te.close();

+    // trim the array to the actual number of terms if the unused tail is outrageously wasteful (memory/cpu tradeoff)
+
+    if ((maxTermCounts.length - numTermsInField) > 1024) { // too much waste!
+      int[] newMaxTermCounts = new int[numTermsInField];
+      System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, numTermsInField);
+      maxTermCounts = newMaxTermCounts;
+   }
+
    long midPoint = System.currentTimeMillis();

    if (termInstances == 0) {