SOLR-1220: use a doubling strategy when growing the per-term count array (maxTermCounts) in UnInvertedField

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@785258 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2009-06-16 15:34:07 +00:00
parent c0f6146779
commit 409f39e8bd
1 changed file with 11 additions and 3 deletions

View File

@@ -223,9 +223,9 @@ public class UnInvertedField {
int termNum = te.getTermNumber(); int termNum = te.getTermNumber();
if (termNum >= maxTermCounts.length) { if (termNum >= maxTermCounts.length) {
// resize, but conserve memory by not doubling // resize by doubling - for very large number of unique terms, expanding
// resize at end??? we waste a maximum of 16K (average of 8K) // by 4K and resultant GC will dominate uninvert times. Resize at end if material
int[] newMaxTermCounts = new int[maxTermCounts.length+4096]; int[] newMaxTermCounts = new int[maxTermCounts.length*2];
System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, termNum); System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, termNum);
maxTermCounts = newMaxTermCounts; maxTermCounts = newMaxTermCounts;
} }
@@ -332,6 +332,14 @@ public class UnInvertedField {
numTermsInField = te.getTermNumber(); numTermsInField = te.getTermNumber();
te.close(); te.close();
// free space if outrageously wasteful (tradeoff memory/cpu)
if ((maxTermCounts.length - numTermsInField) > 1024) { // too much waste!
int[] newMaxTermCounts = new int[numTermsInField];
System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, numTermsInField);
maxTermCounts = newMaxTermCounts;
}
long midPoint = System.currentTimeMillis(); long midPoint = System.currentTimeMillis();
if (termInstances == 0) { if (termInstances == 0) {