mirror of https://github.com/apache/lucene.git
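SOLR-1220: use a doubling strategy to grow the array that tracks per-term counts (maxTermCounts) in UnInvertedField, instead of growing it by a fixed 4096 entries, and trim the array afterwards when more than 1024 slots are unused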
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@785258 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c0f6146779
commit
409f39e8bd
|
@ -223,9 +223,9 @@ public class UnInvertedField {
|
||||||
int termNum = te.getTermNumber();
|
int termNum = te.getTermNumber();
|
||||||
|
|
||||||
if (termNum >= maxTermCounts.length) {
|
if (termNum >= maxTermCounts.length) {
|
||||||
// resize, but conserve memory by not doubling
|
// resize by doubling - for very large number of unique terms, expanding
|
||||||
// resize at end??? we waste a maximum of 16K (average of 8K)
|
// by 4K and resultant GC will dominate uninvert times. Resize at end if material
|
||||||
int[] newMaxTermCounts = new int[maxTermCounts.length+4096];
|
int[] newMaxTermCounts = new int[maxTermCounts.length*2];
|
||||||
System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, termNum);
|
System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, termNum);
|
||||||
maxTermCounts = newMaxTermCounts;
|
maxTermCounts = newMaxTermCounts;
|
||||||
}
|
}
|
||||||
|
@ -332,6 +332,14 @@ public class UnInvertedField {
|
||||||
numTermsInField = te.getTermNumber();
|
numTermsInField = te.getTermNumber();
|
||||||
te.close();
|
te.close();
|
||||||
|
|
||||||
|
// free space if outrageously wasteful (tradeoff memory/cpu)
|
||||||
|
|
||||||
|
if ((maxTermCounts.length - numTermsInField) > 1024) { // too much waste!
|
||||||
|
int[] newMaxTermCounts = new int[numTermsInField];
|
||||||
|
System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, numTermsInField);
|
||||||
|
maxTermCounts = newMaxTermCounts;
|
||||||
|
}
|
||||||
|
|
||||||
long midPoint = System.currentTimeMillis();
|
long midPoint = System.currentTimeMillis();
|
||||||
|
|
||||||
if (termInstances == 0) {
|
if (termInstances == 0) {
|
||||||
|
|
Loading…
Reference in New Issue