LUCENE-5780: Make OrdinalMap more memory-efficient.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1604158 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2014-06-20 12:22:31 +00:00
parent 0e9d6de916
commit e97145a1dd
2 changed files with 17 additions and 6 deletions

View File

@ -96,12 +96,15 @@ Other
======================= Lucene 4.10.0 ======================
(No Changes)
API Changes
* LUCENE-5752: Simplified Automaton API to be immutable. (Mike McCandless)
Optimizations
* LUCENE-5780: Make OrdinalMap more memory-efficient, especially in case the
first segment has all values. (Adrien Grand, Robert Muir)
======================= Lucene 4.9.0 =======================
Changes in Runtime Behavior

View File

@ -427,14 +427,18 @@ public class MultiDocValues {
long globalOrd = 0;
while (mte.next() != null) {
TermsEnumWithSlice matches[] = mte.getMatchArray();
int firstSegmentIndex = Integer.MAX_VALUE;
long globalOrdDelta = Long.MAX_VALUE;
for (int i = 0; i < mte.getMatchCount(); i++) {
int segmentIndex = matches[i].index;
long segmentOrd = matches[i].terms.ord();
long delta = globalOrd - segmentOrd;
// for each unique term, just mark the first segment index/delta where it occurs
if (i == 0) {
firstSegments.add(segmentIndex);
globalOrdDeltas.add(delta);
// We compute the least segment where the term occurs. In case the
// first segment contains most (or better all) values, this will
// help save significant memory
if (segmentIndex < firstSegmentIndex) {
firstSegmentIndex = segmentIndex;
globalOrdDelta = delta;
}
// for each per-segment ord, map it back to the global term.
while (segmentOrds[segmentIndex] <= segmentOrd) {
@ -443,6 +447,10 @@ public class MultiDocValues {
segmentOrds[segmentIndex]++;
}
}
// for each unique term, just mark the first segment index/delta where it occurs
assert firstSegmentIndex < segmentOrds.length;
firstSegments.add(firstSegmentIndex);
globalOrdDeltas.add(globalOrdDelta);
globalOrd++;
}
firstSegments.freeze();