use globalOrd-localOrd delta when merging instead of huge int[]

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1440788 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-01-31 01:59:35 +00:00
parent 4eec692bdf
commit 3f0d6bab42
3 changed files with 36 additions and 11 deletions

View File

@ -253,11 +253,12 @@ public abstract class DocValuesConsumer implements Closeable {
BytesRef scratch = new BytesRef();
AppendingLongBuffer ordDeltas = new AppendingLongBuffer();
// nocommit can we factor out the compressed fields
// compression? ie we have a good idea "roughly" what
// TODO: use another scheme?
// currently we +/- delta merged-ord from segment-ord (is this good? makes sense to me?)
// but we have a good idea "roughly" what
// the ord should be (linear projection) so we only
// need to encode the delta from that ...:
int[] segOrdToMergedOrd;
AppendingLongBuffer segOrdToMergedOrd = new AppendingLongBuffer();
public BytesRef nextTerm() {
while (ord < values.getValueCount()-1) {
@ -317,11 +318,6 @@ public abstract class DocValuesConsumer implements Closeable {
TermMergeQueue q = new TermMergeQueue(segStates.size());
for(SegmentState segState : segStates) {
if (segState.nextTerm() != null) {
// nocommit we could defer this to 3rd pass (and
// reduce transient RAM spike) but then
// we'd spend more effort computing the mapping...:
segState.segOrdToMergedOrd = new int[segState.values.getValueCount()];
q.add(segState);
}
}
@ -345,7 +341,12 @@ public abstract class DocValuesConsumer implements Closeable {
ord++;
}
top.segOrdToMergedOrd[top.ord] = ord-1;
long signedDelta = (ord-1) - top.ord; // global ord space - segment ord space
// fill in any holes for unused ords, then finally the value we want (segOrdToMergedOrd[top.ord])
// TODO: is there a better way...
while (top.segOrdToMergedOrd.size() <= top.ord) {
top.segOrdToMergedOrd.add(signedDelta);
}
if (top.nextTerm() == null) {
q.pop();
} else {
@ -494,7 +495,7 @@ public abstract class DocValuesConsumer implements Closeable {
if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
nextIsSet = true;
int segOrd = currentReader.values.getOrd(docIDUpto);
nextValue = currentReader.segOrdToMergedOrd[segOrd];
nextValue = (int) (segOrd + currentReader.segOrdToMergedOrd.get(segOrd));
docIDUpto++;
return true;
}

View File

@ -29,7 +29,9 @@ import org.apache.lucene.util.RamUsageEstimator;
*/
public class AppendingLongBuffer {
private static final int MAX_PENDING_COUNT = 1024;
private static final int BLOCK_BITS = 10;
private static final int MAX_PENDING_COUNT = 1 << BLOCK_BITS;
private static final int BLOCK_MASK = MAX_PENDING_COUNT - 1;
private long[] minValues;
private PackedInts.Reader[] values;
@ -55,6 +57,24 @@ public class AppendingLongBuffer {
pending[pendingOff++] = l;
}
/** Get a value from this buffer.
 * <p>
 * <b>NOTE</b>: This class is not really designed for random access!
 * You will likely get better performance by using packed ints in another way!
 *
 * @param index position of the value to read; must satisfy {@code 0 <= index < size()}
 * @return the value stored at {@code index}
 * @throws IndexOutOfBoundsException if {@code index} is negative or not smaller than {@link #size()}
 */
public long get(int index) {
  // Explicit range check rather than an assert: with assertions disabled an
  // out-of-range index could otherwise silently return stale pending[] content
  // or a bare block minimum instead of failing.
  if (index < 0 || index >= size()) {
    throw new IndexOutOfBoundsException("index=" + index + ", size=" + size());
  }
  // This class is currently only used by the indexer.
  final int block = index >> BLOCK_BITS;   // which block of MAX_PENDING_COUNT values
  final int element = index & BLOCK_MASK;  // offset of the value inside that block
  if (block == valuesOff) {
    // the requested value lives in the block currently being filled via pending[]
    return pending[element];
  } else if (values[block] == null) {
    // no packed reader for this block: only the block minimum is available
    // (presumably every value in the block equals it -- confirm in packPendingValues)
    return minValues[block];
  } else {
    // stored values are offsets relative to the block minimum
    return minValues[block] + values[block].get(element);
  }
}
private void packPendingValues() {
assert pendingOff == MAX_PENDING_COUNT;

View File

@ -833,6 +833,10 @@ public class TestPackedInts extends LuceneTestCase {
}
assertFalse(it.hasNext());
for (int i = 0; i < arr.length; ++i) {
assertEquals(arr[i], buf.get(i));
}
final long expectedBytesUsed = RamUsageEstimator.sizeOf(buf);
final long computedBytesUsed = buf.ramBytesUsed();
assertEquals("got " + computedBytesUsed + ", expected: " + expectedBytesUsed,