mirror of https://github.com/apache/lucene.git
use globalOrd-localOrd delta when merging instead of huge int[]
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1440788 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4eec692bdf
commit
3f0d6bab42
|
@ -253,11 +253,12 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
BytesRef scratch = new BytesRef();
|
||||
AppendingLongBuffer ordDeltas = new AppendingLongBuffer();
|
||||
|
||||
// nocommit can we factor out the compressed fields
|
||||
// compression? ie we have a good idea "roughly" what
|
||||
// TODO: use another scheme?
|
||||
// currently we +/- delta merged-ord from segment-ord (is this good? makes sense to me?)
|
||||
// but we have a good idea "roughly" what
|
||||
// the ord should be (linear projection) so we only
|
||||
// need to encode the delta from that ...:
|
||||
int[] segOrdToMergedOrd;
|
||||
AppendingLongBuffer segOrdToMergedOrd = new AppendingLongBuffer();
|
||||
|
||||
public BytesRef nextTerm() {
|
||||
while (ord < values.getValueCount()-1) {
|
||||
|
@ -317,11 +318,6 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
TermMergeQueue q = new TermMergeQueue(segStates.size());
|
||||
for(SegmentState segState : segStates) {
|
||||
if (segState.nextTerm() != null) {
|
||||
|
||||
// nocommit we could defer this to 3rd pass (and
|
||||
// reduce transient RAM spike) but then
|
||||
// we'd spend more effort computing the mapping...:
|
||||
segState.segOrdToMergedOrd = new int[segState.values.getValueCount()];
|
||||
q.add(segState);
|
||||
}
|
||||
}
|
||||
|
@ -345,7 +341,12 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
ord++;
|
||||
}
|
||||
|
||||
top.segOrdToMergedOrd[top.ord] = ord-1;
|
||||
long signedDelta = (ord-1) - top.ord; // global ord space - segment ord space
|
||||
// fill in any holes for unused ords, then finally the value we want (segOrdToMergedOrd[top.ord])
|
||||
// TODO: is there a better way...
|
||||
while (top.segOrdToMergedOrd.size() <= top.ord) {
|
||||
top.segOrdToMergedOrd.add(signedDelta);
|
||||
}
|
||||
if (top.nextTerm() == null) {
|
||||
q.pop();
|
||||
} else {
|
||||
|
@ -494,7 +495,7 @@ public abstract class DocValuesConsumer implements Closeable {
|
|||
if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
|
||||
nextIsSet = true;
|
||||
int segOrd = currentReader.values.getOrd(docIDUpto);
|
||||
nextValue = currentReader.segOrdToMergedOrd[segOrd];
|
||||
nextValue = (int) (segOrd + currentReader.segOrdToMergedOrd.get(segOrd));
|
||||
docIDUpto++;
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -29,7 +29,9 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
*/
|
||||
public class AppendingLongBuffer {
|
||||
|
||||
private static final int MAX_PENDING_COUNT = 1024;
|
||||
private static final int BLOCK_BITS = 10;
|
||||
private static final int MAX_PENDING_COUNT = 1 << BLOCK_BITS;
|
||||
private static final int BLOCK_MASK = MAX_PENDING_COUNT - 1;
|
||||
|
||||
private long[] minValues;
|
||||
private PackedInts.Reader[] values;
|
||||
|
@ -55,6 +57,24 @@ public class AppendingLongBuffer {
|
|||
pending[pendingOff++] = l;
|
||||
}
|
||||
|
||||
/** Get a value from this buffer.
|
||||
* <p>
|
||||
* <b>NOTE</b>: This class is not really designed for random access!
|
||||
* You will likely get better performance by using packed ints in another way!
* <p>
* The index is split into a block number ({@code index >> BLOCK_BITS}) and an
* offset inside that block ({@code index & BLOCK_MASK}). The value is then read
* from one of three places:
* <ul>
*   <li>the {@code pending} array, when the block number equals {@code valuesOff};</li>
*   <li>{@code minValues[block]} alone, when no reader is stored for the block;</li>
*   <li>{@code minValues[block]} plus the entry read from {@code values[block]} otherwise.</li>
* </ul>
*
* @param index position of the value to read; it is only checked against
*        {@code size()} by an {@code assert}, so the check is skipped when
*        assertions are disabled
* @return the value found at {@code index}
*/
|
||||
public long get(int index) {
|
||||
// NOTE(review): only the upper bound is asserted; a negative index is not rejected here.
assert index < size(); // TODO: do a better check, and throw IndexOutOfBoundsException?
|
||||
// This class is currently only used by the indexer.
|
||||
// High bits of the index select the block.
int block = index >> BLOCK_BITS;
|
||||
// Low bits of the index select the slot inside the block.
int element = index & BLOCK_MASK;
|
||||
if (block == valuesOff) {
|
||||
// This block is served directly from the pending array.
return pending[element];
|
||||
} else if (values[block] == null) {
|
||||
// No reader stored for this block: the block minimum is the answer.
// Presumably every value in such a block equals the minimum; confirm against packPendingValues().
return minValues[block];
|
||||
} else {
|
||||
// Block minimum plus the entry read from this block's reader.
return minValues[block] + values[block].get(element);
|
||||
}
|
||||
}
|
||||
|
||||
private void packPendingValues() {
|
||||
assert pendingOff == MAX_PENDING_COUNT;
|
||||
|
||||
|
|
|
@ -833,6 +833,10 @@ public class TestPackedInts extends LuceneTestCase {
|
|||
}
|
||||
assertFalse(it.hasNext());
|
||||
|
||||
for (int i = 0; i < arr.length; ++i) {
|
||||
assertEquals(arr[i], buf.get(i));
|
||||
}
|
||||
|
||||
final long expectedBytesUsed = RamUsageEstimator.sizeOf(buf);
|
||||
final long computedBytesUsed = buf.ramBytesUsed();
|
||||
assertEquals("got " + computedBytesUsed + ", expected: " + expectedBytesUsed,
|
||||
|
|
Loading…
Reference in New Issue