mirror of https://github.com/apache/lucene.git
use globalOrd-localOrd delta when merging instead of huge int[]
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1440788 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4eec692bdf
commit
3f0d6bab42
|
@ -253,11 +253,12 @@ public abstract class DocValuesConsumer implements Closeable {
|
||||||
BytesRef scratch = new BytesRef();
|
BytesRef scratch = new BytesRef();
|
||||||
AppendingLongBuffer ordDeltas = new AppendingLongBuffer();
|
AppendingLongBuffer ordDeltas = new AppendingLongBuffer();
|
||||||
|
|
||||||
// nocommit can we factor out the compressed fields
|
// TODO: use another scheme?
|
||||||
// compression? ie we have a good idea "roughly" what
|
// currently we store the signed delta between the merged ord and the segment ord (is this good? makes sense to me?)
|
||||||
|
// but we have a good idea "roughly" what
|
||||||
// the ord should be (linear projection) so we only
|
// the ord should be (linear projection) so we only
|
||||||
// need to encode the delta from that ...:
|
// need to encode the delta from that ...:
|
||||||
int[] segOrdToMergedOrd;
|
AppendingLongBuffer segOrdToMergedOrd = new AppendingLongBuffer();
|
||||||
|
|
||||||
public BytesRef nextTerm() {
|
public BytesRef nextTerm() {
|
||||||
while (ord < values.getValueCount()-1) {
|
while (ord < values.getValueCount()-1) {
|
||||||
|
@ -317,11 +318,6 @@ public abstract class DocValuesConsumer implements Closeable {
|
||||||
TermMergeQueue q = new TermMergeQueue(segStates.size());
|
TermMergeQueue q = new TermMergeQueue(segStates.size());
|
||||||
for(SegmentState segState : segStates) {
|
for(SegmentState segState : segStates) {
|
||||||
if (segState.nextTerm() != null) {
|
if (segState.nextTerm() != null) {
|
||||||
|
|
||||||
// nocommit we could defer this to 3rd pass (and
|
|
||||||
// reduce transient RAM spike) but then
|
|
||||||
// we'd spend more effort computing the mapping...:
|
|
||||||
segState.segOrdToMergedOrd = new int[segState.values.getValueCount()];
|
|
||||||
q.add(segState);
|
q.add(segState);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -345,7 +341,12 @@ public abstract class DocValuesConsumer implements Closeable {
|
||||||
ord++;
|
ord++;
|
||||||
}
|
}
|
||||||
|
|
||||||
top.segOrdToMergedOrd[top.ord] = ord-1;
|
long signedDelta = (ord-1) - top.ord; // global ord space - segment ord space
|
||||||
|
// fill in any holes for unused ords, then finally the value we want (segOrdToMergedOrd[top.ord])
|
||||||
|
// TODO: is there a better way...
|
||||||
|
while (top.segOrdToMergedOrd.size() <= top.ord) {
|
||||||
|
top.segOrdToMergedOrd.add(signedDelta);
|
||||||
|
}
|
||||||
if (top.nextTerm() == null) {
|
if (top.nextTerm() == null) {
|
||||||
q.pop();
|
q.pop();
|
||||||
} else {
|
} else {
|
||||||
|
@ -494,7 +495,7 @@ public abstract class DocValuesConsumer implements Closeable {
|
||||||
if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
|
if (currentLiveDocs == null || currentLiveDocs.get(docIDUpto)) {
|
||||||
nextIsSet = true;
|
nextIsSet = true;
|
||||||
int segOrd = currentReader.values.getOrd(docIDUpto);
|
int segOrd = currentReader.values.getOrd(docIDUpto);
|
||||||
nextValue = currentReader.segOrdToMergedOrd[segOrd];
|
nextValue = (int) (segOrd + currentReader.segOrdToMergedOrd.get(segOrd));
|
||||||
docIDUpto++;
|
docIDUpto++;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,7 +29,9 @@ import org.apache.lucene.util.RamUsageEstimator;
|
||||||
*/
|
*/
|
||||||
public class AppendingLongBuffer {
|
public class AppendingLongBuffer {
|
||||||
|
|
||||||
private static final int MAX_PENDING_COUNT = 1024;
|
private static final int BLOCK_BITS = 10;
|
||||||
|
private static final int MAX_PENDING_COUNT = 1 << BLOCK_BITS;
|
||||||
|
private static final int BLOCK_MASK = MAX_PENDING_COUNT - 1;
|
||||||
|
|
||||||
private long[] minValues;
|
private long[] minValues;
|
||||||
private PackedInts.Reader[] values;
|
private PackedInts.Reader[] values;
|
||||||
|
@ -55,6 +57,24 @@ public class AppendingLongBuffer {
|
||||||
pending[pendingOff++] = l;
|
pending[pendingOff++] = l;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Get a value from this buffer.
|
||||||
|
* <p>
|
||||||
|
* <b>NOTE</b>: This class is not really designed for random access!
|
||||||
|
* You will likely get better performance by using packed ints in another way! */
|
||||||
|
public long get(int index) {
|
||||||
|
assert index < size(); // TODO: do a better check, and throw IndexOutOfBoundsException?
|
||||||
|
// This class is currently only used by the indexer.
|
||||||
|
int block = index >> BLOCK_BITS;
|
||||||
|
int element = index & BLOCK_MASK;
|
||||||
|
if (block == valuesOff) {
|
||||||
|
return pending[element];
|
||||||
|
} else if (values[block] == null) {
|
||||||
|
return minValues[block];
|
||||||
|
} else {
|
||||||
|
return minValues[block] + values[block].get(element);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void packPendingValues() {
|
private void packPendingValues() {
|
||||||
assert pendingOff == MAX_PENDING_COUNT;
|
assert pendingOff == MAX_PENDING_COUNT;
|
||||||
|
|
||||||
|
|
|
@ -833,6 +833,10 @@ public class TestPackedInts extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
assertFalse(it.hasNext());
|
assertFalse(it.hasNext());
|
||||||
|
|
||||||
|
for (int i = 0; i < arr.length; ++i) {
|
||||||
|
assertEquals(arr[i], buf.get(i));
|
||||||
|
}
|
||||||
|
|
||||||
final long expectedBytesUsed = RamUsageEstimator.sizeOf(buf);
|
final long expectedBytesUsed = RamUsageEstimator.sizeOf(buf);
|
||||||
final long computedBytesUsed = buf.ramBytesUsed();
|
final long computedBytesUsed = buf.ramBytesUsed();
|
||||||
assertEquals("got " + computedBytesUsed + ", expected: " + expectedBytesUsed,
|
assertEquals("got " + computedBytesUsed + ", expected: " + expectedBytesUsed,
|
||||||
|
|
Loading…
Reference in New Issue