mirror of https://github.com/apache/lucene.git
don't merge terms into ram when merging sortedbytes
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1440731 13f79535-47bb-0310-9956-ffa450edef68
parent 8ccf87a20a
commit 2504d3266e
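In short: the SortedDocValues merge used to copy every unique merged term into an in-heap List<BytesRef> (mergedTerms). This patch instead records, for each unique term, only which segment supplied it (ordToReaderId) and, per segment, the delta-coded source ordinal (SegmentState.ordDeltas), both in packed AppendingLongBuffers; the merged-terms iterator then re-materializes each term on demand via lookupOrd against the owning segment.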
@@ -35,6 +35,7 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.packed.AppendingLongBuffer;
 
 /**
  * Abstract API that consumes numeric, binary and
@@ -240,7 +241,7 @@ public abstract class DocValuesConsumer implements Closeable {
 
     public int numMergedTerms;
 
-    final List<BytesRef> mergedTerms = new ArrayList<BytesRef>();
+    final AppendingLongBuffer ordToReaderId = new AppendingLongBuffer();
     final List<SegmentState> segStates = new ArrayList<SegmentState>();
 
     private static class SegmentState {
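The heap saving comes from this field swap: one BytesRef object per unique merged term is replaced by packed long storage. A fragment sketching the append/replay pattern, assuming this branch's lucene-core on the classpath and using only the AppendingLongBuffer calls the patch itself exercises (add, iterator, Iterator.next):

import org.apache.lucene.util.packed.AppendingLongBuffer;

AppendingLongBuffer buf = new AppendingLongBuffer();
buf.add(0);                                  // e.g. a reader id
buf.add(1);                                  // e.g. an ord delta
AppendingLongBuffer.Iterator it = buf.iterator();
long readerId = it.next();                   // values come back in insertion order
long delta = it.next();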
@@ -250,6 +251,8 @@ public abstract class DocValuesConsumer implements Closeable {
       int ord = -1;
       SortedDocValues values;
       BytesRef scratch = new BytesRef();
+      int lastOrd = -1; // last REAL ord we looked up: nocommit: clean this up
+      AppendingLongBuffer ordDeltas = new AppendingLongBuffer();
 
       // nocommit can we factor out the compressed fields
       // compression? ie we have a good idea "roughly" what
@@ -262,10 +265,11 @@ public abstract class DocValuesConsumer implements Closeable {
         ord++;
         if (liveTerms == null || liveTerms.get(ord)) {
           values.lookupOrd(ord, scratch);
+          lastOrd = ord;
           return scratch;
         } else {
           // Skip "deleted" terms (ie, terms that were not
-          // referenced by any live docs):
+          // referenced by any live docs): nocommit: why?!
           values.lookupOrd(ord, scratch);
         }
       }
@@ -337,18 +341,15 @@ public abstract class DocValuesConsumer implements Closeable {
           if (lastTerm == null || !lastTerm.equals(top.scratch)) {
             // a new unique term: record its segment ID / sourceOrd pair
             int readerId = top.segmentID;
-            int sourceOrd = top.ord;
-            // nocommit: do this
-            // ordToReaderID.add(readerId);
+            ordToReaderId.add(readerId);
+            int sourceOrd = top.lastOrd;
+
             int delta = sourceOrd - lastOrds[readerId];
             lastOrds[readerId] = sourceOrd;
-            // nocommit: do this
-            // top.ordDeltas.add(delta);
+            top.ordDeltas.add(delta);
 
             lastTerm = BytesRef.deepCopyOf(top.scratch);
-            // nocommit we could spill this to disk instead of
-            // RAM, and replay on finish...
-            mergedTerms.add(lastTerm);
+
             ord++;
           }
 
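To make the recording step above concrete, here is a minimal, self-contained simulation of that hunk with two invented toy segments; ArrayList<Long> stands in for AppendingLongBuffer, and a linear scan stands in for the PriorityQueue:

import java.util.ArrayList;
import java.util.List;

public class RecordOrdDeltas {
  public static void main(String[] args) {
    // Two toy segments of unique, sorted terms; array index == source ord.
    String[][] segs = {
        {"apple", "cherry"},          // segment 0
        {"banana", "cherry", "dog"},  // segment 1
    };
    List<Long> ordToReaderId = new ArrayList<>();    // stand-in for merger.ordToReaderId
    List<List<Long>> ordDeltas = new ArrayList<>();  // stand-in for each SegmentState.ordDeltas
    for (int i = 0; i < segs.length; i++) ordDeltas.add(new ArrayList<>());
    int[] lastOrds = new int[segs.length];
    int[] cursor = new int[segs.length];

    String lastTerm = null;
    while (true) {
      // pick the smallest current term across segments (the PriorityQueue's job in the real code)
      int readerId = -1;
      for (int i = 0; i < segs.length; i++) {
        if (cursor[i] < segs[i].length
            && (readerId == -1
                || segs[i][cursor[i]].compareTo(segs[readerId][cursor[readerId]]) < 0)) {
          readerId = i;
        }
      }
      if (readerId == -1) break; // all segments exhausted
      String term = segs[readerId][cursor[readerId]];
      if (lastTerm == null || !lastTerm.equals(term)) {
        // a new unique term: record segment id + delta-coded source ord
        int sourceOrd = cursor[readerId];
        ordToReaderId.add((long) readerId);
        ordDeltas.get(readerId).add((long) (sourceOrd - lastOrds[readerId]));
        lastOrds[readerId] = sourceOrd;
        lastTerm = term;
      }
      cursor[readerId]++; // duplicates across segments are consumed but not re-recorded
    }
    System.out.println(ordToReaderId); // [0, 1, 0, 1]
    System.out.println(ordDeltas);     // [[0, 1], [0, 2]]
  }
}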
@@ -408,12 +409,24 @@ public abstract class DocValuesConsumer implements Closeable {
           new Iterable<BytesRef>() {
             @Override
             public Iterator<BytesRef> iterator() {
+              // for each next(), tells us what reader to go to
+              final AppendingLongBuffer.Iterator readerIDs = merger.ordToReaderId.iterator();
+              // for each next(), gives us the original ord
+              final AppendingLongBuffer.Iterator ordDeltas[] = new AppendingLongBuffer.Iterator[merger.segStates.size()];
+              final int lastOrds[] = new int[ordDeltas.length];
+
+              for (int i = 0; i < ordDeltas.length; i++) {
+                ordDeltas[i] = merger.segStates.get(i).ordDeltas.iterator();
+              }
+
+              final BytesRef scratch = new BytesRef();
+
               return new Iterator<BytesRef>() {
                 int ordUpto;
 
                 @Override
                 public boolean hasNext() {
-                  return ordUpto < merger.mergedTerms.size();
+                  return ordUpto < merger.numMergedTerms;
                 }
 
                 @Override
@@ -426,7 +439,12 @@ public abstract class DocValuesConsumer implements Closeable {
                   if (!hasNext()) {
                     throw new NoSuchElementException();
                   }
-                  return merger.mergedTerms.get(ordUpto++);
+                  int readerID = (int) readerIDs.next();
+                  int ord = lastOrds[readerID] + (int) ordDeltas[readerID].next();
+                  merger.segStates.get(readerID).values.lookupOrd(ord, scratch);
+                  lastOrds[readerID] = ord;
+                  ordUpto++;
+                  return scratch;
                 }
               };
             }
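And the consuming side, mirroring the last two hunks: replay the two buffers and look each term back up in its source segment, so no merged term is ever buffered. The recorded values are hardcoded from the previous sketch so this stands alone:

public class ReplayOrdDeltas {
  public static void main(String[] args) {
    // Toy segments plus the buffers produced by RecordOrdDeltas above.
    String[][] segs = {{"apple", "cherry"}, {"banana", "cherry", "dog"}};
    int[] ordToReaderId = {0, 1, 0, 1};
    int[][] ordDeltas = {{0, 1}, {0, 2}};
    int numMergedTerms = 4;

    int[] lastOrds = new int[segs.length]; // replay cursor per segment
    int[] upto = new int[segs.length];     // next delta to read per segment
    for (int ordUpto = 0; ordUpto < numMergedTerms; ordUpto++) {
      int readerID = ordToReaderId[ordUpto];
      int ord = lastOrds[readerID] + ordDeltas[readerID][upto[readerID]++];
      lastOrds[readerID] = ord;
      // segs[readerID][ord] plays the role of values.lookupOrd(ord, scratch)
      System.out.println(ordUpto + " -> " + segs[readerID][ord]);
    }
    // prints: 0 -> apple, 1 -> banana, 2 -> cherry, 3 -> dog
  }
}

Each next() costs one packed read per buffer plus one lookupOrd, which is the trade the commit message names: less RAM while merging sorted bytes, in exchange for re-deriving each term at consume time.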