Don't merge terms into RAM when merging SortedBytes

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1440731 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-01-30 23:07:36 +00:00
parent 8ccf87a20a
commit 2504d3266e
1 changed file with 30 additions and 12 deletions

View File

@ -35,6 +35,7 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.packed.AppendingLongBuffer;
/** /**
* Abstract API that consumes numeric, binary and * Abstract API that consumes numeric, binary and
@ -240,7 +241,7 @@ public abstract class DocValuesConsumer implements Closeable {
public int numMergedTerms; public int numMergedTerms;
final List<BytesRef> mergedTerms = new ArrayList<BytesRef>(); final AppendingLongBuffer ordToReaderId = new AppendingLongBuffer();
final List<SegmentState> segStates = new ArrayList<SegmentState>(); final List<SegmentState> segStates = new ArrayList<SegmentState>();
private static class SegmentState { private static class SegmentState {
@ -250,6 +251,8 @@ public abstract class DocValuesConsumer implements Closeable {
int ord = -1; int ord = -1;
SortedDocValues values; SortedDocValues values;
BytesRef scratch = new BytesRef(); BytesRef scratch = new BytesRef();
int lastOrd = -1; // last REAL ord we looked up: nocommit: clean this up
AppendingLongBuffer ordDeltas = new AppendingLongBuffer();
// nocommit can we factor out the compressed fields // nocommit can we factor out the compressed fields
// compression? ie we have a good idea "roughly" what // compression? ie we have a good idea "roughly" what
@ -262,10 +265,11 @@ public abstract class DocValuesConsumer implements Closeable {
ord++; ord++;
if (liveTerms == null || liveTerms.get(ord)) { if (liveTerms == null || liveTerms.get(ord)) {
values.lookupOrd(ord, scratch); values.lookupOrd(ord, scratch);
lastOrd = ord;
return scratch; return scratch;
} else { } else {
// Skip "deleted" terms (ie, terms that were not // Skip "deleted" terms (ie, terms that were not
// referenced by any live docs): // referenced by any live docs): nocommit: why?!
values.lookupOrd(ord, scratch); values.lookupOrd(ord, scratch);
} }
} }
@ -337,18 +341,15 @@ public abstract class DocValuesConsumer implements Closeable {
if (lastTerm == null || !lastTerm.equals(top.scratch)) { if (lastTerm == null || !lastTerm.equals(top.scratch)) {
// a new unique term: record its segment ID / sourceOrd pair // a new unique term: record its segment ID / sourceOrd pair
int readerId = top.segmentID; int readerId = top.segmentID;
int sourceOrd = top.ord; ordToReaderId.add(readerId);
// nocommit: do this
// ordToReaderID.add(readerId); int sourceOrd = top.lastOrd;
int delta = sourceOrd - lastOrds[readerId]; int delta = sourceOrd - lastOrds[readerId];
lastOrds[readerId] = sourceOrd; lastOrds[readerId] = sourceOrd;
// nocommit: do this top.ordDeltas.add(delta);
// top.ordDeltas.add(delta);
lastTerm = BytesRef.deepCopyOf(top.scratch); lastTerm = BytesRef.deepCopyOf(top.scratch);
// nocommit we could spill this to disk instead of
// RAM, and replay on finish...
mergedTerms.add(lastTerm);
ord++; ord++;
} }
@ -408,12 +409,24 @@ public abstract class DocValuesConsumer implements Closeable {
new Iterable<BytesRef>() { new Iterable<BytesRef>() {
@Override @Override
public Iterator<BytesRef> iterator() { public Iterator<BytesRef> iterator() {
// for each next(), tells us what reader to go to
final AppendingLongBuffer.Iterator readerIDs = merger.ordToReaderId.iterator();
// for each next(), gives us the original ord
final AppendingLongBuffer.Iterator ordDeltas[] = new AppendingLongBuffer.Iterator[merger.segStates.size()];
final int lastOrds[] = new int[ordDeltas.length];
for (int i = 0; i < ordDeltas.length; i++) {
ordDeltas[i] = merger.segStates.get(i).ordDeltas.iterator();
}
final BytesRef scratch = new BytesRef();
return new Iterator<BytesRef>() { return new Iterator<BytesRef>() {
int ordUpto; int ordUpto;
@Override @Override
public boolean hasNext() { public boolean hasNext() {
return ordUpto < merger.mergedTerms.size(); return ordUpto < merger.numMergedTerms;
} }
@Override @Override
@ -426,7 +439,12 @@ public abstract class DocValuesConsumer implements Closeable {
if (!hasNext()) { if (!hasNext()) {
throw new NoSuchElementException(); throw new NoSuchElementException();
} }
return merger.mergedTerms.get(ordUpto++); int readerID = (int) readerIDs.next();
int ord = lastOrds[readerID] + (int) ordDeltas[readerID].next();
merger.segStates.get(readerID).values.lookupOrd(ord, scratch);
lastOrds[readerID] = ord;
ordUpto++;
return scratch;
} }
}; };
} }