move the TermsEnum up from SortedDV

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1436498 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-01-21 17:25:55 +00:00
parent 39e77a6f8e
commit c67ef8657f
6 changed files with 7 additions and 243 deletions

View File

@ -49,102 +49,6 @@ public abstract class SortedDocValues extends BinaryDocValues {
// API? why must it be impl'd here...?
// SortedDocValuesTermsEnum.
public TermsEnum getTermsEnum() {
// nocommit who tests this base impl ...
// Default impl just uses the existing API; subclasses
// can specialize:
return new TermsEnum() {
private int currentOrd = -1;
private final BytesRef term = new BytesRef();
@Override
public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {
int ord = lookupTerm(text, term);
if (ord > 0) {
currentOrd = ord;
term.offset = 0;
term.copyBytes(text);
return SeekStatus.FOUND;
} else {
currentOrd = -ord-1;
if (currentOrd == getValueCount()) {
return SeekStatus.END;
} else {
// nocommit hmm can we avoid this "extra" lookup?:
lookupOrd(currentOrd, term);
return SeekStatus.NOT_FOUND;
}
}
}
@Override
public void seekExact(long ord) throws IOException {
assert ord >= 0 && ord < getValueCount();
currentOrd = (int) ord;
lookupOrd(currentOrd, term);
}
@Override
public BytesRef next() throws IOException {
currentOrd++;
if (currentOrd >= getValueCount()) {
return null;
}
lookupOrd(currentOrd, term);
return term;
}
@Override
public BytesRef term() throws IOException {
return term;
}
@Override
public long ord() throws IOException {
return currentOrd;
}
@Override
public int docFreq() {
throw new UnsupportedOperationException();
}
@Override
public long totalTermFreq() {
return -1;
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
public void seekExact(BytesRef term, TermState state) throws IOException {
assert state != null && state instanceof OrdTermState;
this.seekExact(((OrdTermState)state).ord);
}
@Override
public TermState termState() throws IOException {
OrdTermState state = new OrdTermState();
state.ord = currentOrd;
return state;
}
};
}
public static final SortedDocValues EMPTY = new SortedDocValues() {
@Override
public int getOrd(int docID) {
@ -169,8 +73,6 @@ public abstract class SortedDocValues extends BinaryDocValues {
* @param key Key to look up
* @param spare Spare BytesRef
**/
// nocommit make this protected so codecs can impl better
// version ...
public int lookupTerm(BytesRef key, BytesRef spare) {
int low = 0;

View File

@ -975,147 +975,6 @@ class FieldCacheImpl implements FieldCache {
}
bytes.fill(ret, termOrdToBytesOffset.get(ord));
}
@Override
public TermsEnum getTermsEnum() {
return this.new SortedDocValuesEnum();
}
class SortedDocValuesEnum extends TermsEnum {
int currentOrd;
int currentBlockNumber;
int end; // end position in the current block
final byte[][] blocks;
final int[] blockEnds;
final BytesRef term = new BytesRef();
public SortedDocValuesEnum() {
currentOrd = -1;
currentBlockNumber = 0;
blocks = bytes.getBlocks();
blockEnds = bytes.getBlockEnds();
term.bytes = blocks[0];
end = blockEnds[currentBlockNumber];
}
@Override
public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {
int low = 0;
int high = numOrd-1;
while (low <= high) {
int mid = (low + high) >>> 1;
seekExact(mid);
int cmp = term.compareTo(text);
if (cmp < 0)
low = mid + 1;
else if (cmp > 0)
high = mid - 1;
else {
return SeekStatus.FOUND; // key found
}
}
if (low == numOrd) {
return SeekStatus.END;
} else {
seekExact(low);
return SeekStatus.NOT_FOUND;
}
}
@Override
public void seekExact(long ord) throws IOException {
assert ord >= 0 && ord < numOrd;
// TODO: if gap is small, could iterate from current position? Or let user decide that?
currentBlockNumber = bytes.fillAndGetIndex(term, termOrdToBytesOffset.get((int)ord));
end = blockEnds[currentBlockNumber];
currentOrd = (int)ord;
}
@Override
public BytesRef next() throws IOException {
int start = term.offset + term.length;
if (start >= end) {
// switch byte blocks
if (currentBlockNumber+1 >= blocks.length) {
assert currentOrd+1 == numOrd: "currentOrd=" + currentOrd + " numOrd=" + numOrd;
return null;
}
currentBlockNumber++;
term.bytes = blocks[currentBlockNumber];
end = blockEnds[currentBlockNumber];
start = 0;
if (end<=0) {
assert currentOrd+1 == numOrd;
return null; // special case of empty last array
}
}
currentOrd++;
byte[] block = term.bytes;
if ((block[start] & 128) == 0) {
term.length = block[start];
term.offset = start+1;
} else {
term.length = (((block[start] & 0x7f)) << 8) | (block[1+start] & 0xff);
term.offset = start+2;
}
return term;
}
@Override
public BytesRef term() throws IOException {
return term;
}
@Override
public long ord() throws IOException {
return currentOrd;
}
@Override
public int docFreq() {
throw new UnsupportedOperationException();
}
@Override
public long totalTermFreq() {
return -1;
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
public void seekExact(BytesRef term, TermState state) throws IOException {
assert state != null && state instanceof OrdTermState;
this.seekExact(((OrdTermState)state).ord);
}
@Override
public TermState termState() throws IOException {
OrdTermState state = new OrdTermState();
state.ord = currentOrd;
return state;
}
}
}
// nocommit for DV if you ask for sorted or binary we

View File

@ -23,6 +23,7 @@ import java.util.Comparator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedDocValuesTermsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
@ -101,7 +102,7 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
@Override
public TermsEnum iterator(TermsEnum reuse) {
return fcsi.getTermsEnum();
return new SortedDocValuesTermsEnum(fcsi);
}
@Override

View File

@ -208,7 +208,7 @@ public class TestFieldCache extends LuceneTestCase {
int nTerms = termsIndex.getValueCount();
// System.out.println("nTerms="+nTerms);
TermsEnum tenum = termsIndex.getTermsEnum();
TermsEnum tenum = new SortedDocValuesTermsEnum(termsIndex);
BytesRef val = new BytesRef();
for (int i=0; i<nTerms; i++) {
BytesRef val1 = tenum.next();

View File

@ -24,6 +24,7 @@ import java.util.List;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedDocValuesTermsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.grouping.AbstractGroupFacetCollector;
@ -168,7 +169,7 @@ public abstract class TermGroupFacetCollector extends AbstractGroupFacetCollecto
@Override
protected SegmentResult createSegmentResult() throws IOException {
return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldTermsIndex.getTermsEnum(), startFacetOrd, endFacetOrd);
return new SegmentResult(segmentFacetCounts, segmentTotalCount, new SortedDocValuesTermsEnum(facetFieldTermsIndex), startFacetOrd, endFacetOrd);
}
private static class SegmentResult extends AbstractGroupFacetCollector.SegmentResult {

View File

@ -23,6 +23,7 @@ import java.util.concurrent.*;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedDocValuesTermsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
@ -154,7 +155,7 @@ class PerSegmentSingleValuedFaceting {
seg.pos = seg.startTermIndex;
}
if (seg.pos < seg.endTermIndex) {
seg.tenum = seg.si.getTermsEnum();
seg.tenum = new SortedDocValuesTermsEnum(seg.si);
seg.tenum.seekExact(seg.pos);
seg.tempBR = seg.tenum.term();
queue.add(seg);