mirror of https://github.com/apache/lucene.git
move the TermsEnum up from SortedDV
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1436498 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
39e77a6f8e
commit
c67ef8657f
|
@ -49,102 +49,6 @@ public abstract class SortedDocValues extends BinaryDocValues {
|
|||
// API? why must it be impl'd here...?
|
||||
// SortedDocValuesTermsEnum.
|
||||
|
||||
public TermsEnum getTermsEnum() {
|
||||
// nocommit who tests this base impl ...
|
||||
// Default impl just uses the existing API; subclasses
|
||||
// can specialize:
|
||||
return new TermsEnum() {
|
||||
private int currentOrd = -1;
|
||||
|
||||
private final BytesRef term = new BytesRef();
|
||||
|
||||
@Override
|
||||
public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {
|
||||
int ord = lookupTerm(text, term);
|
||||
if (ord > 0) {
|
||||
currentOrd = ord;
|
||||
term.offset = 0;
|
||||
term.copyBytes(text);
|
||||
return SeekStatus.FOUND;
|
||||
} else {
|
||||
currentOrd = -ord-1;
|
||||
if (currentOrd == getValueCount()) {
|
||||
return SeekStatus.END;
|
||||
} else {
|
||||
// nocommit hmm can we avoid this "extra" lookup?:
|
||||
lookupOrd(currentOrd, term);
|
||||
return SeekStatus.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(long ord) throws IOException {
|
||||
assert ord >= 0 && ord < getValueCount();
|
||||
currentOrd = (int) ord;
|
||||
lookupOrd(currentOrd, term);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
currentOrd++;
|
||||
if (currentOrd >= getValueCount()) {
|
||||
return null;
|
||||
}
|
||||
lookupOrd(currentOrd, term);
|
||||
return term;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef term() throws IOException {
|
||||
return term;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ord() throws IOException {
|
||||
return currentOrd;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docFreq() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long totalTermFreq() {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator<BytesRef> getComparator() {
|
||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(BytesRef term, TermState state) throws IOException {
|
||||
assert state != null && state instanceof OrdTermState;
|
||||
this.seekExact(((OrdTermState)state).ord);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermState termState() throws IOException {
|
||||
OrdTermState state = new OrdTermState();
|
||||
state.ord = currentOrd;
|
||||
return state;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public static final SortedDocValues EMPTY = new SortedDocValues() {
|
||||
@Override
|
||||
public int getOrd(int docID) {
|
||||
|
@ -169,8 +73,6 @@ public abstract class SortedDocValues extends BinaryDocValues {
|
|||
* @param key Key to look up
|
||||
* @param spare Spare BytesRef
|
||||
**/
|
||||
// nocommit make this protected so codecs can impl better
|
||||
// version ...
|
||||
public int lookupTerm(BytesRef key, BytesRef spare) {
|
||||
|
||||
int low = 0;
|
||||
|
|
|
@ -975,147 +975,6 @@ class FieldCacheImpl implements FieldCache {
|
|||
}
|
||||
bytes.fill(ret, termOrdToBytesOffset.get(ord));
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermsEnum getTermsEnum() {
|
||||
return this.new SortedDocValuesEnum();
|
||||
}
|
||||
|
||||
class SortedDocValuesEnum extends TermsEnum {
|
||||
int currentOrd;
|
||||
int currentBlockNumber;
|
||||
int end; // end position in the current block
|
||||
final byte[][] blocks;
|
||||
final int[] blockEnds;
|
||||
|
||||
final BytesRef term = new BytesRef();
|
||||
|
||||
public SortedDocValuesEnum() {
|
||||
currentOrd = -1;
|
||||
currentBlockNumber = 0;
|
||||
blocks = bytes.getBlocks();
|
||||
blockEnds = bytes.getBlockEnds();
|
||||
term.bytes = blocks[0];
|
||||
end = blockEnds[currentBlockNumber];
|
||||
}
|
||||
|
||||
@Override
|
||||
public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException {
|
||||
int low = 0;
|
||||
int high = numOrd-1;
|
||||
|
||||
while (low <= high) {
|
||||
int mid = (low + high) >>> 1;
|
||||
seekExact(mid);
|
||||
int cmp = term.compareTo(text);
|
||||
|
||||
if (cmp < 0)
|
||||
low = mid + 1;
|
||||
else if (cmp > 0)
|
||||
high = mid - 1;
|
||||
else {
|
||||
return SeekStatus.FOUND; // key found
|
||||
}
|
||||
}
|
||||
|
||||
if (low == numOrd) {
|
||||
return SeekStatus.END;
|
||||
} else {
|
||||
seekExact(low);
|
||||
return SeekStatus.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(long ord) throws IOException {
|
||||
assert ord >= 0 && ord < numOrd;
|
||||
// TODO: if gap is small, could iterate from current position? Or let user decide that?
|
||||
currentBlockNumber = bytes.fillAndGetIndex(term, termOrdToBytesOffset.get((int)ord));
|
||||
end = blockEnds[currentBlockNumber];
|
||||
currentOrd = (int)ord;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
int start = term.offset + term.length;
|
||||
if (start >= end) {
|
||||
// switch byte blocks
|
||||
if (currentBlockNumber+1 >= blocks.length) {
|
||||
assert currentOrd+1 == numOrd: "currentOrd=" + currentOrd + " numOrd=" + numOrd;
|
||||
return null;
|
||||
}
|
||||
currentBlockNumber++;
|
||||
term.bytes = blocks[currentBlockNumber];
|
||||
end = blockEnds[currentBlockNumber];
|
||||
start = 0;
|
||||
if (end<=0) {
|
||||
assert currentOrd+1 == numOrd;
|
||||
return null; // special case of empty last array
|
||||
}
|
||||
}
|
||||
|
||||
currentOrd++;
|
||||
|
||||
byte[] block = term.bytes;
|
||||
if ((block[start] & 128) == 0) {
|
||||
term.length = block[start];
|
||||
term.offset = start+1;
|
||||
} else {
|
||||
term.length = (((block[start] & 0x7f)) << 8) | (block[1+start] & 0xff);
|
||||
term.offset = start+2;
|
||||
}
|
||||
|
||||
return term;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef term() throws IOException {
|
||||
return term;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ord() throws IOException {
|
||||
return currentOrd;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docFreq() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long totalTermFreq() {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Comparator<BytesRef> getComparator() {
|
||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seekExact(BytesRef term, TermState state) throws IOException {
|
||||
assert state != null && state instanceof OrdTermState;
|
||||
this.seekExact(((OrdTermState)state).ord);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TermState termState() throws IOException {
|
||||
OrdTermState state = new OrdTermState();
|
||||
state.ord = currentOrd;
|
||||
return state;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// nocommit for DV if you ask for sorted or binary we
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Comparator;
|
|||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedDocValuesTermsEnum;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
@ -101,7 +102,7 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
|
|||
|
||||
@Override
|
||||
public TermsEnum iterator(TermsEnum reuse) {
|
||||
return fcsi.getTermsEnum();
|
||||
return new SortedDocValuesTermsEnum(fcsi);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -208,7 +208,7 @@ public class TestFieldCache extends LuceneTestCase {
|
|||
int nTerms = termsIndex.getValueCount();
|
||||
// System.out.println("nTerms="+nTerms);
|
||||
|
||||
TermsEnum tenum = termsIndex.getTermsEnum();
|
||||
TermsEnum tenum = new SortedDocValuesTermsEnum(termsIndex);
|
||||
BytesRef val = new BytesRef();
|
||||
for (int i=0; i<nTerms; i++) {
|
||||
BytesRef val1 = tenum.next();
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.util.List;
|
|||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.DocTermOrds;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedDocValuesTermsEnum;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.search.grouping.AbstractGroupFacetCollector;
|
||||
|
@ -168,7 +169,7 @@ public abstract class TermGroupFacetCollector extends AbstractGroupFacetCollecto
|
|||
|
||||
@Override
|
||||
protected SegmentResult createSegmentResult() throws IOException {
|
||||
return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldTermsIndex.getTermsEnum(), startFacetOrd, endFacetOrd);
|
||||
return new SegmentResult(segmentFacetCounts, segmentTotalCount, new SortedDocValuesTermsEnum(facetFieldTermsIndex), startFacetOrd, endFacetOrd);
|
||||
}
|
||||
|
||||
private static class SegmentResult extends AbstractGroupFacetCollector.SegmentResult {
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.concurrent.*;
|
|||
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedDocValuesTermsEnum;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
|
@ -154,7 +155,7 @@ class PerSegmentSingleValuedFaceting {
|
|||
seg.pos = seg.startTermIndex;
|
||||
}
|
||||
if (seg.pos < seg.endTermIndex) {
|
||||
seg.tenum = seg.si.getTermsEnum();
|
||||
seg.tenum = new SortedDocValuesTermsEnum(seg.si);
|
||||
seg.tenum.seekExact(seg.pos);
|
||||
seg.tempBR = seg.tenum.term();
|
||||
queue.add(seg);
|
||||
|
|
Loading…
Reference in New Issue