From c67ef8657f5470d50cc81322156c60c21df0cb5c Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 21 Jan 2013 17:25:55 +0000 Subject: [PATCH] move the TermsEnum up from SortedDV git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1436498 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/index/SortedDocValues.java | 98 ------------ .../apache/lucene/search/FieldCacheImpl.java | 141 ------------------ .../search/FieldCacheRewriteMethod.java | 3 +- .../apache/lucene/search/TestFieldCache.java | 2 +- .../term/TermGroupFacetCollector.java | 3 +- .../PerSegmentSingleValuedFaceting.java | 3 +- 6 files changed, 7 insertions(+), 243 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java index 0da9557d3e5..d36c6896980 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java @@ -49,102 +49,6 @@ public abstract class SortedDocValues extends BinaryDocValues { // API? why must it be impl'd here...? // SortedDocValuesTermsEnum. - public TermsEnum getTermsEnum() { - // nocommit who tests this base impl ... - // Default impl just uses the existing API; subclasses - // can specialize: - return new TermsEnum() { - private int currentOrd = -1; - - private final BytesRef term = new BytesRef(); - - @Override - public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException { - int ord = lookupTerm(text, term); - if (ord > 0) { - currentOrd = ord; - term.offset = 0; - term.copyBytes(text); - return SeekStatus.FOUND; - } else { - currentOrd = -ord-1; - if (currentOrd == getValueCount()) { - return SeekStatus.END; - } else { - // nocommit hmm can we avoid this "extra" lookup?: - lookupOrd(currentOrd, term); - return SeekStatus.NOT_FOUND; - } - } - } - - @Override - public void seekExact(long ord) throws IOException { - assert ord >= 0 && ord < getValueCount(); - currentOrd = (int) ord; - lookupOrd(currentOrd, term); - } - - @Override - public BytesRef next() throws IOException { - currentOrd++; - if (currentOrd >= getValueCount()) { - return null; - } - lookupOrd(currentOrd, term); - return term; - } - - @Override - public BytesRef term() throws IOException { - return term; - } - - @Override - public long ord() throws IOException { - return currentOrd; - } - - @Override - public int docFreq() { - throw new UnsupportedOperationException(); - } - - @Override - public long totalTermFreq() { - return -1; - } - - @Override - public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public Comparator getComparator() { - return BytesRef.getUTF8SortedAsUnicodeComparator(); - } - - @Override - public void seekExact(BytesRef term, TermState state) throws IOException { - assert state != null && state instanceof OrdTermState; - this.seekExact(((OrdTermState)state).ord); - } - - @Override - public TermState termState() throws IOException { - OrdTermState state = new OrdTermState(); - state.ord = currentOrd; - return state; - } - }; - } - public static final SortedDocValues EMPTY = new SortedDocValues() { @Override public int getOrd(int docID) { @@ -169,8 +73,6 @@ public abstract class SortedDocValues extends BinaryDocValues { * @param key Key to look up * @param spare Spare BytesRef **/ - // nocommit make this protected so codecs can impl better - // version ... public int lookupTerm(BytesRef key, BytesRef spare) { int low = 0; diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java index e6b3d0c9ce6..f556c02fcca 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -975,147 +975,6 @@ class FieldCacheImpl implements FieldCache { } bytes.fill(ret, termOrdToBytesOffset.get(ord)); } - - @Override - public TermsEnum getTermsEnum() { - return this.new SortedDocValuesEnum(); - } - - class SortedDocValuesEnum extends TermsEnum { - int currentOrd; - int currentBlockNumber; - int end; // end position in the current block - final byte[][] blocks; - final int[] blockEnds; - - final BytesRef term = new BytesRef(); - - public SortedDocValuesEnum() { - currentOrd = -1; - currentBlockNumber = 0; - blocks = bytes.getBlocks(); - blockEnds = bytes.getBlockEnds(); - term.bytes = blocks[0]; - end = blockEnds[currentBlockNumber]; - } - - @Override - public SeekStatus seekCeil(BytesRef text, boolean useCache /* ignored */) throws IOException { - int low = 0; - int high = numOrd-1; - - while (low <= high) { - int mid = (low + high) >>> 1; - seekExact(mid); - int cmp = term.compareTo(text); - - if (cmp < 0) - low = mid + 1; - else if (cmp > 0) - high = mid - 1; - else { - return SeekStatus.FOUND; // key found - } - } - - if (low == numOrd) { - return SeekStatus.END; - } else { - seekExact(low); - return SeekStatus.NOT_FOUND; - } - } - - @Override - public void seekExact(long ord) throws IOException { - assert ord >= 0 && ord < numOrd; - // TODO: if gap is small, could iterate from current position? Or let user decide that? - currentBlockNumber = bytes.fillAndGetIndex(term, termOrdToBytesOffset.get((int)ord)); - end = blockEnds[currentBlockNumber]; - currentOrd = (int)ord; - } - - @Override - public BytesRef next() throws IOException { - int start = term.offset + term.length; - if (start >= end) { - // switch byte blocks - if (currentBlockNumber+1 >= blocks.length) { - assert currentOrd+1 == numOrd: "currentOrd=" + currentOrd + " numOrd=" + numOrd; - return null; - } - currentBlockNumber++; - term.bytes = blocks[currentBlockNumber]; - end = blockEnds[currentBlockNumber]; - start = 0; - if (end<=0) { - assert currentOrd+1 == numOrd; - return null; // special case of empty last array - } - } - - currentOrd++; - - byte[] block = term.bytes; - if ((block[start] & 128) == 0) { - term.length = block[start]; - term.offset = start+1; - } else { - term.length = (((block[start] & 0x7f)) << 8) | (block[1+start] & 0xff); - term.offset = start+2; - } - - return term; - } - - @Override - public BytesRef term() throws IOException { - return term; - } - - @Override - public long ord() throws IOException { - return currentOrd; - } - - @Override - public int docFreq() { - throw new UnsupportedOperationException(); - } - - @Override - public long totalTermFreq() { - return -1; - } - - @Override - public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public Comparator getComparator() { - return BytesRef.getUTF8SortedAsUnicodeComparator(); - } - - @Override - public void seekExact(BytesRef term, TermState state) throws IOException { - assert state != null && state instanceof OrdTermState; - this.seekExact(((OrdTermState)state).ord); - } - - @Override - public TermState termState() throws IOException { - OrdTermState state = new OrdTermState(); - state.ord = currentOrd; - return state; - } - } } // nocommit for DV if you ask for sorted or binary we diff --git a/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java b/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java index bd4319958f5..f55e33d8ea0 100644 --- a/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java +++ b/lucene/core/src/test/org/apache/lucene/search/FieldCacheRewriteMethod.java @@ -23,6 +23,7 @@ import java.util.Comparator; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedDocValuesTermsEnum; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.Bits; @@ -101,7 +102,7 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod @Override public TermsEnum iterator(TermsEnum reuse) { - return fcsi.getTermsEnum(); + return new SortedDocValuesTermsEnum(fcsi); } @Override diff --git a/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java b/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java index bd11620e742..658ed9571eb 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java @@ -208,7 +208,7 @@ public class TestFieldCache extends LuceneTestCase { int nTerms = termsIndex.getValueCount(); // System.out.println("nTerms="+nTerms); - TermsEnum tenum = termsIndex.getTermsEnum(); + TermsEnum tenum = new SortedDocValuesTermsEnum(termsIndex); BytesRef val = new BytesRef(); for (int i=0; i