From 9e27723b3702b812fcbb1c452f5b4f9bf232618e Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Thu, 17 Nov 2011 17:35:42 +0000 Subject: [PATCH] LUCENE-3562: stop caching thread-private TermsEnums in Terms git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1203294 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/misc/GetTermInfo.java | 5 +- .../lucene/index/FilterIndexReader.java | 15 --- .../org/apache/lucene/index/IndexReader.java | 80 +++++++------ .../org/apache/lucene/index/MultiFields.java | 27 ++++- .../apache/lucene/index/SegmentReader.java | 12 -- .../java/org/apache/lucene/index/Terms.java | 111 +----------------- .../lucene/index/codecs/BlockTermsReader.java | 18 +-- .../index/codecs/BlockTreeTermsReader.java | 71 ++++++----- .../lucene/search/MultiPhraseQuery.java | 94 ++++++++++----- .../org/apache/lucene/search/PhraseQuery.java | 40 ++++--- .../org/apache/lucene/search/TermQuery.java | 12 +- .../lucene/search/spans/SpanTermQuery.java | 6 +- .../org/apache/lucene/util/TermContext.java | 3 +- .../lucene/index/TestDocsAndPositions.java | 8 +- .../lucene/index/TestDocumentWriter.java | 8 +- .../apache/lucene/index/TestIndexReader.java | 9 +- .../apache/lucene/index/TestMultiFields.java | 3 +- .../org/apache/lucene/index/TestOmitTf.java | 2 +- .../lucene/index/TestStressIndexing2.java | 29 +++-- .../function/valuesource/TFValueSource.java | 12 +- .../valuesource/TermFreqValueSource.java | 13 +- .../apache/solr/search/SolrIndexSearcher.java | 14 ++- .../apache/solr/search/TestRealTimeGet.java | 7 +- 23 files changed, 280 insertions(+), 319 deletions(-) diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/misc/GetTermInfo.java b/lucene/contrib/misc/src/java/org/apache/lucene/misc/GetTermInfo.java index 9169eb7fe2d..ae3faf966ce 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/misc/GetTermInfo.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/misc/GetTermInfo.java @@ -21,9 +21,7 @@ import java.io.File; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.index.Terms; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.MultiFields; /* * Utility to get document frequency and total number of occurrences (sum of the tf for each doc) of a term. @@ -50,10 +48,9 @@ public class GetTermInfo { public static void getTermInfo(Directory dir, String field, BytesRef termtext) throws Exception { IndexReader reader = IndexReader.open(dir); - Terms terms =MultiFields.getTerms(reader, field); long totalTF = HighFreqTerms.getTotalTermFreq(reader, field, termtext); System.out.printf("%s:%s \t totalTF = %,d \t doc freq = %,d \n", - field, termtext.utf8ToString(), totalTF, terms.docFreq(termtext)); + field, termtext.utf8ToString(), totalTF, reader.docFreq(field, termtext)); } private static void usage() { diff --git a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java index 82068607292..f394da0a379 100644 --- a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java @@ -84,21 +84,6 @@ public class FilterIndexReader extends IndexReader { return in.getComparator(); } - @Override - public int docFreq(BytesRef text) throws IOException { - return in.docFreq(text); - } - - @Override - public DocsEnum docs(Bits liveDocs, BytesRef text, DocsEnum reuse) throws IOException { - return in.docs(liveDocs, text, reuse); - } - - @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef text, DocsAndPositionsEnum reuse) throws IOException { - return in.docsAndPositions(liveDocs, text, reuse); - } - @Override public long getUniqueTermCount() throws IOException { return in.getUniqueTermCount(); diff --git a/lucene/src/java/org/apache/lucene/index/IndexReader.java b/lucene/src/java/org/apache/lucene/index/IndexReader.java index 5388ae855b0..742fb7cd30e 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/IndexReader.java @@ -991,7 +991,12 @@ public abstract class IndexReader implements Cloneable,Closeable { if (terms == null) { return 0; } - return terms.docFreq(term); + final TermsEnum termsEnum = terms.iterator(null); + if (termsEnum.seekExact(term, true)) { + return termsEnum.docFreq(); + } else { + return 0; + } } /** Returns the number of documents containing the term @@ -1008,7 +1013,12 @@ public abstract class IndexReader implements Cloneable,Closeable { if (terms == null) { return 0; } - return terms.totalTermFreq(term); + final TermsEnum termsEnum = terms.iterator(null); + if (termsEnum.seekExact(term, true)) { + return termsEnum.totalTermFreq(); + } else { + return 0; + } } /** This may return null if the field does not exist.*/ @@ -1027,15 +1037,16 @@ public abstract class IndexReader implements Cloneable,Closeable { assert field != null; assert term != null; final Fields fields = fields(); - if (fields == null) { - return null; - } - final Terms terms = fields.terms(field); - if (terms != null) { - return terms.docs(liveDocs, term, null); - } else { - return null; + if (fields != null) { + final Terms terms = fields.terms(field); + if (terms != null) { + final TermsEnum termsEnum = terms.iterator(null); + if (termsEnum.seekExact(term, true)) { + return termsEnum.docs(liveDocs, null); + } + } } + return null; } /** Returns {@link DocsAndPositionsEnum} for the specified @@ -1046,15 +1057,16 @@ public abstract class IndexReader implements Cloneable,Closeable { assert field != null; assert term != null; final Fields fields = fields(); - if (fields == null) { - return null; - } - final Terms terms = fields.terms(field); - if (terms != null) { - return terms.docsAndPositions(liveDocs, term, null); - } else { - return null; + if (fields != null) { + final Terms terms = fields.terms(field); + if (terms != null) { + final TermsEnum termsEnum = terms.iterator(null); + if (termsEnum.seekExact(term, true)) { + return termsEnum.docsAndPositions(liveDocs, null); + } + } } + return null; } /** @@ -1066,15 +1078,15 @@ public abstract class IndexReader implements Cloneable,Closeable { assert state != null; assert field != null; final Fields fields = fields(); - if (fields == null) { - return null; - } - final Terms terms = fields.terms(field); - if (terms != null) { - return terms.docs(liveDocs, term, state, null); - } else { - return null; + if (fields != null) { + final Terms terms = fields.terms(field); + if (terms != null) { + final TermsEnum termsEnum = terms.iterator(null); + termsEnum.seekExact(term, state); + return termsEnum.docs(liveDocs, null); + } } + return null; } /** @@ -1086,15 +1098,15 @@ public abstract class IndexReader implements Cloneable,Closeable { assert state != null; assert field != null; final Fields fields = fields(); - if (fields == null) { - return null; - } - final Terms terms = fields.terms(field); - if (terms != null) { - return terms.docsAndPositions(liveDocs, term, state, null); - } else { - return null; + if (fields != null) { + final Terms terms = fields.terms(field); + if (terms != null) { + final TermsEnum termsEnum = terms.iterator(null); + termsEnum.seekExact(term, state); + return termsEnum.docsAndPositions(liveDocs, null); + } } + return null; } diff --git a/lucene/src/java/org/apache/lucene/index/MultiFields.java b/lucene/src/java/org/apache/lucene/index/MultiFields.java index 7a8c4257c61..2a24a691967 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiFields.java +++ b/lucene/src/java/org/apache/lucene/index/MultiFields.java @@ -156,10 +156,12 @@ public final class MultiFields extends Fields { assert term != null; final Terms terms = getTerms(r, field); if (terms != null) { - return terms.docs(liveDocs, term, null); - } else { - return null; + final TermsEnum termsEnum = terms.iterator(null); + if (termsEnum.seekExact(term, true)) { + return termsEnum.docs(liveDocs, null); + } } + return null; } /** Returns {@link DocsAndPositionsEnum} for the specified @@ -170,10 +172,12 @@ public final class MultiFields extends Fields { assert term != null; final Terms terms = getTerms(r, field); if (terms != null) { - return terms.docsAndPositions(liveDocs, term, null); - } else { - return null; + final TermsEnum termsEnum = terms.iterator(null); + if (termsEnum.seekExact(term, true)) { + return termsEnum.docsAndPositions(liveDocs, null); + } } + return null; } public MultiFields(Fields[] subs, ReaderUtil.Slice[] subSlices) { @@ -233,6 +237,17 @@ public final class MultiFields extends Fields { return result; } + public static long totalTermFreq(IndexReader r, String field, BytesRef text) throws IOException { + final Terms terms = getTerms(r, field); + if (terms != null) { + final TermsEnum termsEnum = terms.iterator(null); + if (termsEnum.seekExact(text, true)) { + return termsEnum.totalTermFreq(); + } + } + return 0; + } + @Override public int getUniqueFieldCount() { return terms.size(); diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index 7718a3d7170..2c53b963adf 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -36,7 +36,6 @@ import org.apache.lucene.index.codecs.TermVectorsReader; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.BitVector; import org.apache.lucene.util.Bits; -import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CloseableThreadLocal; import org.apache.lucene.util.StringHelper; @@ -473,17 +472,6 @@ public class SegmentReader extends IndexReader implements Cloneable { return core.fields; } - @Override - public int docFreq(String field, BytesRef term) throws IOException { - ensureOpen(); - Terms terms = core.fields.terms(field); - if (terms != null) { - return terms.docFreq(term); - } else { - return 0; - } - } - @Override public int numDocs() { // Don't call ensureOpen() here (it could affect performance) diff --git a/lucene/src/java/org/apache/lucene/index/Terms.java b/lucene/src/java/org/apache/lucene/index/Terms.java index 873639bb028..174ddce870b 100644 --- a/lucene/src/java/org/apache/lucene/index/Terms.java +++ b/lucene/src/java/org/apache/lucene/index/Terms.java @@ -20,9 +20,7 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.Comparator; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.CloseableThreadLocal; import org.apache.lucene.util.automaton.CompiledAutomaton; /** @@ -32,10 +30,6 @@ import org.apache.lucene.util.automaton.CompiledAutomaton; public abstract class Terms { - // Privately cache a TermsEnum per-thread for looking up - // docFreq and getting a private DocsEnum - private final CloseableThreadLocal threadEnums = new CloseableThreadLocal(); - /** Returns an iterator that will step through all * terms. This method will not return null. If you have * a previous TermsEnum, for example from a different @@ -83,81 +77,6 @@ public abstract class Terms { * reuse it. */ public abstract Comparator getComparator() throws IOException; - /** Returns the number of documents containing the - * specified term text. Returns 0 if the term does not - * exist. */ - public int docFreq(BytesRef text) throws IOException { - final TermsEnum termsEnum = getThreadTermsEnum(); - if (termsEnum.seekExact(text, true)) { - return termsEnum.docFreq(); - } else { - return 0; - } - } - - /** Returns the total number of occurrences of this term - * across all documents (the sum of the freq() for each - * doc that has this term). This will be -1 if the - * codec doesn't support this measure. Note that, like - * other term measures, this measure does not take - * deleted documents into account. */ - public long totalTermFreq(BytesRef text) throws IOException { - final TermsEnum termsEnum = getThreadTermsEnum(); - if (termsEnum.seekExact(text, true)) { - return termsEnum.totalTermFreq(); - } else { - return 0; - } - } - - /** Get {@link DocsEnum} for the specified term. This - * method may return null if the term does not exist. */ - public DocsEnum docs(Bits liveDocs, BytesRef text, DocsEnum reuse) throws IOException { - final TermsEnum termsEnum = getThreadTermsEnum(); - if (termsEnum.seekExact(text, true)) { - return termsEnum.docs(liveDocs, reuse); - } else { - return null; - } - } - - /** Get {@link DocsEnum} for the specified term. This - * method will may return null if the term does not - * exists, or positions were not indexed. */ - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef text, DocsAndPositionsEnum reuse) throws IOException { - final TermsEnum termsEnum = getThreadTermsEnum(); - if (termsEnum.seekExact(text, true)) { - return termsEnum.docsAndPositions(liveDocs, reuse); - } else { - return null; - } - } - - /** - * Expert: Get {@link DocsEnum} for the specified {@link TermState}. - * This method may return null if the term does not exist. - * - * @see TermsEnum#termState() - * @see TermsEnum#seekExact(BytesRef, TermState) */ - public DocsEnum docs(Bits liveDocs, BytesRef term, TermState termState, DocsEnum reuse) throws IOException { - final TermsEnum termsEnum = getThreadTermsEnum(); - termsEnum.seekExact(term, termState); - return termsEnum.docs(liveDocs, reuse); - } - - /** - * Get {@link DocsEnum} for the specified {@link TermState}. This - * method will may return null if the term does not exists, or positions were - * not indexed. - * - * @see TermsEnum#termState() - * @see TermsEnum#seekExact(BytesRef, TermState) */ - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef term, TermState termState, DocsAndPositionsEnum reuse) throws IOException { - final TermsEnum termsEnum = getThreadTermsEnum(); - termsEnum.seekExact(term, termState); - return termsEnum.docsAndPositions(liveDocs, reuse); - } - /** Returns the number of terms for this field, or -1 if this * measure isn't stored by the codec. Note that, just like * other term measures, this measure does not take deleted @@ -172,7 +91,7 @@ public abstract class Terms { * into account. */ public abstract long getSumTotalTermFreq() throws IOException; - /** Returns the sum of {@link #docFreq(BytesRef)} for + /** Returns the sum of {@link TermsEnum#docFreq()} for * all terms in this field, or -1 if this measure isn't * stored by the codec. Note that, just like other term * measures, this measure does not take deleted documents @@ -185,34 +104,6 @@ public abstract class Terms { * measures, this measure does not take deleted documents * into account. */ public abstract int getDocCount() throws IOException; - - /** - * Returns a thread-private {@link TermsEnum} instance. Obtaining - * {@link TermsEnum} from this method might be more efficient than using - * {@link #iterator(TermsEnum)} directly since this method doesn't necessarily create a - * new {@link TermsEnum} instance. - *

- * NOTE: {@link TermsEnum} instances obtained from this method must not be - * shared across threads. The enum should only be used within a local context - * where other threads can't access it. - * - * @return a thread-private {@link TermsEnum} instance - * @throws IOException - * if an IOException occurs - * @lucene.internal - */ - public TermsEnum getThreadTermsEnum() throws IOException { - TermsEnum termsEnum = threadEnums.get(); - if (termsEnum == null) { - termsEnum = iterator(null); - threadEnums.set(termsEnum); - } - return termsEnum; - } - // subclass must close when done: - protected void close() { - threadEnums.close(); - } public final static Terms[] EMPTY_ARRAY = new Terms[0]; } diff --git a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java index c94f2c5c058..ee8e333c089 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java @@ -17,7 +17,6 @@ package org.apache.lucene.index.codecs; * limitations under the License. */ -import java.io.Closeable; import java.io.IOException; import java.util.Collection; import java.util.Comparator; @@ -181,14 +180,8 @@ public class BlockTermsReader extends FieldsProducer { } } } finally { - try { - if (postingsReader != null) { - postingsReader.close(); - } - } finally { - for(FieldReader field : fields.values()) { - field.close(); - } + if (postingsReader != null) { + postingsReader.close(); } } } @@ -238,7 +231,7 @@ public class BlockTermsReader extends FieldsProducer { } } - private class FieldReader extends Terms implements Closeable { + private class FieldReader extends Terms { final long numTerms; final FieldInfo fieldInfo; final long termsStartPointer; @@ -261,11 +254,6 @@ public class BlockTermsReader extends FieldsProducer { return BytesRef.getUTF8SortedAsUnicodeComparator(); } - @Override - public void close() { - super.close(); - } - @Override public TermsEnum iterator(TermsEnum reuse) throws IOException { return new SegmentTermsEnum(); diff --git a/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java b/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java index 70bbe49669c..b3a0fa0e251 100644 --- a/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java +++ b/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java @@ -18,7 +18,6 @@ package org.apache.lucene.index.codecs; */ import java.io.ByteArrayOutputStream; -import java.io.Closeable; import java.io.IOException; import java.io.PrintStream; import java.util.Collection; @@ -194,9 +193,6 @@ public class BlockTreeTermsReader extends FieldsProducer { try { IOUtils.close(in, postingsReader); } finally { - for(FieldReader field : fields.values()) { - field.close(); - } // Clear so refs to terms index is GCable even if // app hangs onto us: fields.clear(); @@ -392,7 +388,7 @@ public class BlockTreeTermsReader extends FieldsProducer { final Outputs fstOutputs = ByteSequenceOutputs.getSingleton(); final BytesRef NO_OUTPUT = fstOutputs.getNoOutput(); - public final class FieldReader extends Terms implements Closeable { + public final class FieldReader extends Terms { final long numTerms; final FieldInfo fieldInfo; final long sumTotalTermFreq; @@ -450,11 +446,6 @@ public class BlockTreeTermsReader extends FieldsProducer { return BytesRef.getUTF8SortedAsUnicodeComparator(); } - @Override - public void close() { - super.close(); - } - @Override public TermsEnum iterator(TermsEnum reuse) throws IOException { return new SegmentTermsEnum(); @@ -744,7 +735,7 @@ public class BlockTreeTermsReader extends FieldsProducer { } } - private final BytesRef savedStartTerm; + private BytesRef savedStartTerm; // TODO: in some cases we can filter by length? eg // regexp foo*bar must be at least length 6 bytes @@ -784,7 +775,7 @@ public class BlockTreeTermsReader extends FieldsProducer { f.load(rootCode); // for assert: - savedStartTerm = startTerm == null ? null : new BytesRef(startTerm); + assert setSavedStartTerm(startTerm); currentFrame = f; if (startTerm != null) { @@ -792,6 +783,12 @@ public class BlockTreeTermsReader extends FieldsProducer { } } + // only for assert: + private boolean setSavedStartTerm(BytesRef startTerm) { + savedStartTerm = startTerm == null ? null : new BytesRef(startTerm); + return true; + } + @Override public TermState termState() throws IOException { currentFrame.decodeMetaData(); @@ -1163,7 +1160,7 @@ public class BlockTreeTermsReader extends FieldsProducer { // Iterates through terms in this field private final class SegmentTermsEnum extends TermsEnum { - private final IndexInput in; + private IndexInput in; private Frame[] stack; private final Frame staticFrame; @@ -1182,29 +1179,21 @@ public class BlockTreeTermsReader extends FieldsProducer { final BytesRef term = new BytesRef(); - @SuppressWarnings("unchecked") private FST.Arc[] arcs = new FST.Arc[5]; + @SuppressWarnings("unchecked") private FST.Arc[] arcs = new FST.Arc[1]; public SegmentTermsEnum() throws IOException { //if (DEBUG) System.out.println("BTTR.init seg=" + segment); - in = (IndexInput) BlockTreeTermsReader.this.in.clone(); - stack = new Frame[5]; - for(int stackOrd=0;stackOrd(); } - // Init w/ root block; don't use index since it may - // not (and need not) have been loaded - //final FST.Arc arc = index.getFirstArc(arcs[0]); - - // Empty string prefix must have an output in the index! - //assert arc.isFinal(); - currentFrame = staticFrame; final FST.Arc arc; if (index != null) { @@ -1214,8 +1203,9 @@ public class BlockTreeTermsReader extends FieldsProducer { } else { arc = null; } - currentFrame = pushFrame(arc, rootCode, 0); - currentFrame.loadBlock(); + currentFrame = staticFrame; + //currentFrame = pushFrame(arc, rootCode, 0); + //currentFrame.loadBlock(); validIndexPrefix = 0; // if (DEBUG) { // System.out.println("init frame state " + currentFrame.ord); @@ -1226,6 +1216,12 @@ public class BlockTreeTermsReader extends FieldsProducer { // computeBlockStats().print(System.out); } + private void initIndexInput() { + if (this.in == null) { + this.in = (IndexInput) BlockTreeTermsReader.this.in.clone(); + } + } + /** Runs next() through the entire terms dict, * computing aggregate statistics. */ public Stats computeBlockStats() throws IOException { @@ -1975,6 +1971,20 @@ public class BlockTreeTermsReader extends FieldsProducer { @Override public BytesRef next() throws IOException { + if (in == null) { + // Fresh TermsEnum; seek to first term: + final FST.Arc arc; + if (index != null) { + arc = index.getFirstArc(arcs[0]); + // Empty string prefix must have an output in the index! + assert arc.isFinal(); + } else { + arc = null; + } + currentFrame = pushFrame(arc, rootCode, 0); + currentFrame.loadBlock(); + } + targetBeforeCurrentLength = currentFrame.ord; assert !eof; @@ -2242,6 +2252,11 @@ public class BlockTreeTermsReader extends FieldsProducer { use. */ void loadBlock() throws IOException { + // Clone the IndexInput lazily, so that consumers + // that just pull a TermsEnum to + // seekExact(TermState) don't pay this cost: + initIndexInput(); + if (nextEnt != -1) { // Already loaded return; diff --git a/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 5c296190a05..7dd436b5a35 100644 --- a/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -20,20 +20,23 @@ package org.apache.lucene.search; import java.io.IOException; import java.util.*; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.ReaderContext; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.DocsAndPositionsEnum; -import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer; +import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.TermContext; import org.apache.lucene.util.ToStringUtils; -import org.apache.lucene.util.PriorityQueue; -import org.apache.lucene.util.Bits; /** * MultiPhraseQuery is a generalized version of PhraseQuery, with an added @@ -134,6 +137,7 @@ public class MultiPhraseQuery extends Query { private class MultiPhraseWeight extends Weight { private final Similarity similarity; private final Similarity.Stats stats; + private final Map termContexts = new HashMap(); public MultiPhraseWeight(IndexSearcher searcher) throws IOException { @@ -144,7 +148,11 @@ public class MultiPhraseQuery extends Query { ArrayList allTermStats = new ArrayList(); for(final Term[] terms: termArrays) { for (Term term: terms) { - TermContext termContext = TermContext.build(context, term, true); + TermContext termContext = termContexts.get(term); + if (termContext == null) { + termContext = TermContext.build(context, term, true); + termContexts.put(term, termContext); + } allTermStats.add(searcher.termStatistics(term, termContext)); } } @@ -174,6 +182,14 @@ public class MultiPhraseQuery extends Query { PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[termArrays.size()]; + final Terms fieldTerms = reader.terms(field); + if (fieldTerms == null) { + return null; + } + + // Reuse single TermsEnum below: + final TermsEnum termsEnum = fieldTerms.iterator(null); + for (int pos=0; pos 1) { - postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, reader, terms); + postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum); // coarse -- this overcounts since a given doc can - // have more than one terms: + // have more than one term: docFreq = 0; for(int termIdx=0;termIdx termContexts, TermsEnum termsEnum) throws IOException { List docsEnums = new LinkedList(); for (int i = 0; i < terms.length; i++) { - DocsAndPositionsEnum postings = indexReader.termPositionsEnum(liveDocs, - terms[i].field(), - terms[i].bytes()); - if (postings != null) { - docsEnums.add(postings); - } else { - if (indexReader.termDocsEnum(liveDocs, terms[i].field(), terms[i].bytes()) != null) { - // term does exist, but has no positions - throw new IllegalStateException("field \"" + terms[i].field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + terms[i].text() + ")"); - } + final Term term = terms[i]; + TermState termState = termContexts.get(term).get(context.ord); + if (termState == null) { + // Term doesn't exist in reader + continue; } + termsEnum.seekExact(term.bytes(), termState); + DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null); + if (postings == null) { + // term does exist, but has no positions + throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")"); + } + docsEnums.add(postings); } _queue = new DocsQueue(docsEnums); diff --git a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java index b5ee7e09af1..030dbd8ccce 100644 --- a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java @@ -18,24 +18,24 @@ package org.apache.lucene.search; */ import java.io.IOException; -import java.util.Set; import java.util.ArrayList; +import java.util.Set; +import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.ReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.TermContext; import org.apache.lucene.util.ToStringUtils; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.Bits; /** A Query that matches documents containing a particular sequence of terms. * A PhraseQuery is built by QueryParser for input like "new york". @@ -222,27 +222,32 @@ public class PhraseQuery extends Query { final IndexReader reader = context.reader; final Bits liveDocs = acceptDocs; PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.size()]; + + final Terms fieldTerms = reader.terms(field); + if (fieldTerms == null) { + return null; + } + + // Reuse single TermsEnum below: + final TermsEnum te = fieldTerms.iterator(null); + for (int i = 0; i < terms.size(); i++) { final Term t = terms.get(i); final TermState state = states[i].get(context.ord); if (state == null) { /* term doesnt exist in this segment */ - assert termNotInReader(reader, field, t.bytes()) : "no termstate found but term exists in reader"; + assert termNotInReader(reader, field, t.bytes()): "no termstate found but term exists in reader"; return null; } - DocsAndPositionsEnum postingsEnum = reader.termPositionsEnum(liveDocs, - t.field(), - t.bytes(), - state); + te.seekExact(t.bytes(), state); + DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null); + // PhraseQuery on a field that did not index // positions. if (postingsEnum == null) { - assert (reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null) : "termstate found but no term exists in reader"; + assert reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null: "termstate found but no term exists in reader"; // term does exist, but has no positions throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")"); } - // get the docFreq without seeking - TermsEnum te = reader.fields().terms(field).getThreadTermsEnum(); - te.seekExact(t.bytes(), state); postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i).intValue(), t); } @@ -264,10 +269,9 @@ public class PhraseQuery extends Query { } } + // only called from assert private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException { - // only called from assert - final Terms terms = reader.terms(field); - return terms == null || terms.docFreq(bytes) == 0; + return reader.docFreq(field, bytes) == 0; } @Override diff --git a/lucene/src/java/org/apache/lucene/search/TermQuery.java b/lucene/src/java/org/apache/lucene/search/TermQuery.java index ae74d5812c2..0ba8e00e146 100644 --- a/lucene/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/src/java/org/apache/lucene/search/TermQuery.java @@ -23,7 +23,6 @@ import java.util.Set; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.TermState; -import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.ReaderContext; @@ -41,13 +40,13 @@ import org.apache.lucene.util.ToStringUtils; */ public class TermQuery extends Query { private final Term term; - private int docFreq; - private transient TermContext perReaderTermState; + private final int docFreq; + private final TermContext perReaderTermState; final class TermWeight extends Weight { private final Similarity similarity; private final Similarity.Stats stats; - private transient TermContext termStates; + private final TermContext termStates; public TermWeight(IndexSearcher searcher, TermContext termStates) throws IOException { @@ -108,7 +107,7 @@ public class TermQuery extends Query { return null; } //System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null")); - final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum(); + final TermsEnum termsEnum = context.reader.terms(term.field()).iterator(null); termsEnum.seekExact(term.bytes(), state); return termsEnum; } @@ -116,8 +115,7 @@ public class TermQuery extends Query { private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException { // only called from assert //System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString()); - final Terms terms = reader.terms(field); - return terms == null || terms.docFreq(bytes) == 0; + return reader.docFreq(field, bytes) == 0; } @Override diff --git a/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java index 32744b2dc4c..dad477f9d93 100644 --- a/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java +++ b/lucene/src/java/org/apache/lucene/search/spans/SpanTermQuery.java @@ -17,7 +17,6 @@ package org.apache.lucene.search.spans; * limitations under the License. */ -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.Fields; import org.apache.lucene.index.Term; @@ -26,7 +25,6 @@ import org.apache.lucene.index.TermState; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.Bits; -import org.apache.lucene.util.ReaderUtil; import org.apache.lucene.util.TermContext; import org.apache.lucene.util.ToStringUtils; @@ -99,7 +97,7 @@ public class SpanTermQuery extends SpanQuery { if (fields != null) { final Terms terms = fields.terms(term.field()); if (terms != null) { - final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share! + final TermsEnum termsEnum = terms.iterator(null); if (termsEnum.seekExact(term.bytes(), true)) { state = termsEnum.termState(); } else { @@ -119,7 +117,7 @@ public class SpanTermQuery extends SpanQuery { return TermSpans.EMPTY_TERM_SPANS; } - final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum(); + final TermsEnum termsEnum = context.reader.terms(term.field()).iterator(null); termsEnum.seekExact(term.bytes(), state); final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null); diff --git a/lucene/src/java/org/apache/lucene/util/TermContext.java b/lucene/src/java/org/apache/lucene/util/TermContext.java index aed51f57c9f..4fe278e192c 100644 --- a/lucene/src/java/org/apache/lucene/util/TermContext.java +++ b/lucene/src/java/org/apache/lucene/util/TermContext.java @@ -46,7 +46,6 @@ public final class TermContext { //public static boolean DEBUG = BlockTreeTermsWriter.DEBUG; - /** * Creates an empty {@link TermContext} from a {@link ReaderContext} */ @@ -94,7 +93,7 @@ public final class TermContext { if (fields != null) { final Terms terms = fields.terms(field); if (terms != null) { - final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share! + final TermsEnum termsEnum = terms.iterator(null); if (termsEnum.seekExact(bytes, cache)) { final TermState termState = termsEnum.termState(); //if (DEBUG) System.out.println(" found"); diff --git a/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java b/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java index 80904a5adb7..68ebb457758 100644 --- a/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java +++ b/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java @@ -349,7 +349,9 @@ public class TestDocsAndPositions extends LuceneTestCase { assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); // now reuse and check again - disi = r.terms("foo").docs(null, new BytesRef("bar"), disi); + TermsEnum te = r.terms("foo").iterator(null); + assertTrue(te.seekExact(new BytesRef("bar"), true)); + disi = te.docs(null, disi); docid = disi.docID(); assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS); assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); @@ -372,7 +374,9 @@ public class TestDocsAndPositions extends LuceneTestCase { assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); // now reuse and check again - disi = r.terms("foo").docsAndPositions(null, new BytesRef("bar"), disi); + TermsEnum te = r.terms("foo").iterator(null); + assertTrue(te.seekExact(new BytesRef("bar"), true)); + disi = te.docsAndPositions(null, disi); docid = disi.docID(); assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS); assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); diff --git a/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java b/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java index ab3bf338741..d8ef193a036 100644 --- a/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java +++ b/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java @@ -199,7 +199,7 @@ public class TestDocumentWriter extends LuceneTestCase { writer.close(); SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random)); - DocsAndPositionsEnum termPositions = reader.fields().terms("f1").docsAndPositions(reader.getLiveDocs(), new BytesRef("a"), null); + DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, reader.getLiveDocs(), "f1", new BytesRef("a")); assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); int freq = termPositions.freq(); assertEquals(3, freq); @@ -243,18 +243,18 @@ public class TestDocumentWriter extends LuceneTestCase { writer.close(); SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random)); - DocsAndPositionsEnum termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getLiveDocs(), new BytesRef("term1"), null); + DocsAndPositionsEnum termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term1")); assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); assertEquals(1, termPositions.freq()); assertEquals(0, termPositions.nextPosition()); - termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getLiveDocs(), new BytesRef("term2"), null); + termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term2")); assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); assertEquals(2, termPositions.freq()); assertEquals(1, termPositions.nextPosition()); assertEquals(3, termPositions.nextPosition()); - termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getLiveDocs(), new BytesRef("term3"), null); + termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term3")); assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); assertEquals(1, termPositions.freq()); assertEquals(2, termPositions.nextPosition()); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java index 2da09753ce4..68793750708 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java @@ -1340,13 +1340,12 @@ public class TestIndexReader extends LuceneTestCase writer.addDocument(d); IndexReader r = writer.getReader(); writer.close(); - Terms terms = MultiFields.getTerms(r, "f"); try { // Make sure codec impls totalTermFreq (eg PreFlex doesn't) - Assume.assumeTrue(terms.totalTermFreq(new BytesRef("b")) != -1); - assertEquals(1, terms.totalTermFreq(new BytesRef("b"))); - assertEquals(2, terms.totalTermFreq(new BytesRef("a"))); - assertEquals(1, terms.totalTermFreq(new BytesRef("b"))); + Assume.assumeTrue(MultiFields.totalTermFreq(r, "f", new BytesRef("b")) != -1); + assertEquals(1, MultiFields.totalTermFreq(r, "f", new BytesRef("b"))); + assertEquals(2, MultiFields.totalTermFreq(r, "f", new BytesRef("a"))); + assertEquals(1, MultiFields.totalTermFreq(r, "f", new BytesRef("b"))); } finally { r.close(); dir.close(); diff --git a/lucene/src/test/org/apache/lucene/index/TestMultiFields.java b/lucene/src/test/org/apache/lucene/index/TestMultiFields.java index 5bcf3de5524..916837faa9e 100644 --- a/lucene/src/test/org/apache/lucene/index/TestMultiFields.java +++ b/lucene/src/test/org/apache/lucene/index/TestMultiFields.java @@ -113,7 +113,6 @@ public class TestMultiFields extends LuceneTestCase { for(int delDoc : deleted) { assertFalse(liveDocs.get(delDoc)); } - Terms terms2 = MultiFields.getTerms(reader, "field"); for(int i=0;i<100;i++) { BytesRef term = terms.get(random.nextInt(terms.size())); @@ -121,7 +120,7 @@ public class TestMultiFields extends LuceneTestCase { System.out.println("TEST: seek term="+ UnicodeUtil.toHexString(term.utf8ToString()) + " " + term); } - DocsEnum docsEnum = terms2.docs(liveDocs, term, null); + DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, liveDocs, "field", term); assertNotNull(docsEnum); for(int docID : docs.get(term)) { diff --git a/lucene/src/test/org/apache/lucene/index/TestOmitTf.java b/lucene/src/test/org/apache/lucene/index/TestOmitTf.java index 68b5ed19e99..1383e54a706 100644 --- a/lucene/src/test/org/apache/lucene/index/TestOmitTf.java +++ b/lucene/src/test/org/apache/lucene/index/TestOmitTf.java @@ -447,7 +447,7 @@ public class TestOmitTf extends LuceneTestCase { IndexReader ir = iw.getReader(); iw.close(); Terms terms = MultiFields.getTerms(ir, "foo"); - assertEquals(-1, terms.totalTermFreq(new BytesRef("bar"))); + assertEquals(-1, MultiFields.totalTermFreq(ir, "foo", new BytesRef("bar"))); assertEquals(-1, terms.getSumTotalTermFreq()); ir.close(); dir.close(); diff --git a/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java b/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java index 65269a1acba..6bade83129c 100644 --- a/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java +++ b/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java @@ -342,6 +342,7 @@ public class TestStressIndexing2 extends LuceneTestCase { return; } Terms terms2 = fields.terms(idField); + TermsEnum termsEnum2 = terms2.iterator(null); DocsEnum termDocs1 = null; DocsEnum termDocs2 = null; @@ -354,7 +355,11 @@ public class TestStressIndexing2 extends LuceneTestCase { } termDocs1 = termsEnum.docs(liveDocs1, termDocs1); - termDocs2 = terms2.docs(liveDocs2, term, termDocs2); + if (termsEnum2.seekExact(term, false)) { + termDocs2 = termsEnum2.docs(liveDocs2, termDocs2); + } else { + termDocs2 = null; + } if (termDocs1.nextDoc() == DocsEnum.NO_MORE_DOCS) { // This doc is deleted and wasn't replaced @@ -397,11 +402,11 @@ public class TestStressIndexing2 extends LuceneTestCase { System.out.println(" " + field + ":"); Terms terms3 = fieldsEnum.terms(); assertNotNull(terms3); - TermsEnum termsEnum2 = terms3.iterator(null); + TermsEnum termsEnum3 = terms3.iterator(null); BytesRef term2; - while((term2 = termsEnum2.next()) != null) { - System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum2.totalTermFreq()); - dpEnum = termsEnum2.docsAndPositions(null, dpEnum); + while((term2 = termsEnum3.next()) != null) { + System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq()); + dpEnum = termsEnum3.docsAndPositions(null, dpEnum); if (dpEnum != null) { assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS); final int freq = dpEnum.freq(); @@ -410,7 +415,7 @@ public class TestStressIndexing2 extends LuceneTestCase { System.out.println(" pos=" + dpEnum.nextPosition()); } } else { - dEnum = termsEnum2.docs(null, dEnum); + dEnum = termsEnum3.docs(null, dEnum); assertNotNull(dEnum); assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS); final int freq = dEnum.freq(); @@ -431,11 +436,11 @@ public class TestStressIndexing2 extends LuceneTestCase { System.out.println(" " + field + ":"); Terms terms3 = fieldsEnum.terms(); assertNotNull(terms3); - TermsEnum termsEnum2 = terms3.iterator(null); + TermsEnum termsEnum3 = terms3.iterator(null); BytesRef term2; - while((term2 = termsEnum2.next()) != null) { - System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum2.totalTermFreq()); - dpEnum = termsEnum2.docsAndPositions(null, dpEnum); + while((term2 = termsEnum3.next()) != null) { + System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq()); + dpEnum = termsEnum3.docsAndPositions(null, dpEnum); if (dpEnum != null) { assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS); final int freq = dpEnum.freq(); @@ -444,7 +449,7 @@ public class TestStressIndexing2 extends LuceneTestCase { System.out.println(" pos=" + dpEnum.nextPosition()); } } else { - dEnum = termsEnum2.docs(null, dEnum); + dEnum = termsEnum3.docs(null, dEnum); assertNotNull(dEnum); assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS); final int freq = dEnum.freq(); @@ -467,7 +472,7 @@ public class TestStressIndexing2 extends LuceneTestCase { String field1=null, field2=null; TermsEnum termsEnum1 = null; - TermsEnum termsEnum2 = null; + termsEnum2 = null; DocsEnum docs1=null, docs2=null; // pack both doc and freq into single element for easy sorting diff --git a/modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java b/modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java index 42bfb1cac90..0a54de72985 100755 --- a/modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java +++ b/modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TFValueSource.java @@ -59,7 +59,17 @@ public class TFValueSource extends TermFreqValueSource { public void reset() throws IOException { // no one should call us for deleted docs? - docs = terms==null ? null : terms.docs(null, indexedBytes, null); + if (terms != null) { + final TermsEnum termsEnum = terms.iterator(null); + if (termsEnum.seekExact(indexedBytes, false)) { + docs = termsEnum.docs(null, null); + } else { + docs = null; + } + } else { + docs = null; + } + if (docs == null) { docs = new DocsEnum() { @Override diff --git a/modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java b/modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java index b6e53aa9ea3..9ac168256ff 100755 --- a/modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java +++ b/modules/queries/src/java/org/apache/lucene/queries/function/valuesource/TermFreqValueSource.java @@ -51,7 +51,18 @@ public class TermFreqValueSource extends DocFreqValueSource { public void reset() throws IOException { // no one should call us for deleted docs? - docs = terms == null ? null : terms.docs(null, indexedBytes, null); + + if (terms != null) { + final TermsEnum termsEnum = terms.iterator(null); + if (termsEnum.seekExact(indexedBytes, false)) { + docs = termsEnum.docs(null, null); + } else { + docs = null; + } + } else { + docs = null; + } + if (docs == null) { docs = new DocsEnum() { @Override diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java index 6c8d7c6728f..b26e7e484fa 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -555,7 +555,11 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean { Terms terms = fields.terms(t.field()); if (terms == null) return -1; BytesRef termBytes = t.bytes(); - DocsEnum docs = terms.docs(MultiFields.getLiveDocs(reader), termBytes, null); + final TermsEnum termsEnum = terms.iterator(null); + if (!termsEnum.seekExact(termBytes, false)) { + return -1; + } + DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(reader), null); if (docs == null) return -1; int id = docs.nextDoc(); return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id; @@ -947,7 +951,13 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean { BytesRef termBytes = t.bytes(); Bits liveDocs = reader.getLiveDocs(); - DocsEnum docsEnum = terms==null ? null : terms.docs(liveDocs, termBytes, null); + DocsEnum docsEnum = null; + if (terms != null) { + final TermsEnum termsEnum = terms.iterator(null); + if (termsEnum.seekExact(termBytes, false)) { + docsEnum = termsEnum.docs(MultiFields.getLiveDocs(reader), null); + } + } if (docsEnum != null) { DocsEnum.BulkReadResult readResult = docsEnum.getBulkResult(); diff --git a/solr/core/src/test/org/apache/solr/search/TestRealTimeGet.java b/solr/core/src/test/org/apache/solr/search/TestRealTimeGet.java index 5b33c699cab..09f1b895e9c 100644 --- a/solr/core/src/test/org/apache/solr/search/TestRealTimeGet.java +++ b/solr/core/src/test/org/apache/solr/search/TestRealTimeGet.java @@ -723,8 +723,11 @@ public class TestRealTimeGet extends SolrTestCaseJ4 { Terms terms = fields.terms(t.field()); if (terms == null) return -1; BytesRef termBytes = t.bytes(); - DocsEnum docs = terms.docs(MultiFields.getLiveDocs(r), termBytes, null); - if (docs == null) return -1; + final TermsEnum termsEnum = terms.iterator(null); + if (!termsEnum.seekExact(termBytes, false)) { + return -1; + } + DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null); int id = docs.nextDoc(); if (id != DocIdSetIterator.NO_MORE_DOCS) { int next = docs.nextDoc();