LUCENE-3562: stop caching thread-private TermsEnums in Terms

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1203294 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2011-11-17 17:35:42 +00:00
parent 5a3b635239
commit 9e27723b37
23 changed files with 280 additions and 319 deletions

View File

@ -21,9 +21,7 @@ import java.io.File;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
/*
* Utility to get document frequency and total number of occurrences (sum of the tf for each doc) of a term.
@ -50,10 +48,9 @@ public class GetTermInfo {
public static void getTermInfo(Directory dir, String field, BytesRef termtext) throws Exception {
IndexReader reader = IndexReader.open(dir);
Terms terms =MultiFields.getTerms(reader, field);
long totalTF = HighFreqTerms.getTotalTermFreq(reader, field, termtext);
System.out.printf("%s:%s \t totalTF = %,d \t doc freq = %,d \n",
field, termtext.utf8ToString(), totalTF, terms.docFreq(termtext));
field, termtext.utf8ToString(), totalTF, reader.docFreq(field, termtext));
}
private static void usage() {

View File

@ -84,21 +84,6 @@ public class FilterIndexReader extends IndexReader {
return in.getComparator();
}
@Override
public int docFreq(BytesRef text) throws IOException {
return in.docFreq(text);
}
@Override
public DocsEnum docs(Bits liveDocs, BytesRef text, DocsEnum reuse) throws IOException {
return in.docs(liveDocs, text, reuse);
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef text, DocsAndPositionsEnum reuse) throws IOException {
return in.docsAndPositions(liveDocs, text, reuse);
}
@Override
public long getUniqueTermCount() throws IOException {
return in.getUniqueTermCount();

View File

@ -991,7 +991,12 @@ public abstract class IndexReader implements Cloneable,Closeable {
if (terms == null) {
return 0;
}
return terms.docFreq(term);
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
return termsEnum.docFreq();
} else {
return 0;
}
}
/** Returns the number of documents containing the term
@ -1008,7 +1013,12 @@ public abstract class IndexReader implements Cloneable,Closeable {
if (terms == null) {
return 0;
}
return terms.totalTermFreq(term);
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
return termsEnum.totalTermFreq();
} else {
return 0;
}
}
/** This may return null if the field does not exist.*/
@ -1027,16 +1037,17 @@ public abstract class IndexReader implements Cloneable,Closeable {
assert field != null;
assert term != null;
final Fields fields = fields();
if (fields == null) {
return null;
}
if (fields != null) {
final Terms terms = fields.terms(field);
if (terms != null) {
return terms.docs(liveDocs, term, null);
} else {
return null;
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
return termsEnum.docs(liveDocs, null);
}
}
}
return null;
}
/** Returns {@link DocsAndPositionsEnum} for the specified
* field & term. This may return null, if either the
@ -1046,16 +1057,17 @@ public abstract class IndexReader implements Cloneable,Closeable {
assert field != null;
assert term != null;
final Fields fields = fields();
if (fields == null) {
return null;
}
if (fields != null) {
final Terms terms = fields.terms(field);
if (terms != null) {
return terms.docsAndPositions(liveDocs, term, null);
} else {
return null;
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
return termsEnum.docsAndPositions(liveDocs, null);
}
}
}
return null;
}
/**
* Returns {@link DocsEnum} for the specified field and
@ -1066,16 +1078,16 @@ public abstract class IndexReader implements Cloneable,Closeable {
assert state != null;
assert field != null;
final Fields fields = fields();
if (fields == null) {
return null;
}
if (fields != null) {
final Terms terms = fields.terms(field);
if (terms != null) {
return terms.docs(liveDocs, term, state, null);
} else {
return null;
final TermsEnum termsEnum = terms.iterator(null);
termsEnum.seekExact(term, state);
return termsEnum.docs(liveDocs, null);
}
}
return null;
}
/**
* Returns {@link DocsAndPositionsEnum} for the specified field and
@ -1086,16 +1098,16 @@ public abstract class IndexReader implements Cloneable,Closeable {
assert state != null;
assert field != null;
final Fields fields = fields();
if (fields == null) {
return null;
}
if (fields != null) {
final Terms terms = fields.terms(field);
if (terms != null) {
return terms.docsAndPositions(liveDocs, term, state, null);
} else {
return null;
final TermsEnum termsEnum = terms.iterator(null);
termsEnum.seekExact(term, state);
return termsEnum.docsAndPositions(liveDocs, null);
}
}
return null;
}
/** Deletes the document numbered <code>docNum</code>. Once a document is

View File

@ -156,11 +156,13 @@ public final class MultiFields extends Fields {
assert term != null;
final Terms terms = getTerms(r, field);
if (terms != null) {
return terms.docs(liveDocs, term, null);
} else {
return null;
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
return termsEnum.docs(liveDocs, null);
}
}
return null;
}
/** Returns {@link DocsAndPositionsEnum} for the specified
* field & term. This may return null if the term does
@ -170,11 +172,13 @@ public final class MultiFields extends Fields {
assert term != null;
final Terms terms = getTerms(r, field);
if (terms != null) {
return terms.docsAndPositions(liveDocs, term, null);
} else {
return null;
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
return termsEnum.docsAndPositions(liveDocs, null);
}
}
return null;
}
public MultiFields(Fields[] subs, ReaderUtil.Slice[] subSlices) {
this.subs = subs;
@ -233,6 +237,17 @@ public final class MultiFields extends Fields {
return result;
}
public static long totalTermFreq(IndexReader r, String field, BytesRef text) throws IOException {
final Terms terms = getTerms(r, field);
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(text, true)) {
return termsEnum.totalTermFreq();
}
}
return 0;
}
@Override
public int getUniqueFieldCount() {
return terms.size();

View File

@ -36,7 +36,6 @@ import org.apache.lucene.index.codecs.TermVectorsReader;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.util.StringHelper;
@ -473,17 +472,6 @@ public class SegmentReader extends IndexReader implements Cloneable {
return core.fields;
}
@Override
public int docFreq(String field, BytesRef term) throws IOException {
ensureOpen();
Terms terms = core.fields.terms(field);
if (terms != null) {
return terms.docFreq(term);
} else {
return 0;
}
}
@Override
public int numDocs() {
// Don't call ensureOpen() here (it could affect performance)

View File

@ -20,9 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.util.automaton.CompiledAutomaton;
/**
@ -32,10 +30,6 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
public abstract class Terms {
// Privately cache a TermsEnum per-thread for looking up
// docFreq and getting a private DocsEnum
private final CloseableThreadLocal<TermsEnum> threadEnums = new CloseableThreadLocal<TermsEnum>();
/** Returns an iterator that will step through all
* terms. This method will not return null. If you have
* a previous TermsEnum, for example from a different
@ -83,81 +77,6 @@ public abstract class Terms {
* reuse it. */
public abstract Comparator<BytesRef> getComparator() throws IOException;
/** Returns the number of documents containing the
* specified term text. Returns 0 if the term does not
* exist. */
public int docFreq(BytesRef text) throws IOException {
final TermsEnum termsEnum = getThreadTermsEnum();
if (termsEnum.seekExact(text, true)) {
return termsEnum.docFreq();
} else {
return 0;
}
}
/** Returns the total number of occurrences of this term
* across all documents (the sum of the freq() for each
* doc that has this term). This will be -1 if the
* codec doesn't support this measure. Note that, like
* other term measures, this measure does not take
* deleted documents into account. */
public long totalTermFreq(BytesRef text) throws IOException {
final TermsEnum termsEnum = getThreadTermsEnum();
if (termsEnum.seekExact(text, true)) {
return termsEnum.totalTermFreq();
} else {
return 0;
}
}
/** Get {@link DocsEnum} for the specified term. This
* method may return null if the term does not exist. */
public DocsEnum docs(Bits liveDocs, BytesRef text, DocsEnum reuse) throws IOException {
final TermsEnum termsEnum = getThreadTermsEnum();
if (termsEnum.seekExact(text, true)) {
return termsEnum.docs(liveDocs, reuse);
} else {
return null;
}
}
/** Get {@link DocsAndPositionsEnum} for the specified term. This
* method may return null if the term does not
* exist, or positions were not indexed. */
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef text, DocsAndPositionsEnum reuse) throws IOException {
final TermsEnum termsEnum = getThreadTermsEnum();
if (termsEnum.seekExact(text, true)) {
return termsEnum.docsAndPositions(liveDocs, reuse);
} else {
return null;
}
}
/**
* Expert: Get {@link DocsEnum} for the specified {@link TermState}.
* This method may return <code>null</code> if the term does not exist.
*
* @see TermsEnum#termState()
* @see TermsEnum#seekExact(BytesRef, TermState) */
public DocsEnum docs(Bits liveDocs, BytesRef term, TermState termState, DocsEnum reuse) throws IOException {
final TermsEnum termsEnum = getThreadTermsEnum();
termsEnum.seekExact(term, termState);
return termsEnum.docs(liveDocs, reuse);
}
/**
* Get {@link DocsAndPositionsEnum} for the specified {@link TermState}. This
* method may return <code>null</code> if the term does not exist, or positions were
* not indexed.
*
* @see TermsEnum#termState()
* @see TermsEnum#seekExact(BytesRef, TermState) */
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef term, TermState termState, DocsAndPositionsEnum reuse) throws IOException {
final TermsEnum termsEnum = getThreadTermsEnum();
termsEnum.seekExact(term, termState);
return termsEnum.docsAndPositions(liveDocs, reuse);
}
/** Returns the number of terms for this field, or -1 if this
* measure isn't stored by the codec. Note that, just like
* other term measures, this measure does not take deleted
@ -172,7 +91,7 @@ public abstract class Terms {
* into account. */
public abstract long getSumTotalTermFreq() throws IOException;
/** Returns the sum of {@link #docFreq(BytesRef)} for
/** Returns the sum of {@link TermsEnum#docFreq()} for
* all terms in this field, or -1 if this measure isn't
* stored by the codec. Note that, just like other term
* measures, this measure does not take deleted documents
@ -186,33 +105,5 @@ public abstract class Terms {
* into account. */
public abstract int getDocCount() throws IOException;
/**
* Returns a thread-private {@link TermsEnum} instance. Obtaining
* {@link TermsEnum} from this method might be more efficient than using
* {@link #iterator(TermsEnum)} directly since this method doesn't necessarily create a
* new {@link TermsEnum} instance.
* <p>
* NOTE: {@link TermsEnum} instances obtained from this method must not be
* shared across threads. The enum should only be used within a local context
* where other threads can't access it.
*
* @return a thread-private {@link TermsEnum} instance
* @throws IOException
* if an IOException occurs
* @lucene.internal
*/
public TermsEnum getThreadTermsEnum() throws IOException {
TermsEnum termsEnum = threadEnums.get();
if (termsEnum == null) {
termsEnum = iterator(null);
threadEnums.set(termsEnum);
}
return termsEnum;
}
// subclass must close when done:
protected void close() {
threadEnums.close();
}
public final static Terms[] EMPTY_ARRAY = new Terms[0];
}

View File

@ -17,7 +17,6 @@ package org.apache.lucene.index.codecs;
* limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;
import java.util.Comparator;
@ -181,15 +180,9 @@ public class BlockTermsReader extends FieldsProducer {
}
}
} finally {
try {
if (postingsReader != null) {
postingsReader.close();
}
} finally {
for(FieldReader field : fields.values()) {
field.close();
}
}
}
}
@ -238,7 +231,7 @@ public class BlockTermsReader extends FieldsProducer {
}
}
private class FieldReader extends Terms implements Closeable {
private class FieldReader extends Terms {
final long numTerms;
final FieldInfo fieldInfo;
final long termsStartPointer;
@ -261,11 +254,6 @@ public class BlockTermsReader extends FieldsProducer {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
public void close() {
super.close();
}
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
return new SegmentTermsEnum();

View File

@ -18,7 +18,6 @@ package org.apache.lucene.index.codecs;
*/
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Collection;
@ -194,9 +193,6 @@ public class BlockTreeTermsReader extends FieldsProducer {
try {
IOUtils.close(in, postingsReader);
} finally {
for(FieldReader field : fields.values()) {
field.close();
}
// Clear so refs to terms index is GCable even if
// app hangs onto us:
fields.clear();
@ -392,7 +388,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
final Outputs<BytesRef> fstOutputs = ByteSequenceOutputs.getSingleton();
final BytesRef NO_OUTPUT = fstOutputs.getNoOutput();
public final class FieldReader extends Terms implements Closeable {
public final class FieldReader extends Terms {
final long numTerms;
final FieldInfo fieldInfo;
final long sumTotalTermFreq;
@ -450,11 +446,6 @@ public class BlockTreeTermsReader extends FieldsProducer {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
public void close() {
super.close();
}
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
return new SegmentTermsEnum();
@ -744,7 +735,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
}
}
private final BytesRef savedStartTerm;
private BytesRef savedStartTerm;
// TODO: in some cases we can filter by length? eg
// regexp foo*bar must be at least length 6 bytes
@ -784,7 +775,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
f.load(rootCode);
// for assert:
savedStartTerm = startTerm == null ? null : new BytesRef(startTerm);
assert setSavedStartTerm(startTerm);
currentFrame = f;
if (startTerm != null) {
@ -792,6 +783,12 @@ public class BlockTreeTermsReader extends FieldsProducer {
}
}
// only for assert:
private boolean setSavedStartTerm(BytesRef startTerm) {
savedStartTerm = startTerm == null ? null : new BytesRef(startTerm);
return true;
}
@Override
public TermState termState() throws IOException {
currentFrame.decodeMetaData();
@ -1163,7 +1160,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
// Iterates through terms in this field
private final class SegmentTermsEnum extends TermsEnum {
private final IndexInput in;
private IndexInput in;
private Frame[] stack;
private final Frame staticFrame;
@ -1182,29 +1179,21 @@ public class BlockTreeTermsReader extends FieldsProducer {
final BytesRef term = new BytesRef();
@SuppressWarnings("unchecked") private FST.Arc<BytesRef>[] arcs = new FST.Arc[5];
@SuppressWarnings("unchecked") private FST.Arc<BytesRef>[] arcs = new FST.Arc[1];
public SegmentTermsEnum() throws IOException {
//if (DEBUG) System.out.println("BTTR.init seg=" + segment);
in = (IndexInput) BlockTreeTermsReader.this.in.clone();
stack = new Frame[5];
for(int stackOrd=0;stackOrd<stack.length;stackOrd++) {
stack[stackOrd] = new Frame(stackOrd);
}
stack = new Frame[0];
// Used to hold seek by TermState, or cached seek
staticFrame = new Frame(-1);
// Init w/ root block; don't use index since it may
// not (and need not) have been loaded
for(int arcIdx=0;arcIdx<arcs.length;arcIdx++) {
arcs[arcIdx] = new FST.Arc<BytesRef>();
}
// Init w/ root block; don't use index since it may
// not (and need not) have been loaded
//final FST.Arc<BytesRef> arc = index.getFirstArc(arcs[0]);
// Empty string prefix must have an output in the index!
//assert arc.isFinal();
currentFrame = staticFrame;
final FST.Arc<BytesRef> arc;
if (index != null) {
@ -1214,8 +1203,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
} else {
arc = null;
}
currentFrame = pushFrame(arc, rootCode, 0);
currentFrame.loadBlock();
currentFrame = staticFrame;
//currentFrame = pushFrame(arc, rootCode, 0);
//currentFrame.loadBlock();
validIndexPrefix = 0;
// if (DEBUG) {
// System.out.println("init frame state " + currentFrame.ord);
@ -1226,6 +1216,12 @@ public class BlockTreeTermsReader extends FieldsProducer {
// computeBlockStats().print(System.out);
}
private void initIndexInput() {
if (this.in == null) {
this.in = (IndexInput) BlockTreeTermsReader.this.in.clone();
}
}
/** Runs next() through the entire terms dict,
* computing aggregate statistics. */
public Stats computeBlockStats() throws IOException {
@ -1975,6 +1971,20 @@ public class BlockTreeTermsReader extends FieldsProducer {
@Override
public BytesRef next() throws IOException {
if (in == null) {
// Fresh TermsEnum; seek to first term:
final FST.Arc<BytesRef> arc;
if (index != null) {
arc = index.getFirstArc(arcs[0]);
// Empty string prefix must have an output in the index!
assert arc.isFinal();
} else {
arc = null;
}
currentFrame = pushFrame(arc, rootCode, 0);
currentFrame.loadBlock();
}
targetBeforeCurrentLength = currentFrame.ord;
assert !eof;
@ -2242,6 +2252,11 @@ public class BlockTreeTermsReader extends FieldsProducer {
use. */
void loadBlock() throws IOException {
// Clone the IndexInput lazily, so that consumers
// that just pull a TermsEnum to
// seekExact(TermState) don't pay this cost:
initIndexInput();
if (nextEnt != -1) {
// Already loaded
return;

View File

@ -20,20 +20,23 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.*;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.Bits;
/**
* MultiPhraseQuery is a generalized version of PhraseQuery, with an added
@ -134,6 +137,7 @@ public class MultiPhraseQuery extends Query {
private class MultiPhraseWeight extends Weight {
private final Similarity similarity;
private final Similarity.Stats stats;
private final Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
public MultiPhraseWeight(IndexSearcher searcher)
throws IOException {
@ -144,7 +148,11 @@ public class MultiPhraseQuery extends Query {
ArrayList<TermStatistics> allTermStats = new ArrayList<TermStatistics>();
for(final Term[] terms: termArrays) {
for (Term term: terms) {
TermContext termContext = TermContext.build(context, term, true);
TermContext termContext = termContexts.get(term);
if (termContext == null) {
termContext = TermContext.build(context, term, true);
termContexts.put(term, termContext);
}
allTermStats.add(searcher.termStatistics(term, termContext));
}
}
@ -174,6 +182,14 @@ public class MultiPhraseQuery extends Query {
PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[termArrays.size()];
final Terms fieldTerms = reader.terms(field);
if (fieldTerms == null) {
return null;
}
// Reuse single TermsEnum below:
final TermsEnum termsEnum = fieldTerms.iterator(null);
for (int pos=0; pos<postingsFreqs.length; pos++) {
Term[] terms = termArrays.get(pos);
@ -181,31 +197,43 @@ public class MultiPhraseQuery extends Query {
int docFreq;
if (terms.length > 1) {
postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, reader, terms);
postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum);
// coarse -- this overcounts since a given doc can
// have more than one terms:
// have more than one term:
docFreq = 0;
for(int termIdx=0;termIdx<terms.length;termIdx++) {
docFreq += reader.docFreq(terms[termIdx]);
final Term term = terms[termIdx];
TermState termState = termContexts.get(term).get(context.ord);
if (termState == null) {
// Term not in reader
continue;
}
termsEnum.seekExact(term.bytes(), termState);
docFreq += termsEnum.docFreq();
}
if (docFreq == 0) {
// None of the terms are in this reader
return null;
}
} else {
final Term term = terms[0];
postingsEnum = reader.termPositionsEnum(liveDocs,
term.field(),
term.bytes());
if (postingsEnum == null) {
if (reader.termDocsEnum(liveDocs, term.field(), term.bytes()) != null) {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
} else {
// term does not exist
TermState termState = termContexts.get(term).get(context.ord);
if (termState == null) {
// Term not in reader
return null;
}
termsEnum.seekExact(term.bytes(), termState);
postingsEnum = termsEnum.docsAndPositions(liveDocs, null);
if (postingsEnum == null) {
// term does exist, but has no positions
assert termsEnum.docs(liveDocs, null) != null: "termstate found but no term exists in reader";
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
}
docFreq = reader.docFreq(term.field(), term.bytes());
docFreq = termsEnum.docFreq();
}
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]);
@ -437,20 +465,22 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
private DocsQueue _queue;
private IntQueue _posList;
public UnionDocsAndPositionsEnum(Bits liveDocs, IndexReader indexReader, Term[] terms) throws IOException {
public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
List<DocsAndPositionsEnum> docsEnums = new LinkedList<DocsAndPositionsEnum>();
for (int i = 0; i < terms.length; i++) {
DocsAndPositionsEnum postings = indexReader.termPositionsEnum(liveDocs,
terms[i].field(),
terms[i].bytes());
if (postings != null) {
docsEnums.add(postings);
} else {
if (indexReader.termDocsEnum(liveDocs, terms[i].field(), terms[i].bytes()) != null) {
final Term term = terms[i];
TermState termState = termContexts.get(term).get(context.ord);
if (termState == null) {
// Term doesn't exist in reader
continue;
}
termsEnum.seekExact(term.bytes(), termState);
DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null);
if (postings == null) {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + terms[i].field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + terms[i].text() + ")");
}
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
}
docsEnums.add(postings);
}
_queue = new DocsQueue(docsEnums);

View File

@ -18,24 +18,24 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.util.Set;
import java.util.ArrayList;
import java.util.Set;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
/** A Query that matches documents containing a particular sequence of terms.
* A PhraseQuery is built by QueryParser for input like <code>"new york"</code>.
@ -222,6 +222,15 @@ public class PhraseQuery extends Query {
final IndexReader reader = context.reader;
final Bits liveDocs = acceptDocs;
PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.size()];
final Terms fieldTerms = reader.terms(field);
if (fieldTerms == null) {
return null;
}
// Reuse single TermsEnum below:
final TermsEnum te = fieldTerms.iterator(null);
for (int i = 0; i < terms.size(); i++) {
final Term t = terms.get(i);
final TermState state = states[i].get(context.ord);
@ -229,20 +238,16 @@ public class PhraseQuery extends Query {
assert termNotInReader(reader, field, t.bytes()): "no termstate found but term exists in reader";
return null;
}
DocsAndPositionsEnum postingsEnum = reader.termPositionsEnum(liveDocs,
t.field(),
t.bytes(),
state);
te.seekExact(t.bytes(), state);
DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null);
// PhraseQuery on a field that did not index
// positions.
if (postingsEnum == null) {
assert (reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null) : "termstate found but no term exists in reader";
assert reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null: "termstate found but no term exists in reader";
// term does exist, but has no positions
throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
}
// get the docFreq without seeking
TermsEnum te = reader.fields().terms(field).getThreadTermsEnum();
te.seekExact(t.bytes(), state);
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i).intValue(), t);
}
@ -264,10 +269,9 @@ public class PhraseQuery extends Query {
}
}
private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException {
// only called from assert
final Terms terms = reader.terms(field);
return terms == null || terms.docFreq(bytes) == 0;
private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException {
return reader.docFreq(field, bytes) == 0;
}
@Override

View File

@ -23,7 +23,6 @@ import java.util.Set;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader.ReaderContext;
@ -41,13 +40,13 @@ import org.apache.lucene.util.ToStringUtils;
*/
public class TermQuery extends Query {
private final Term term;
private int docFreq;
private transient TermContext perReaderTermState;
private final int docFreq;
private final TermContext perReaderTermState;
final class TermWeight extends Weight {
private final Similarity similarity;
private final Similarity.Stats stats;
private transient TermContext termStates;
private final TermContext termStates;
public TermWeight(IndexSearcher searcher, TermContext termStates)
throws IOException {
@ -108,7 +107,7 @@ public class TermQuery extends Query {
return null;
}
//System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum();
final TermsEnum termsEnum = context.reader.terms(term.field()).iterator(null);
termsEnum.seekExact(term.bytes(), state);
return termsEnum;
}
@ -116,8 +115,7 @@ public class TermQuery extends Query {
private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException {
// only called from assert
//System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString());
final Terms terms = reader.terms(field);
return terms == null || terms.docFreq(bytes) == 0;
return reader.docFreq(field, bytes) == 0;
}
@Override

View File

@ -17,7 +17,6 @@ package org.apache.lucene.search.spans;
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Term;
@ -26,7 +25,6 @@ import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.ToStringUtils;
@ -99,7 +97,7 @@ public class SpanTermQuery extends SpanQuery {
if (fields != null) {
final Terms terms = fields.terms(term.field());
if (terms != null) {
final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share!
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term.bytes(), true)) {
state = termsEnum.termState();
} else {
@ -119,7 +117,7 @@ public class SpanTermQuery extends SpanQuery {
return TermSpans.EMPTY_TERM_SPANS;
}
final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum();
final TermsEnum termsEnum = context.reader.terms(term.field()).iterator(null);
termsEnum.seekExact(term.bytes(), state);
final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null);

View File

@ -46,7 +46,6 @@ public final class TermContext {
//public static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
/**
* Creates an empty {@link TermContext} from a {@link ReaderContext}
*/
@ -94,7 +93,7 @@ public final class TermContext {
if (fields != null) {
final Terms terms = fields.terms(field);
if (terms != null) {
final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share!
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(bytes, cache)) {
final TermState termState = termsEnum.termState();
//if (DEBUG) System.out.println(" found");

View File

@ -349,7 +349,9 @@ public class TestDocsAndPositions extends LuceneTestCase {
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
// now reuse and check again
disi = r.terms("foo").docs(null, new BytesRef("bar"), disi);
TermsEnum te = r.terms("foo").iterator(null);
assertTrue(te.seekExact(new BytesRef("bar"), true));
disi = te.docs(null, disi);
docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
@ -372,7 +374,9 @@ public class TestDocsAndPositions extends LuceneTestCase {
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
// now reuse and check again
disi = r.terms("foo").docsAndPositions(null, new BytesRef("bar"), disi);
TermsEnum te = r.terms("foo").iterator(null);
assertTrue(te.seekExact(new BytesRef("bar"), true));
disi = te.docsAndPositions(null, disi);
docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

View File

@ -199,7 +199,7 @@ public class TestDocumentWriter extends LuceneTestCase {
writer.close();
SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random));
DocsAndPositionsEnum termPositions = reader.fields().terms("f1").docsAndPositions(reader.getLiveDocs(), new BytesRef("a"), null);
DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, reader.getLiveDocs(), "f1", new BytesRef("a"));
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
int freq = termPositions.freq();
assertEquals(3, freq);
@ -243,18 +243,18 @@ public class TestDocumentWriter extends LuceneTestCase {
writer.close();
SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random));
DocsAndPositionsEnum termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getLiveDocs(), new BytesRef("term1"), null);
DocsAndPositionsEnum termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term1"));
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
assertEquals(1, termPositions.freq());
assertEquals(0, termPositions.nextPosition());
termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getLiveDocs(), new BytesRef("term2"), null);
termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term2"));
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
assertEquals(2, termPositions.freq());
assertEquals(1, termPositions.nextPosition());
assertEquals(3, termPositions.nextPosition());
termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getLiveDocs(), new BytesRef("term3"), null);
termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term3"));
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
assertEquals(1, termPositions.freq());
assertEquals(2, termPositions.nextPosition());

View File

@ -1340,13 +1340,12 @@ public class TestIndexReader extends LuceneTestCase
writer.addDocument(d);
IndexReader r = writer.getReader();
writer.close();
Terms terms = MultiFields.getTerms(r, "f");
try {
// Make sure codec impls totalTermFreq (eg PreFlex doesn't)
Assume.assumeTrue(terms.totalTermFreq(new BytesRef("b")) != -1);
assertEquals(1, terms.totalTermFreq(new BytesRef("b")));
assertEquals(2, terms.totalTermFreq(new BytesRef("a")));
assertEquals(1, terms.totalTermFreq(new BytesRef("b")));
Assume.assumeTrue(MultiFields.totalTermFreq(r, "f", new BytesRef("b")) != -1);
assertEquals(1, MultiFields.totalTermFreq(r, "f", new BytesRef("b")));
assertEquals(2, MultiFields.totalTermFreq(r, "f", new BytesRef("a")));
assertEquals(1, MultiFields.totalTermFreq(r, "f", new BytesRef("b")));
} finally {
r.close();
dir.close();

View File

@ -113,7 +113,6 @@ public class TestMultiFields extends LuceneTestCase {
for(int delDoc : deleted) {
assertFalse(liveDocs.get(delDoc));
}
Terms terms2 = MultiFields.getTerms(reader, "field");
for(int i=0;i<100;i++) {
BytesRef term = terms.get(random.nextInt(terms.size()));
@ -121,7 +120,7 @@ public class TestMultiFields extends LuceneTestCase {
System.out.println("TEST: seek term="+ UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
}
DocsEnum docsEnum = terms2.docs(liveDocs, term, null);
DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, liveDocs, "field", term);
assertNotNull(docsEnum);
for(int docID : docs.get(term)) {

View File

@ -447,7 +447,7 @@ public class TestOmitTf extends LuceneTestCase {
IndexReader ir = iw.getReader();
iw.close();
Terms terms = MultiFields.getTerms(ir, "foo");
assertEquals(-1, terms.totalTermFreq(new BytesRef("bar")));
assertEquals(-1, MultiFields.totalTermFreq(ir, "foo", new BytesRef("bar")));
assertEquals(-1, terms.getSumTotalTermFreq());
ir.close();
dir.close();

View File

@ -342,6 +342,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
return;
}
Terms terms2 = fields.terms(idField);
TermsEnum termsEnum2 = terms2.iterator(null);
DocsEnum termDocs1 = null;
DocsEnum termDocs2 = null;
@ -354,7 +355,11 @@ public class TestStressIndexing2 extends LuceneTestCase {
}
termDocs1 = termsEnum.docs(liveDocs1, termDocs1);
termDocs2 = terms2.docs(liveDocs2, term, termDocs2);
if (termsEnum2.seekExact(term, false)) {
termDocs2 = termsEnum2.docs(liveDocs2, termDocs2);
} else {
termDocs2 = null;
}
if (termDocs1.nextDoc() == DocsEnum.NO_MORE_DOCS) {
// This doc is deleted and wasn't replaced
@ -397,11 +402,11 @@ public class TestStressIndexing2 extends LuceneTestCase {
System.out.println(" " + field + ":");
Terms terms3 = fieldsEnum.terms();
assertNotNull(terms3);
TermsEnum termsEnum2 = terms3.iterator(null);
TermsEnum termsEnum3 = terms3.iterator(null);
BytesRef term2;
while((term2 = termsEnum2.next()) != null) {
System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum2.totalTermFreq());
dpEnum = termsEnum2.docsAndPositions(null, dpEnum);
while((term2 = termsEnum3.next()) != null) {
System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq());
dpEnum = termsEnum3.docsAndPositions(null, dpEnum);
if (dpEnum != null) {
assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
final int freq = dpEnum.freq();
@ -410,7 +415,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
System.out.println(" pos=" + dpEnum.nextPosition());
}
} else {
dEnum = termsEnum2.docs(null, dEnum);
dEnum = termsEnum3.docs(null, dEnum);
assertNotNull(dEnum);
assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
final int freq = dEnum.freq();
@ -431,11 +436,11 @@ public class TestStressIndexing2 extends LuceneTestCase {
System.out.println(" " + field + ":");
Terms terms3 = fieldsEnum.terms();
assertNotNull(terms3);
TermsEnum termsEnum2 = terms3.iterator(null);
TermsEnum termsEnum3 = terms3.iterator(null);
BytesRef term2;
while((term2 = termsEnum2.next()) != null) {
System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum2.totalTermFreq());
dpEnum = termsEnum2.docsAndPositions(null, dpEnum);
while((term2 = termsEnum3.next()) != null) {
System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq());
dpEnum = termsEnum3.docsAndPositions(null, dpEnum);
if (dpEnum != null) {
assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
final int freq = dpEnum.freq();
@ -444,7 +449,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
System.out.println(" pos=" + dpEnum.nextPosition());
}
} else {
dEnum = termsEnum2.docs(null, dEnum);
dEnum = termsEnum3.docs(null, dEnum);
assertNotNull(dEnum);
assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
final int freq = dEnum.freq();
@ -467,7 +472,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
String field1=null, field2=null;
TermsEnum termsEnum1 = null;
TermsEnum termsEnum2 = null;
termsEnum2 = null;
DocsEnum docs1=null, docs2=null;
// pack both doc and freq into single element for easy sorting

View File

@ -59,7 +59,17 @@ public class TFValueSource extends TermFreqValueSource {
public void reset() throws IOException {
// no one should call us for deleted docs?
docs = terms==null ? null : terms.docs(null, indexedBytes, null);
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(indexedBytes, false)) {
docs = termsEnum.docs(null, null);
} else {
docs = null;
}
} else {
docs = null;
}
if (docs == null) {
docs = new DocsEnum() {
@Override

View File

@ -51,7 +51,18 @@ public class TermFreqValueSource extends DocFreqValueSource {
public void reset() throws IOException {
// no one should call us for deleted docs?
docs = terms == null ? null : terms.docs(null, indexedBytes, null);
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(indexedBytes, false)) {
docs = termsEnum.docs(null, null);
} else {
docs = null;
}
} else {
docs = null;
}
if (docs == null) {
docs = new DocsEnum() {
@Override

View File

@ -555,7 +555,11 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
Terms terms = fields.terms(t.field());
if (terms == null) return -1;
BytesRef termBytes = t.bytes();
DocsEnum docs = terms.docs(MultiFields.getLiveDocs(reader), termBytes, null);
final TermsEnum termsEnum = terms.iterator(null);
if (!termsEnum.seekExact(termBytes, false)) {
return -1;
}
DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(reader), null);
if (docs == null) return -1;
int id = docs.nextDoc();
return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
@ -947,7 +951,13 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
BytesRef termBytes = t.bytes();
Bits liveDocs = reader.getLiveDocs();
DocsEnum docsEnum = terms==null ? null : terms.docs(liveDocs, termBytes, null);
DocsEnum docsEnum = null;
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(termBytes, false)) {
docsEnum = termsEnum.docs(MultiFields.getLiveDocs(reader), null);
}
}
if (docsEnum != null) {
DocsEnum.BulkReadResult readResult = docsEnum.getBulkResult();

View File

@ -723,8 +723,11 @@ public class TestRealTimeGet extends SolrTestCaseJ4 {
Terms terms = fields.terms(t.field());
if (terms == null) return -1;
BytesRef termBytes = t.bytes();
DocsEnum docs = terms.docs(MultiFields.getLiveDocs(r), termBytes, null);
if (docs == null) return -1;
final TermsEnum termsEnum = terms.iterator(null);
if (!termsEnum.seekExact(termBytes, false)) {
return -1;
}
DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null);
int id = docs.nextDoc();
if (id != DocIdSetIterator.NO_MORE_DOCS) {
int next = docs.nextDoc();