LUCENE-3562: stop caching thread-private TermsEnums in Terms

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1203294 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2011-11-17 17:35:42 +00:00
parent 5a3b635239
commit 9e27723b37
23 changed files with 280 additions and 319 deletions

View File

@ -21,9 +21,7 @@ import java.io.File;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
/* /*
* Utility to get document frequency and total number of occurrences (sum of the tf for each doc) of a term. * Utility to get document frequency and total number of occurrences (sum of the tf for each doc) of a term.
@ -50,10 +48,9 @@ public class GetTermInfo {
public static void getTermInfo(Directory dir, String field, BytesRef termtext) throws Exception { public static void getTermInfo(Directory dir, String field, BytesRef termtext) throws Exception {
IndexReader reader = IndexReader.open(dir); IndexReader reader = IndexReader.open(dir);
Terms terms =MultiFields.getTerms(reader, field);
long totalTF = HighFreqTerms.getTotalTermFreq(reader, field, termtext); long totalTF = HighFreqTerms.getTotalTermFreq(reader, field, termtext);
System.out.printf("%s:%s \t totalTF = %,d \t doc freq = %,d \n", System.out.printf("%s:%s \t totalTF = %,d \t doc freq = %,d \n",
field, termtext.utf8ToString(), totalTF, terms.docFreq(termtext)); field, termtext.utf8ToString(), totalTF, reader.docFreq(field, termtext));
} }
private static void usage() { private static void usage() {

View File

@ -84,21 +84,6 @@ public class FilterIndexReader extends IndexReader {
return in.getComparator(); return in.getComparator();
} }
@Override
public int docFreq(BytesRef text) throws IOException {
return in.docFreq(text);
}
@Override
public DocsEnum docs(Bits liveDocs, BytesRef text, DocsEnum reuse) throws IOException {
return in.docs(liveDocs, text, reuse);
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef text, DocsAndPositionsEnum reuse) throws IOException {
return in.docsAndPositions(liveDocs, text, reuse);
}
@Override @Override
public long getUniqueTermCount() throws IOException { public long getUniqueTermCount() throws IOException {
return in.getUniqueTermCount(); return in.getUniqueTermCount();

View File

@ -991,7 +991,12 @@ public abstract class IndexReader implements Cloneable,Closeable {
if (terms == null) { if (terms == null) {
return 0; return 0;
} }
return terms.docFreq(term); final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
return termsEnum.docFreq();
} else {
return 0;
}
} }
/** Returns the number of documents containing the term /** Returns the number of documents containing the term
@ -1008,7 +1013,12 @@ public abstract class IndexReader implements Cloneable,Closeable {
if (terms == null) { if (terms == null) {
return 0; return 0;
} }
return terms.totalTermFreq(term); final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
return termsEnum.totalTermFreq();
} else {
return 0;
}
} }
/** This may return null if the field does not exist.*/ /** This may return null if the field does not exist.*/
@ -1027,15 +1037,16 @@ public abstract class IndexReader implements Cloneable,Closeable {
assert field != null; assert field != null;
assert term != null; assert term != null;
final Fields fields = fields(); final Fields fields = fields();
if (fields == null) { if (fields != null) {
return null; final Terms terms = fields.terms(field);
} if (terms != null) {
final Terms terms = fields.terms(field); final TermsEnum termsEnum = terms.iterator(null);
if (terms != null) { if (termsEnum.seekExact(term, true)) {
return terms.docs(liveDocs, term, null); return termsEnum.docs(liveDocs, null);
} else { }
return null; }
} }
return null;
} }
/** Returns {@link DocsAndPositionsEnum} for the specified /** Returns {@link DocsAndPositionsEnum} for the specified
@ -1046,15 +1057,16 @@ public abstract class IndexReader implements Cloneable,Closeable {
assert field != null; assert field != null;
assert term != null; assert term != null;
final Fields fields = fields(); final Fields fields = fields();
if (fields == null) { if (fields != null) {
return null; final Terms terms = fields.terms(field);
} if (terms != null) {
final Terms terms = fields.terms(field); final TermsEnum termsEnum = terms.iterator(null);
if (terms != null) { if (termsEnum.seekExact(term, true)) {
return terms.docsAndPositions(liveDocs, term, null); return termsEnum.docsAndPositions(liveDocs, null);
} else { }
return null; }
} }
return null;
} }
/** /**
@ -1066,15 +1078,15 @@ public abstract class IndexReader implements Cloneable,Closeable {
assert state != null; assert state != null;
assert field != null; assert field != null;
final Fields fields = fields(); final Fields fields = fields();
if (fields == null) { if (fields != null) {
return null; final Terms terms = fields.terms(field);
} if (terms != null) {
final Terms terms = fields.terms(field); final TermsEnum termsEnum = terms.iterator(null);
if (terms != null) { termsEnum.seekExact(term, state);
return terms.docs(liveDocs, term, state, null); return termsEnum.docs(liveDocs, null);
} else { }
return null;
} }
return null;
} }
/** /**
@ -1086,15 +1098,15 @@ public abstract class IndexReader implements Cloneable,Closeable {
assert state != null; assert state != null;
assert field != null; assert field != null;
final Fields fields = fields(); final Fields fields = fields();
if (fields == null) { if (fields != null) {
return null; final Terms terms = fields.terms(field);
} if (terms != null) {
final Terms terms = fields.terms(field); final TermsEnum termsEnum = terms.iterator(null);
if (terms != null) { termsEnum.seekExact(term, state);
return terms.docsAndPositions(liveDocs, term, state, null); return termsEnum.docsAndPositions(liveDocs, null);
} else { }
return null;
} }
return null;
} }

View File

@ -156,10 +156,12 @@ public final class MultiFields extends Fields {
assert term != null; assert term != null;
final Terms terms = getTerms(r, field); final Terms terms = getTerms(r, field);
if (terms != null) { if (terms != null) {
return terms.docs(liveDocs, term, null); final TermsEnum termsEnum = terms.iterator(null);
} else { if (termsEnum.seekExact(term, true)) {
return null; return termsEnum.docs(liveDocs, null);
}
} }
return null;
} }
/** Returns {@link DocsAndPositionsEnum} for the specified /** Returns {@link DocsAndPositionsEnum} for the specified
@ -170,10 +172,12 @@ public final class MultiFields extends Fields {
assert term != null; assert term != null;
final Terms terms = getTerms(r, field); final Terms terms = getTerms(r, field);
if (terms != null) { if (terms != null) {
return terms.docsAndPositions(liveDocs, term, null); final TermsEnum termsEnum = terms.iterator(null);
} else { if (termsEnum.seekExact(term, true)) {
return null; return termsEnum.docsAndPositions(liveDocs, null);
}
} }
return null;
} }
public MultiFields(Fields[] subs, ReaderUtil.Slice[] subSlices) { public MultiFields(Fields[] subs, ReaderUtil.Slice[] subSlices) {
@ -233,6 +237,17 @@ public final class MultiFields extends Fields {
return result; return result;
} }
public static long totalTermFreq(IndexReader r, String field, BytesRef text) throws IOException {
final Terms terms = getTerms(r, field);
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(text, true)) {
return termsEnum.totalTermFreq();
}
}
return 0;
}
@Override @Override
public int getUniqueFieldCount() { public int getUniqueFieldCount() {
return terms.size(); return terms.size();

View File

@ -36,7 +36,6 @@ import org.apache.lucene.index.codecs.TermVectorsReader;
import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BitVector; import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CloseableThreadLocal; import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.StringHelper;
@ -473,17 +472,6 @@ public class SegmentReader extends IndexReader implements Cloneable {
return core.fields; return core.fields;
} }
@Override
public int docFreq(String field, BytesRef term) throws IOException {
ensureOpen();
Terms terms = core.fields.terms(field);
if (terms != null) {
return terms.docFreq(term);
} else {
return 0;
}
}
@Override @Override
public int numDocs() { public int numDocs() {
// Don't call ensureOpen() here (it could affect performance) // Don't call ensureOpen() here (it could affect performance)

View File

@ -20,9 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException; import java.io.IOException;
import java.util.Comparator; import java.util.Comparator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.util.automaton.CompiledAutomaton; import org.apache.lucene.util.automaton.CompiledAutomaton;
/** /**
@ -32,10 +30,6 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
public abstract class Terms { public abstract class Terms {
// Privately cache a TermsEnum per-thread for looking up
// docFreq and getting a private DocsEnum
private final CloseableThreadLocal<TermsEnum> threadEnums = new CloseableThreadLocal<TermsEnum>();
/** Returns an iterator that will step through all /** Returns an iterator that will step through all
* terms. This method will not return null. If you have * terms. This method will not return null. If you have
* a previous TermsEnum, for example from a different * a previous TermsEnum, for example from a different
@ -83,81 +77,6 @@ public abstract class Terms {
* reuse it. */ * reuse it. */
public abstract Comparator<BytesRef> getComparator() throws IOException; public abstract Comparator<BytesRef> getComparator() throws IOException;
/** Returns the number of documents containing the
* specified term text. Returns 0 if the term does not
* exist. */
public int docFreq(BytesRef text) throws IOException {
final TermsEnum termsEnum = getThreadTermsEnum();
if (termsEnum.seekExact(text, true)) {
return termsEnum.docFreq();
} else {
return 0;
}
}
/** Returns the total number of occurrences of this term
* across all documents (the sum of the freq() for each
* doc that has this term). This will be -1 if the
* codec doesn't support this measure. Note that, like
* other term measures, this measure does not take
* deleted documents into account. */
public long totalTermFreq(BytesRef text) throws IOException {
final TermsEnum termsEnum = getThreadTermsEnum();
if (termsEnum.seekExact(text, true)) {
return termsEnum.totalTermFreq();
} else {
return 0;
}
}
/** Get {@link DocsEnum} for the specified term. This
* method may return null if the term does not exist. */
public DocsEnum docs(Bits liveDocs, BytesRef text, DocsEnum reuse) throws IOException {
final TermsEnum termsEnum = getThreadTermsEnum();
if (termsEnum.seekExact(text, true)) {
return termsEnum.docs(liveDocs, reuse);
} else {
return null;
}
}
/** Get {@link DocsEnum} for the specified term. This
 * method may return null if the term does not
 * exist, or positions were not indexed. */
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef text, DocsAndPositionsEnum reuse) throws IOException {
final TermsEnum termsEnum = getThreadTermsEnum();
if (termsEnum.seekExact(text, true)) {
return termsEnum.docsAndPositions(liveDocs, reuse);
} else {
return null;
}
}
/**
* Expert: Get {@link DocsEnum} for the specified {@link TermState}.
* This method may return <code>null</code> if the term does not exist.
*
* @see TermsEnum#termState()
* @see TermsEnum#seekExact(BytesRef, TermState) */
public DocsEnum docs(Bits liveDocs, BytesRef term, TermState termState, DocsEnum reuse) throws IOException {
final TermsEnum termsEnum = getThreadTermsEnum();
termsEnum.seekExact(term, termState);
return termsEnum.docs(liveDocs, reuse);
}
/**
 * Get {@link DocsEnum} for the specified {@link TermState}. This
 * method may return <code>null</code> if the term does not exist, or positions were
 * not indexed.
*
* @see TermsEnum#termState()
* @see TermsEnum#seekExact(BytesRef, TermState) */
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef term, TermState termState, DocsAndPositionsEnum reuse) throws IOException {
final TermsEnum termsEnum = getThreadTermsEnum();
termsEnum.seekExact(term, termState);
return termsEnum.docsAndPositions(liveDocs, reuse);
}
/** Returns the number of terms for this field, or -1 if this /** Returns the number of terms for this field, or -1 if this
* measure isn't stored by the codec. Note that, just like * measure isn't stored by the codec. Note that, just like
* other term measures, this measure does not take deleted * other term measures, this measure does not take deleted
@ -172,7 +91,7 @@ public abstract class Terms {
* into account. */ * into account. */
public abstract long getSumTotalTermFreq() throws IOException; public abstract long getSumTotalTermFreq() throws IOException;
/** Returns the sum of {@link #docFreq(BytesRef)} for /** Returns the sum of {@link TermsEnum#docFreq()} for
* all terms in this field, or -1 if this measure isn't * all terms in this field, or -1 if this measure isn't
* stored by the codec. Note that, just like other term * stored by the codec. Note that, just like other term
* measures, this measure does not take deleted documents * measures, this measure does not take deleted documents
@ -185,34 +104,6 @@ public abstract class Terms {
* measures, this measure does not take deleted documents * measures, this measure does not take deleted documents
* into account. */ * into account. */
public abstract int getDocCount() throws IOException; public abstract int getDocCount() throws IOException;
/**
* Returns a thread-private {@link TermsEnum} instance. Obtaining
* {@link TermsEnum} from this method might be more efficient than using
* {@link #iterator(TermsEnum)} directly since this method doesn't necessarily create a
* new {@link TermsEnum} instance.
* <p>
* NOTE: {@link TermsEnum} instances obtained from this method must not be
* shared across threads. The enum should only be used within a local context
* where other threads can't access it.
*
* @return a thread-private {@link TermsEnum} instance
* @throws IOException
* if an IOException occurs
* @lucene.internal
*/
public TermsEnum getThreadTermsEnum() throws IOException {
TermsEnum termsEnum = threadEnums.get();
if (termsEnum == null) {
termsEnum = iterator(null);
threadEnums.set(termsEnum);
}
return termsEnum;
}
// subclass must close when done:
protected void close() {
threadEnums.close();
}
public final static Terms[] EMPTY_ARRAY = new Terms[0]; public final static Terms[] EMPTY_ARRAY = new Terms[0];
} }

View File

@ -17,7 +17,6 @@ package org.apache.lucene.index.codecs;
* limitations under the License. * limitations under the License.
*/ */
import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.util.Collection; import java.util.Collection;
import java.util.Comparator; import java.util.Comparator;
@ -181,14 +180,8 @@ public class BlockTermsReader extends FieldsProducer {
} }
} }
} finally { } finally {
try { if (postingsReader != null) {
if (postingsReader != null) { postingsReader.close();
postingsReader.close();
}
} finally {
for(FieldReader field : fields.values()) {
field.close();
}
} }
} }
} }
@ -238,7 +231,7 @@ public class BlockTermsReader extends FieldsProducer {
} }
} }
private class FieldReader extends Terms implements Closeable { private class FieldReader extends Terms {
final long numTerms; final long numTerms;
final FieldInfo fieldInfo; final FieldInfo fieldInfo;
final long termsStartPointer; final long termsStartPointer;
@ -261,11 +254,6 @@ public class BlockTermsReader extends FieldsProducer {
return BytesRef.getUTF8SortedAsUnicodeComparator(); return BytesRef.getUTF8SortedAsUnicodeComparator();
} }
@Override
public void close() {
super.close();
}
@Override @Override
public TermsEnum iterator(TermsEnum reuse) throws IOException { public TermsEnum iterator(TermsEnum reuse) throws IOException {
return new SegmentTermsEnum(); return new SegmentTermsEnum();

View File

@ -18,7 +18,6 @@ package org.apache.lucene.index.codecs;
*/ */
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.IOException; import java.io.IOException;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.Collection; import java.util.Collection;
@ -194,9 +193,6 @@ public class BlockTreeTermsReader extends FieldsProducer {
try { try {
IOUtils.close(in, postingsReader); IOUtils.close(in, postingsReader);
} finally { } finally {
for(FieldReader field : fields.values()) {
field.close();
}
// Clear so refs to terms index is GCable even if // Clear so refs to terms index is GCable even if
// app hangs onto us: // app hangs onto us:
fields.clear(); fields.clear();
@ -392,7 +388,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
final Outputs<BytesRef> fstOutputs = ByteSequenceOutputs.getSingleton(); final Outputs<BytesRef> fstOutputs = ByteSequenceOutputs.getSingleton();
final BytesRef NO_OUTPUT = fstOutputs.getNoOutput(); final BytesRef NO_OUTPUT = fstOutputs.getNoOutput();
public final class FieldReader extends Terms implements Closeable { public final class FieldReader extends Terms {
final long numTerms; final long numTerms;
final FieldInfo fieldInfo; final FieldInfo fieldInfo;
final long sumTotalTermFreq; final long sumTotalTermFreq;
@ -450,11 +446,6 @@ public class BlockTreeTermsReader extends FieldsProducer {
return BytesRef.getUTF8SortedAsUnicodeComparator(); return BytesRef.getUTF8SortedAsUnicodeComparator();
} }
@Override
public void close() {
super.close();
}
@Override @Override
public TermsEnum iterator(TermsEnum reuse) throws IOException { public TermsEnum iterator(TermsEnum reuse) throws IOException {
return new SegmentTermsEnum(); return new SegmentTermsEnum();
@ -744,7 +735,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
} }
} }
private final BytesRef savedStartTerm; private BytesRef savedStartTerm;
// TODO: in some cases we can filter by length? eg // TODO: in some cases we can filter by length? eg
// regexp foo*bar must be at least length 6 bytes // regexp foo*bar must be at least length 6 bytes
@ -784,7 +775,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
f.load(rootCode); f.load(rootCode);
// for assert: // for assert:
savedStartTerm = startTerm == null ? null : new BytesRef(startTerm); assert setSavedStartTerm(startTerm);
currentFrame = f; currentFrame = f;
if (startTerm != null) { if (startTerm != null) {
@ -792,6 +783,12 @@ public class BlockTreeTermsReader extends FieldsProducer {
} }
} }
// only for assert:
private boolean setSavedStartTerm(BytesRef startTerm) {
savedStartTerm = startTerm == null ? null : new BytesRef(startTerm);
return true;
}
@Override @Override
public TermState termState() throws IOException { public TermState termState() throws IOException {
currentFrame.decodeMetaData(); currentFrame.decodeMetaData();
@ -1163,7 +1160,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
// Iterates through terms in this field // Iterates through terms in this field
private final class SegmentTermsEnum extends TermsEnum { private final class SegmentTermsEnum extends TermsEnum {
private final IndexInput in; private IndexInput in;
private Frame[] stack; private Frame[] stack;
private final Frame staticFrame; private final Frame staticFrame;
@ -1182,29 +1179,21 @@ public class BlockTreeTermsReader extends FieldsProducer {
final BytesRef term = new BytesRef(); final BytesRef term = new BytesRef();
@SuppressWarnings("unchecked") private FST.Arc<BytesRef>[] arcs = new FST.Arc[5]; @SuppressWarnings("unchecked") private FST.Arc<BytesRef>[] arcs = new FST.Arc[1];
public SegmentTermsEnum() throws IOException { public SegmentTermsEnum() throws IOException {
//if (DEBUG) System.out.println("BTTR.init seg=" + segment); //if (DEBUG) System.out.println("BTTR.init seg=" + segment);
in = (IndexInput) BlockTreeTermsReader.this.in.clone(); stack = new Frame[0];
stack = new Frame[5];
for(int stackOrd=0;stackOrd<stack.length;stackOrd++) {
stack[stackOrd] = new Frame(stackOrd);
}
// Used to hold seek by TermState, or cached seek // Used to hold seek by TermState, or cached seek
staticFrame = new Frame(-1); staticFrame = new Frame(-1);
// Init w/ root block; don't use index since it may
// not (and need not) have been loaded
for(int arcIdx=0;arcIdx<arcs.length;arcIdx++) { for(int arcIdx=0;arcIdx<arcs.length;arcIdx++) {
arcs[arcIdx] = new FST.Arc<BytesRef>(); arcs[arcIdx] = new FST.Arc<BytesRef>();
} }
// Init w/ root block; don't use index since it may
// not (and need not) have been loaded
//final FST.Arc<BytesRef> arc = index.getFirstArc(arcs[0]);
// Empty string prefix must have an output in the index!
//assert arc.isFinal();
currentFrame = staticFrame; currentFrame = staticFrame;
final FST.Arc<BytesRef> arc; final FST.Arc<BytesRef> arc;
if (index != null) { if (index != null) {
@ -1214,8 +1203,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
} else { } else {
arc = null; arc = null;
} }
currentFrame = pushFrame(arc, rootCode, 0); currentFrame = staticFrame;
currentFrame.loadBlock(); //currentFrame = pushFrame(arc, rootCode, 0);
//currentFrame.loadBlock();
validIndexPrefix = 0; validIndexPrefix = 0;
// if (DEBUG) { // if (DEBUG) {
// System.out.println("init frame state " + currentFrame.ord); // System.out.println("init frame state " + currentFrame.ord);
@ -1226,6 +1216,12 @@ public class BlockTreeTermsReader extends FieldsProducer {
// computeBlockStats().print(System.out); // computeBlockStats().print(System.out);
} }
private void initIndexInput() {
if (this.in == null) {
this.in = (IndexInput) BlockTreeTermsReader.this.in.clone();
}
}
/** Runs next() through the entire terms dict, /** Runs next() through the entire terms dict,
* computing aggregate statistics. */ * computing aggregate statistics. */
public Stats computeBlockStats() throws IOException { public Stats computeBlockStats() throws IOException {
@ -1975,6 +1971,20 @@ public class BlockTreeTermsReader extends FieldsProducer {
@Override @Override
public BytesRef next() throws IOException { public BytesRef next() throws IOException {
if (in == null) {
// Fresh TermsEnum; seek to first term:
final FST.Arc<BytesRef> arc;
if (index != null) {
arc = index.getFirstArc(arcs[0]);
// Empty string prefix must have an output in the index!
assert arc.isFinal();
} else {
arc = null;
}
currentFrame = pushFrame(arc, rootCode, 0);
currentFrame.loadBlock();
}
targetBeforeCurrentLength = currentFrame.ord; targetBeforeCurrentLength = currentFrame.ord;
assert !eof; assert !eof;
@ -2242,6 +2252,11 @@ public class BlockTreeTermsReader extends FieldsProducer {
use. */ use. */
void loadBlock() throws IOException { void loadBlock() throws IOException {
// Clone the IndexInput lazily, so that consumers
// that just pull a TermsEnum to
// seekExact(TermState) don't pay this cost:
initIndexInput();
if (nextEnt != -1) { if (nextEnt != -1) {
// Already loaded // Already loaded
return; return;

View File

@ -20,20 +20,23 @@ package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import java.util.*; import java.util.*;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.TermState;
import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.Terms;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer; import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.TermContext; import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.Bits;
/** /**
* MultiPhraseQuery is a generalized version of PhraseQuery, with an added * MultiPhraseQuery is a generalized version of PhraseQuery, with an added
@ -134,6 +137,7 @@ public class MultiPhraseQuery extends Query {
private class MultiPhraseWeight extends Weight { private class MultiPhraseWeight extends Weight {
private final Similarity similarity; private final Similarity similarity;
private final Similarity.Stats stats; private final Similarity.Stats stats;
private final Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
public MultiPhraseWeight(IndexSearcher searcher) public MultiPhraseWeight(IndexSearcher searcher)
throws IOException { throws IOException {
@ -144,7 +148,11 @@ public class MultiPhraseQuery extends Query {
ArrayList<TermStatistics> allTermStats = new ArrayList<TermStatistics>(); ArrayList<TermStatistics> allTermStats = new ArrayList<TermStatistics>();
for(final Term[] terms: termArrays) { for(final Term[] terms: termArrays) {
for (Term term: terms) { for (Term term: terms) {
TermContext termContext = TermContext.build(context, term, true); TermContext termContext = termContexts.get(term);
if (termContext == null) {
termContext = TermContext.build(context, term, true);
termContexts.put(term, termContext);
}
allTermStats.add(searcher.termStatistics(term, termContext)); allTermStats.add(searcher.termStatistics(term, termContext));
} }
} }
@ -174,6 +182,14 @@ public class MultiPhraseQuery extends Query {
PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[termArrays.size()]; PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[termArrays.size()];
final Terms fieldTerms = reader.terms(field);
if (fieldTerms == null) {
return null;
}
// Reuse single TermsEnum below:
final TermsEnum termsEnum = fieldTerms.iterator(null);
for (int pos=0; pos<postingsFreqs.length; pos++) { for (int pos=0; pos<postingsFreqs.length; pos++) {
Term[] terms = termArrays.get(pos); Term[] terms = termArrays.get(pos);
@ -181,31 +197,43 @@ public class MultiPhraseQuery extends Query {
int docFreq; int docFreq;
if (terms.length > 1) { if (terms.length > 1) {
postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, reader, terms); postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum);
// coarse -- this overcounts since a given doc can // coarse -- this overcounts since a given doc can
// have more than one terms: // have more than one term:
docFreq = 0; docFreq = 0;
for(int termIdx=0;termIdx<terms.length;termIdx++) { for(int termIdx=0;termIdx<terms.length;termIdx++) {
docFreq += reader.docFreq(terms[termIdx]); final Term term = terms[termIdx];
TermState termState = termContexts.get(term).get(context.ord);
if (termState == null) {
// Term not in reader
continue;
}
termsEnum.seekExact(term.bytes(), termState);
docFreq += termsEnum.docFreq();
}
if (docFreq == 0) {
// None of the terms are in this reader
return null;
} }
} else { } else {
final Term term = terms[0]; final Term term = terms[0];
postingsEnum = reader.termPositionsEnum(liveDocs, TermState termState = termContexts.get(term).get(context.ord);
term.field(), if (termState == null) {
term.bytes()); // Term not in reader
return null;
}
termsEnum.seekExact(term.bytes(), termState);
postingsEnum = termsEnum.docsAndPositions(liveDocs, null);
if (postingsEnum == null) { if (postingsEnum == null) {
if (reader.termDocsEnum(liveDocs, term.field(), term.bytes()) != null) { // term does exist, but has no positions
// term does exist, but has no positions assert termsEnum.docs(liveDocs, null) != null: "termstate found but no term exists in reader";
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")"); throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
} else {
// term does not exist
return null;
}
} }
docFreq = reader.docFreq(term.field(), term.bytes()); docFreq = termsEnum.docFreq();
} }
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]); postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]);
@ -437,20 +465,22 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
private DocsQueue _queue; private DocsQueue _queue;
private IntQueue _posList; private IntQueue _posList;
public UnionDocsAndPositionsEnum(Bits liveDocs, IndexReader indexReader, Term[] terms) throws IOException { public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
List<DocsAndPositionsEnum> docsEnums = new LinkedList<DocsAndPositionsEnum>(); List<DocsAndPositionsEnum> docsEnums = new LinkedList<DocsAndPositionsEnum>();
for (int i = 0; i < terms.length; i++) { for (int i = 0; i < terms.length; i++) {
DocsAndPositionsEnum postings = indexReader.termPositionsEnum(liveDocs, final Term term = terms[i];
terms[i].field(), TermState termState = termContexts.get(term).get(context.ord);
terms[i].bytes()); if (termState == null) {
if (postings != null) { // Term doesn't exist in reader
docsEnums.add(postings); continue;
} else {
if (indexReader.termDocsEnum(liveDocs, terms[i].field(), terms[i].bytes()) != null) {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + terms[i].field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + terms[i].text() + ")");
}
} }
termsEnum.seekExact(term.bytes(), termState);
DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null);
if (postings == null) {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
}
docsEnums.add(postings);
} }
_queue = new DocsQueue(docsEnums); _queue = new DocsQueue(docsEnums);

View File

@ -18,24 +18,24 @@ package org.apache.lucene.search;
*/ */
import java.io.IOException; import java.io.IOException;
import java.util.Set;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Set;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer; import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TermContext; import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
/** A Query that matches documents containing a particular sequence of terms. /** A Query that matches documents containing a particular sequence of terms.
* A PhraseQuery is built by QueryParser for input like <code>"new york"</code>. * A PhraseQuery is built by QueryParser for input like <code>"new york"</code>.
@ -222,27 +222,32 @@ public class PhraseQuery extends Query {
final IndexReader reader = context.reader; final IndexReader reader = context.reader;
final Bits liveDocs = acceptDocs; final Bits liveDocs = acceptDocs;
PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.size()]; PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.size()];
final Terms fieldTerms = reader.terms(field);
if (fieldTerms == null) {
return null;
}
// Reuse single TermsEnum below:
final TermsEnum te = fieldTerms.iterator(null);
for (int i = 0; i < terms.size(); i++) { for (int i = 0; i < terms.size(); i++) {
final Term t = terms.get(i); final Term t = terms.get(i);
final TermState state = states[i].get(context.ord); final TermState state = states[i].get(context.ord);
if (state == null) { /* term doesnt exist in this segment */ if (state == null) { /* term doesnt exist in this segment */
assert termNotInReader(reader, field, t.bytes()) : "no termstate found but term exists in reader"; assert termNotInReader(reader, field, t.bytes()): "no termstate found but term exists in reader";
return null; return null;
} }
DocsAndPositionsEnum postingsEnum = reader.termPositionsEnum(liveDocs, te.seekExact(t.bytes(), state);
t.field(), DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null);
t.bytes(),
state);
// PhraseQuery on a field that did not index // PhraseQuery on a field that did not index
// positions. // positions.
if (postingsEnum == null) { if (postingsEnum == null) {
assert (reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null) : "termstate found but no term exists in reader"; assert reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null: "termstate found but no term exists in reader";
// term does exist, but has no positions // term does exist, but has no positions
throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")"); throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
} }
// get the docFreq without seeking
TermsEnum te = reader.fields().terms(field).getThreadTermsEnum();
te.seekExact(t.bytes(), state);
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i).intValue(), t); postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i).intValue(), t);
} }
@ -264,10 +269,9 @@ public class PhraseQuery extends Query {
} }
} }
// only called from assert
private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException { private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException {
// only called from assert return reader.docFreq(field, bytes) == 0;
final Terms terms = reader.terms(field);
return terms == null || terms.docFreq(bytes) == 0;
} }
@Override @Override

View File

@ -23,7 +23,6 @@ import java.util.Set;
import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermState; import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.index.IndexReader.ReaderContext;
@ -41,13 +40,13 @@ import org.apache.lucene.util.ToStringUtils;
*/ */
public class TermQuery extends Query { public class TermQuery extends Query {
private final Term term; private final Term term;
private int docFreq; private final int docFreq;
private transient TermContext perReaderTermState; private final TermContext perReaderTermState;
final class TermWeight extends Weight { final class TermWeight extends Weight {
private final Similarity similarity; private final Similarity similarity;
private final Similarity.Stats stats; private final Similarity.Stats stats;
private transient TermContext termStates; private final TermContext termStates;
public TermWeight(IndexSearcher searcher, TermContext termStates) public TermWeight(IndexSearcher searcher, TermContext termStates)
throws IOException { throws IOException {
@ -108,7 +107,7 @@ public class TermQuery extends Query {
return null; return null;
} }
//System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null")); //System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum(); final TermsEnum termsEnum = context.reader.terms(term.field()).iterator(null);
termsEnum.seekExact(term.bytes(), state); termsEnum.seekExact(term.bytes(), state);
return termsEnum; return termsEnum;
} }
@ -116,8 +115,7 @@ public class TermQuery extends Query {
private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException { private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException {
// only called from assert // only called from assert
//System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString()); //System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString());
final Terms terms = reader.terms(field); return reader.docFreq(field, bytes) == 0;
return terms == null || terms.docFreq(bytes) == 0;
} }
@Override @Override

View File

@ -17,7 +17,6 @@ package org.apache.lucene.search.spans;
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Fields; import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
@ -26,7 +25,6 @@ import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.TermContext; import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.ToStringUtils;
@ -99,7 +97,7 @@ public class SpanTermQuery extends SpanQuery {
if (fields != null) { if (fields != null) {
final Terms terms = fields.terms(term.field()); final Terms terms = fields.terms(term.field());
if (terms != null) { if (terms != null) {
final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share! final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term.bytes(), true)) { if (termsEnum.seekExact(term.bytes(), true)) {
state = termsEnum.termState(); state = termsEnum.termState();
} else { } else {
@ -119,7 +117,7 @@ public class SpanTermQuery extends SpanQuery {
return TermSpans.EMPTY_TERM_SPANS; return TermSpans.EMPTY_TERM_SPANS;
} }
final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum(); final TermsEnum termsEnum = context.reader.terms(term.field()).iterator(null);
termsEnum.seekExact(term.bytes(), state); termsEnum.seekExact(term.bytes(), state);
final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null); final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null);

View File

@ -46,7 +46,6 @@ public final class TermContext {
//public static boolean DEBUG = BlockTreeTermsWriter.DEBUG; //public static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
/** /**
* Creates an empty {@link TermContext} from a {@link ReaderContext} * Creates an empty {@link TermContext} from a {@link ReaderContext}
*/ */
@ -94,7 +93,7 @@ public final class TermContext {
if (fields != null) { if (fields != null) {
final Terms terms = fields.terms(field); final Terms terms = fields.terms(field);
if (terms != null) { if (terms != null) {
final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share! final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(bytes, cache)) { if (termsEnum.seekExact(bytes, cache)) {
final TermState termState = termsEnum.termState(); final TermState termState = termsEnum.termState();
//if (DEBUG) System.out.println(" found"); //if (DEBUG) System.out.println(" found");

View File

@ -349,7 +349,9 @@ public class TestDocsAndPositions extends LuceneTestCase {
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
// now reuse and check again // now reuse and check again
disi = r.terms("foo").docs(null, new BytesRef("bar"), disi); TermsEnum te = r.terms("foo").iterator(null);
assertTrue(te.seekExact(new BytesRef("bar"), true));
disi = te.docs(null, disi);
docid = disi.docID(); docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS); assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
@ -372,7 +374,9 @@ public class TestDocsAndPositions extends LuceneTestCase {
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
// now reuse and check again // now reuse and check again
disi = r.terms("foo").docsAndPositions(null, new BytesRef("bar"), disi); TermsEnum te = r.terms("foo").iterator(null);
assertTrue(te.seekExact(new BytesRef("bar"), true));
disi = te.docsAndPositions(null, disi);
docid = disi.docID(); docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS); assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

View File

@ -199,7 +199,7 @@ public class TestDocumentWriter extends LuceneTestCase {
writer.close(); writer.close();
SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random)); SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random));
DocsAndPositionsEnum termPositions = reader.fields().terms("f1").docsAndPositions(reader.getLiveDocs(), new BytesRef("a"), null); DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, reader.getLiveDocs(), "f1", new BytesRef("a"));
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
int freq = termPositions.freq(); int freq = termPositions.freq();
assertEquals(3, freq); assertEquals(3, freq);
@ -243,18 +243,18 @@ public class TestDocumentWriter extends LuceneTestCase {
writer.close(); writer.close();
SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random)); SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random));
DocsAndPositionsEnum termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getLiveDocs(), new BytesRef("term1"), null); DocsAndPositionsEnum termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term1"));
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
assertEquals(1, termPositions.freq()); assertEquals(1, termPositions.freq());
assertEquals(0, termPositions.nextPosition()); assertEquals(0, termPositions.nextPosition());
termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getLiveDocs(), new BytesRef("term2"), null); termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term2"));
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
assertEquals(2, termPositions.freq()); assertEquals(2, termPositions.freq());
assertEquals(1, termPositions.nextPosition()); assertEquals(1, termPositions.nextPosition());
assertEquals(3, termPositions.nextPosition()); assertEquals(3, termPositions.nextPosition());
termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getLiveDocs(), new BytesRef("term3"), null); termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term3"));
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS); assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
assertEquals(1, termPositions.freq()); assertEquals(1, termPositions.freq());
assertEquals(2, termPositions.nextPosition()); assertEquals(2, termPositions.nextPosition());

View File

@ -1340,13 +1340,12 @@ public class TestIndexReader extends LuceneTestCase
writer.addDocument(d); writer.addDocument(d);
IndexReader r = writer.getReader(); IndexReader r = writer.getReader();
writer.close(); writer.close();
Terms terms = MultiFields.getTerms(r, "f");
try { try {
// Make sure codec impls totalTermFreq (eg PreFlex doesn't) // Make sure codec impls totalTermFreq (eg PreFlex doesn't)
Assume.assumeTrue(terms.totalTermFreq(new BytesRef("b")) != -1); Assume.assumeTrue(MultiFields.totalTermFreq(r, "f", new BytesRef("b")) != -1);
assertEquals(1, terms.totalTermFreq(new BytesRef("b"))); assertEquals(1, MultiFields.totalTermFreq(r, "f", new BytesRef("b")));
assertEquals(2, terms.totalTermFreq(new BytesRef("a"))); assertEquals(2, MultiFields.totalTermFreq(r, "f", new BytesRef("a")));
assertEquals(1, terms.totalTermFreq(new BytesRef("b"))); assertEquals(1, MultiFields.totalTermFreq(r, "f", new BytesRef("b")));
} finally { } finally {
r.close(); r.close();
dir.close(); dir.close();

View File

@ -113,7 +113,6 @@ public class TestMultiFields extends LuceneTestCase {
for(int delDoc : deleted) { for(int delDoc : deleted) {
assertFalse(liveDocs.get(delDoc)); assertFalse(liveDocs.get(delDoc));
} }
Terms terms2 = MultiFields.getTerms(reader, "field");
for(int i=0;i<100;i++) { for(int i=0;i<100;i++) {
BytesRef term = terms.get(random.nextInt(terms.size())); BytesRef term = terms.get(random.nextInt(terms.size()));
@ -121,7 +120,7 @@ public class TestMultiFields extends LuceneTestCase {
System.out.println("TEST: seek term="+ UnicodeUtil.toHexString(term.utf8ToString()) + " " + term); System.out.println("TEST: seek term="+ UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
} }
DocsEnum docsEnum = terms2.docs(liveDocs, term, null); DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, liveDocs, "field", term);
assertNotNull(docsEnum); assertNotNull(docsEnum);
for(int docID : docs.get(term)) { for(int docID : docs.get(term)) {

View File

@ -447,7 +447,7 @@ public class TestOmitTf extends LuceneTestCase {
IndexReader ir = iw.getReader(); IndexReader ir = iw.getReader();
iw.close(); iw.close();
Terms terms = MultiFields.getTerms(ir, "foo"); Terms terms = MultiFields.getTerms(ir, "foo");
assertEquals(-1, terms.totalTermFreq(new BytesRef("bar"))); assertEquals(-1, MultiFields.totalTermFreq(ir, "foo", new BytesRef("bar")));
assertEquals(-1, terms.getSumTotalTermFreq()); assertEquals(-1, terms.getSumTotalTermFreq());
ir.close(); ir.close();
dir.close(); dir.close();

View File

@ -342,6 +342,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
return; return;
} }
Terms terms2 = fields.terms(idField); Terms terms2 = fields.terms(idField);
TermsEnum termsEnum2 = terms2.iterator(null);
DocsEnum termDocs1 = null; DocsEnum termDocs1 = null;
DocsEnum termDocs2 = null; DocsEnum termDocs2 = null;
@ -354,7 +355,11 @@ public class TestStressIndexing2 extends LuceneTestCase {
} }
termDocs1 = termsEnum.docs(liveDocs1, termDocs1); termDocs1 = termsEnum.docs(liveDocs1, termDocs1);
termDocs2 = terms2.docs(liveDocs2, term, termDocs2); if (termsEnum2.seekExact(term, false)) {
termDocs2 = termsEnum2.docs(liveDocs2, termDocs2);
} else {
termDocs2 = null;
}
if (termDocs1.nextDoc() == DocsEnum.NO_MORE_DOCS) { if (termDocs1.nextDoc() == DocsEnum.NO_MORE_DOCS) {
// This doc is deleted and wasn't replaced // This doc is deleted and wasn't replaced
@ -397,11 +402,11 @@ public class TestStressIndexing2 extends LuceneTestCase {
System.out.println(" " + field + ":"); System.out.println(" " + field + ":");
Terms terms3 = fieldsEnum.terms(); Terms terms3 = fieldsEnum.terms();
assertNotNull(terms3); assertNotNull(terms3);
TermsEnum termsEnum2 = terms3.iterator(null); TermsEnum termsEnum3 = terms3.iterator(null);
BytesRef term2; BytesRef term2;
while((term2 = termsEnum2.next()) != null) { while((term2 = termsEnum3.next()) != null) {
System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum2.totalTermFreq()); System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq());
dpEnum = termsEnum2.docsAndPositions(null, dpEnum); dpEnum = termsEnum3.docsAndPositions(null, dpEnum);
if (dpEnum != null) { if (dpEnum != null) {
assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS); assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
final int freq = dpEnum.freq(); final int freq = dpEnum.freq();
@ -410,7 +415,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
System.out.println(" pos=" + dpEnum.nextPosition()); System.out.println(" pos=" + dpEnum.nextPosition());
} }
} else { } else {
dEnum = termsEnum2.docs(null, dEnum); dEnum = termsEnum3.docs(null, dEnum);
assertNotNull(dEnum); assertNotNull(dEnum);
assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS); assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
final int freq = dEnum.freq(); final int freq = dEnum.freq();
@ -431,11 +436,11 @@ public class TestStressIndexing2 extends LuceneTestCase {
System.out.println(" " + field + ":"); System.out.println(" " + field + ":");
Terms terms3 = fieldsEnum.terms(); Terms terms3 = fieldsEnum.terms();
assertNotNull(terms3); assertNotNull(terms3);
TermsEnum termsEnum2 = terms3.iterator(null); TermsEnum termsEnum3 = terms3.iterator(null);
BytesRef term2; BytesRef term2;
while((term2 = termsEnum2.next()) != null) { while((term2 = termsEnum3.next()) != null) {
System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum2.totalTermFreq()); System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq());
dpEnum = termsEnum2.docsAndPositions(null, dpEnum); dpEnum = termsEnum3.docsAndPositions(null, dpEnum);
if (dpEnum != null) { if (dpEnum != null) {
assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS); assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
final int freq = dpEnum.freq(); final int freq = dpEnum.freq();
@ -444,7 +449,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
System.out.println(" pos=" + dpEnum.nextPosition()); System.out.println(" pos=" + dpEnum.nextPosition());
} }
} else { } else {
dEnum = termsEnum2.docs(null, dEnum); dEnum = termsEnum3.docs(null, dEnum);
assertNotNull(dEnum); assertNotNull(dEnum);
assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS); assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
final int freq = dEnum.freq(); final int freq = dEnum.freq();
@ -467,7 +472,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
String field1=null, field2=null; String field1=null, field2=null;
TermsEnum termsEnum1 = null; TermsEnum termsEnum1 = null;
TermsEnum termsEnum2 = null; termsEnum2 = null;
DocsEnum docs1=null, docs2=null; DocsEnum docs1=null, docs2=null;
// pack both doc and freq into single element for easy sorting // pack both doc and freq into single element for easy sorting

View File

@ -59,7 +59,17 @@ public class TFValueSource extends TermFreqValueSource {
public void reset() throws IOException { public void reset() throws IOException {
// no one should call us for deleted docs? // no one should call us for deleted docs?
docs = terms==null ? null : terms.docs(null, indexedBytes, null); if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(indexedBytes, false)) {
docs = termsEnum.docs(null, null);
} else {
docs = null;
}
} else {
docs = null;
}
if (docs == null) { if (docs == null) {
docs = new DocsEnum() { docs = new DocsEnum() {
@Override @Override

View File

@ -51,7 +51,18 @@ public class TermFreqValueSource extends DocFreqValueSource {
public void reset() throws IOException { public void reset() throws IOException {
// no one should call us for deleted docs? // no one should call us for deleted docs?
docs = terms == null ? null : terms.docs(null, indexedBytes, null);
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(indexedBytes, false)) {
docs = termsEnum.docs(null, null);
} else {
docs = null;
}
} else {
docs = null;
}
if (docs == null) { if (docs == null) {
docs = new DocsEnum() { docs = new DocsEnum() {
@Override @Override

View File

@ -555,7 +555,11 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
Terms terms = fields.terms(t.field()); Terms terms = fields.terms(t.field());
if (terms == null) return -1; if (terms == null) return -1;
BytesRef termBytes = t.bytes(); BytesRef termBytes = t.bytes();
DocsEnum docs = terms.docs(MultiFields.getLiveDocs(reader), termBytes, null); final TermsEnum termsEnum = terms.iterator(null);
if (!termsEnum.seekExact(termBytes, false)) {
return -1;
}
DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(reader), null);
if (docs == null) return -1; if (docs == null) return -1;
int id = docs.nextDoc(); int id = docs.nextDoc();
return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id; return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
@ -947,7 +951,13 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
BytesRef termBytes = t.bytes(); BytesRef termBytes = t.bytes();
Bits liveDocs = reader.getLiveDocs(); Bits liveDocs = reader.getLiveDocs();
DocsEnum docsEnum = terms==null ? null : terms.docs(liveDocs, termBytes, null); DocsEnum docsEnum = null;
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(termBytes, false)) {
docsEnum = termsEnum.docs(MultiFields.getLiveDocs(reader), null);
}
}
if (docsEnum != null) { if (docsEnum != null) {
DocsEnum.BulkReadResult readResult = docsEnum.getBulkResult(); DocsEnum.BulkReadResult readResult = docsEnum.getBulkResult();

View File

@ -723,8 +723,11 @@ public class TestRealTimeGet extends SolrTestCaseJ4 {
Terms terms = fields.terms(t.field()); Terms terms = fields.terms(t.field());
if (terms == null) return -1; if (terms == null) return -1;
BytesRef termBytes = t.bytes(); BytesRef termBytes = t.bytes();
DocsEnum docs = terms.docs(MultiFields.getLiveDocs(r), termBytes, null); final TermsEnum termsEnum = terms.iterator(null);
if (docs == null) return -1; if (!termsEnum.seekExact(termBytes, false)) {
return -1;
}
DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null);
int id = docs.nextDoc(); int id = docs.nextDoc();
if (id != DocIdSetIterator.NO_MORE_DOCS) { if (id != DocIdSetIterator.NO_MORE_DOCS) {
int next = docs.nextDoc(); int next = docs.nextDoc();