mirror of https://github.com/apache/lucene.git
LUCENE-3562: stop caching thread-private TermsEnums in Terms
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1203294 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 5a3b635239
commit 9e27723b37
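Editorial note (not part of the commit): the diff below touches every call site of the removed Terms convenience lookups (docFreq, totalTermFreq, docs, docsAndPositions), which were backed by a TermsEnum cached in a CloseableThreadLocal. The recurring replacement pattern is to obtain an explicit TermsEnum from Terms.iterator(null) and position it with seekExact. A minimal self-contained sketch of that pattern, using only calls that appear in the diff (class and method names here are illustrative):

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

class DocFreqLookup {
  // Mirrors the commit's replacement for the old Terms.docFreq(BytesRef):
  // pull a fresh TermsEnum and seek it, returning 0 when the term is absent.
  static int docFreq(IndexReader reader, String field, BytesRef term) throws IOException {
    final Terms terms = MultiFields.getTerms(reader, field);
    if (terms == null) {
      return 0;
    }
    final TermsEnum termsEnum = terms.iterator(null); // no thread-private caching anymore
    if (termsEnum.seekExact(term, true)) {            // true = may consult the terms cache
      return termsEnum.docFreq();
    }
    return 0;
  }
}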
@@ -21,9 +21,7 @@ import java.io.File;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;

 /*
  * Utility to get document frequency and total number of occurrences (sum of the tf for each doc) of a term.
@@ -50,10 +48,9 @@ public class GetTermInfo {

   public static void getTermInfo(Directory dir, String field, BytesRef termtext) throws Exception {
     IndexReader reader = IndexReader.open(dir);
-    Terms terms = MultiFields.getTerms(reader, field);
     long totalTF = HighFreqTerms.getTotalTermFreq(reader, field, termtext);
     System.out.printf("%s:%s \t totalTF = %,d \t doc freq = %,d \n",
-        field, termtext.utf8ToString(), totalTF, terms.docFreq(termtext));
+        field, termtext.utf8ToString(), totalTF, reader.docFreq(field, termtext));
   }

   private static void usage() {
@@ -84,21 +84,6 @@ public class FilterIndexReader extends IndexReader {
       return in.getComparator();
     }

-    @Override
-    public int docFreq(BytesRef text) throws IOException {
-      return in.docFreq(text);
-    }
-
-    @Override
-    public DocsEnum docs(Bits liveDocs, BytesRef text, DocsEnum reuse) throws IOException {
-      return in.docs(liveDocs, text, reuse);
-    }
-
-    @Override
-    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef text, DocsAndPositionsEnum reuse) throws IOException {
-      return in.docsAndPositions(liveDocs, text, reuse);
-    }
-
     @Override
     public long getUniqueTermCount() throws IOException {
       return in.getUniqueTermCount();
@@ -991,7 +991,12 @@ public abstract class IndexReader implements Cloneable,Closeable {
     if (terms == null) {
       return 0;
     }
-    return terms.docFreq(term);
+    final TermsEnum termsEnum = terms.iterator(null);
+    if (termsEnum.seekExact(term, true)) {
+      return termsEnum.docFreq();
+    } else {
+      return 0;
+    }
   }

   /** Returns the number of documents containing the term
@@ -1008,7 +1013,12 @@ public abstract class IndexReader implements Cloneable,Closeable {
     if (terms == null) {
       return 0;
     }
-    return terms.totalTermFreq(term);
+    final TermsEnum termsEnum = terms.iterator(null);
+    if (termsEnum.seekExact(term, true)) {
+      return termsEnum.totalTermFreq();
+    } else {
+      return 0;
+    }
   }

   /** This may return null if the field does not exist.*/
@@ -1027,16 +1037,17 @@ public abstract class IndexReader implements Cloneable,Closeable {
     assert field != null;
     assert term != null;
     final Fields fields = fields();
-    if (fields == null) {
-      return null;
-    }
+    if (fields != null) {
       final Terms terms = fields.terms(field);
       if (terms != null) {
-        return terms.docs(liveDocs, term, null);
-      } else {
-        return null;
+        final TermsEnum termsEnum = terms.iterator(null);
+        if (termsEnum.seekExact(term, true)) {
+          return termsEnum.docs(liveDocs, null);
+        }
+      }
     }
+    return null;
   }

   /** Returns {@link DocsAndPositionsEnum} for the specified
    *  field & term.  This may return null, if either the
@@ -1046,16 +1057,17 @@ public abstract class IndexReader implements Cloneable,Closeable {
     assert field != null;
     assert term != null;
     final Fields fields = fields();
-    if (fields == null) {
-      return null;
-    }
+    if (fields != null) {
       final Terms terms = fields.terms(field);
       if (terms != null) {
-        return terms.docsAndPositions(liveDocs, term, null);
-      } else {
-        return null;
+        final TermsEnum termsEnum = terms.iterator(null);
+        if (termsEnum.seekExact(term, true)) {
+          return termsEnum.docsAndPositions(liveDocs, null);
+        }
+      }
     }
+    return null;
   }

   /**
    * Returns {@link DocsEnum} for the specified field and
@@ -1066,16 +1078,16 @@ public abstract class IndexReader implements Cloneable,Closeable {
     assert state != null;
     assert field != null;
     final Fields fields = fields();
-    if (fields == null) {
-      return null;
-    }
+    if (fields != null) {
       final Terms terms = fields.terms(field);
       if (terms != null) {
-        return terms.docs(liveDocs, term, state, null);
-      } else {
-        return null;
+        final TermsEnum termsEnum = terms.iterator(null);
+        termsEnum.seekExact(term, state);
+        return termsEnum.docs(liveDocs, null);
+      }
     }
+    return null;
   }

   /**
    * Returns {@link DocsAndPositionsEnum} for the specified field and
@@ -1086,16 +1098,16 @@ public abstract class IndexReader implements Cloneable,Closeable {
     assert state != null;
     assert field != null;
     final Fields fields = fields();
-    if (fields == null) {
-      return null;
-    }
+    if (fields != null) {
       final Terms terms = fields.terms(field);
       if (terms != null) {
-        return terms.docsAndPositions(liveDocs, term, state, null);
-      } else {
-        return null;
+        final TermsEnum termsEnum = terms.iterator(null);
+        termsEnum.seekExact(term, state);
+        return termsEnum.docsAndPositions(liveDocs, null);
+      }
     }
+    return null;
   }


   /** Deletes the document numbered <code>docNum</code>.  Once a document is
@@ -156,11 +156,13 @@ public final class MultiFields extends Fields {
     assert term != null;
     final Terms terms = getTerms(r, field);
     if (terms != null) {
-      return terms.docs(liveDocs, term, null);
-    } else {
-      return null;
+      final TermsEnum termsEnum = terms.iterator(null);
+      if (termsEnum.seekExact(term, true)) {
+        return termsEnum.docs(liveDocs, null);
+      }
     }
+    return null;
   }

   /** Returns {@link DocsAndPositionsEnum} for the specified
    *  field & term.  This may return null if the term does
@@ -170,11 +172,13 @@ public final class MultiFields extends Fields {
     assert term != null;
     final Terms terms = getTerms(r, field);
     if (terms != null) {
-      return terms.docsAndPositions(liveDocs, term, null);
-    } else {
-      return null;
+      final TermsEnum termsEnum = terms.iterator(null);
+      if (termsEnum.seekExact(term, true)) {
+        return termsEnum.docsAndPositions(liveDocs, null);
+      }
     }
+    return null;
   }

   public MultiFields(Fields[] subs, ReaderUtil.Slice[] subSlices) {
     this.subs = subs;
@@ -233,6 +237,17 @@ public final class MultiFields extends Fields {
     return result;
   }

+  public static long totalTermFreq(IndexReader r, String field, BytesRef text) throws IOException {
+    final Terms terms = getTerms(r, field);
+    if (terms != null) {
+      final TermsEnum termsEnum = terms.iterator(null);
+      if (termsEnum.seekExact(text, true)) {
+        return termsEnum.totalTermFreq();
+      }
+    }
+    return 0;
+  }
+
   @Override
   public int getUniqueFieldCount() {
     return terms.size();
@@ -36,7 +36,6 @@ import org.apache.lucene.index.codecs.TermVectorsReader;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.util.BitVector;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.CloseableThreadLocal;
 import org.apache.lucene.util.StringHelper;

@@ -473,17 +472,6 @@ public class SegmentReader extends IndexReader implements Cloneable {
     return core.fields;
   }

-  @Override
-  public int docFreq(String field, BytesRef term) throws IOException {
-    ensureOpen();
-    Terms terms = core.fields.terms(field);
-    if (terms != null) {
-      return terms.docFreq(term);
-    } else {
-      return 0;
-    }
-  }
-
   @Override
   public int numDocs() {
     // Don't call ensureOpen() here (it could affect performance)
@@ -20,9 +20,7 @@ package org.apache.lucene.index;
 import java.io.IOException;
 import java.util.Comparator;

-import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.CloseableThreadLocal;
 import org.apache.lucene.util.automaton.CompiledAutomaton;

 /**
@@ -32,10 +30,6 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;

 public abstract class Terms {

-  // Privately cache a TermsEnum per-thread for looking up
-  // docFreq and getting a private DocsEnum
-  private final CloseableThreadLocal<TermsEnum> threadEnums = new CloseableThreadLocal<TermsEnum>();
-
   /** Returns an iterator that will step through all
    *  terms. This method will not return null.  If you have
    *  a previous TermsEnum, for example from a different
@@ -83,81 +77,6 @@ public abstract class Terms {
    *  reuse it. */
   public abstract Comparator<BytesRef> getComparator() throws IOException;

-  /** Returns the number of documents containing the
-   *  specified term text.  Returns 0 if the term does not
-   *  exist. */
-  public int docFreq(BytesRef text) throws IOException {
-    final TermsEnum termsEnum = getThreadTermsEnum();
-    if (termsEnum.seekExact(text, true)) {
-      return termsEnum.docFreq();
-    } else {
-      return 0;
-    }
-  }
-
-  /** Returns the total number of occurrences of this term
-   *  across all documents (the sum of the freq() for each
-   *  doc that has this term).  This will be -1 if the
-   *  codec doesn't support this measure.  Note that, like
-   *  other term measures, this measure does not take
-   *  deleted documents into account. */
-  public long totalTermFreq(BytesRef text) throws IOException {
-    final TermsEnum termsEnum = getThreadTermsEnum();
-    if (termsEnum.seekExact(text, true)) {
-      return termsEnum.totalTermFreq();
-    } else {
-      return 0;
-    }
-  }
-
-  /** Get {@link DocsEnum} for the specified term.  This
-   *  method may return null if the term does not exist. */
-  public DocsEnum docs(Bits liveDocs, BytesRef text, DocsEnum reuse) throws IOException {
-    final TermsEnum termsEnum = getThreadTermsEnum();
-    if (termsEnum.seekExact(text, true)) {
-      return termsEnum.docs(liveDocs, reuse);
-    } else {
-      return null;
-    }
-  }
-
-  /** Get {@link DocsEnum} for the specified term.  This
-   *  method will may return null if the term does not
-   *  exists, or positions were not indexed. */
-  public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef text, DocsAndPositionsEnum reuse) throws IOException {
-    final TermsEnum termsEnum = getThreadTermsEnum();
-    if (termsEnum.seekExact(text, true)) {
-      return termsEnum.docsAndPositions(liveDocs, reuse);
-    } else {
-      return null;
-    }
-  }
-
-  /**
-   * Expert: Get {@link DocsEnum} for the specified {@link TermState}.
-   * This method may return <code>null</code> if the term does not exist.
-   *
-   * @see TermsEnum#termState()
-   * @see TermsEnum#seekExact(BytesRef, TermState) */
-  public DocsEnum docs(Bits liveDocs, BytesRef term, TermState termState, DocsEnum reuse) throws IOException {
-    final TermsEnum termsEnum = getThreadTermsEnum();
-    termsEnum.seekExact(term, termState);
-    return termsEnum.docs(liveDocs, reuse);
-  }
-
-  /**
-   * Get {@link DocsEnum} for the specified {@link TermState}. This
-   * method will may return <code>null</code> if the term does not exists, or positions were
-   * not indexed.
-   *
-   * @see TermsEnum#termState()
-   * @see TermsEnum#seekExact(BytesRef, TermState) */
-  public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef term, TermState termState, DocsAndPositionsEnum reuse) throws IOException {
-    final TermsEnum termsEnum = getThreadTermsEnum();
-    termsEnum.seekExact(term, termState);
-    return termsEnum.docsAndPositions(liveDocs, reuse);
-  }
-
   /** Returns the number of terms for this field, or -1 if this
    *  measure isn't stored by the codec. Note that, just like
    *  other term measures, this measure does not take deleted
@@ -172,7 +91,7 @@ public abstract class Terms {
    *  into account. */
   public abstract long getSumTotalTermFreq() throws IOException;

-  /** Returns the sum of {@link #docFreq(BytesRef)} for
+  /** Returns the sum of {@link TermsEnum#docFreq()} for
    *  all terms in this field, or -1 if this measure isn't
    *  stored by the codec. Note that, just like other term
    *  measures, this measure does not take deleted documents
@@ -186,33 +105,5 @@ public abstract class Terms {
    *  into account. */
   public abstract int getDocCount() throws IOException;

-  /**
-   * Returns a thread-private {@link TermsEnum} instance. Obtaining
-   * {@link TermsEnum} from this method might be more efficient than using
-   * {@link #iterator(TermsEnum)} directly since this method doesn't necessarily create a
-   * new {@link TermsEnum} instance.
-   * <p>
-   * NOTE: {@link TermsEnum} instances obtained from this method must not be
-   * shared across threads. The enum should only be used within a local context
-   * where other threads can't access it.
-   *
-   * @return a thread-private {@link TermsEnum} instance
-   * @throws IOException
-   *           if an IOException occurs
-   * @lucene.internal
-   */
-  public TermsEnum getThreadTermsEnum() throws IOException {
-    TermsEnum termsEnum = threadEnums.get();
-    if (termsEnum == null) {
-      termsEnum = iterator(null);
-      threadEnums.set(termsEnum);
-    }
-    return termsEnum;
-  }
-
-  // subclass must close when done:
-  protected void close() {
-    threadEnums.close();
-  }
   public final static Terms[] EMPTY_ARRAY = new Terms[0];
 }
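Editorial note (not part of the commit): with the thread-private cache above deleted, callers that perform repeated lookups are expected to hoist a single TermsEnum themselves and reuse it across seeks. A minimal sketch of that reuse pattern (the class, method, and variable names here are illustrative, not from the commit):

import java.io.IOException;
import java.util.List;

import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

class TotalTermFreqSum {
  // Sums totalTermFreq over several terms using one reused TermsEnum --
  // the caller-side replacement for the deleted Terms.totalTermFreq(BytesRef).
  static long sum(Terms terms, List<BytesRef> queryTerms) throws IOException {
    long sum = 0;
    final TermsEnum termsEnum = terms.iterator(null); // hoisted once, reused per seek
    for (BytesRef term : queryTerms) {
      if (termsEnum.seekExact(term, true)) {
        final long ttf = termsEnum.totalTermFreq();   // -1 if the codec doesn't store it
        if (ttf != -1) {
          sum += ttf;
        }
      }
    }
    return sum;
  }
}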
@@ -17,7 +17,6 @@ package org.apache.lucene.index.codecs;
  * limitations under the License.
  */

-import java.io.Closeable;
 import java.io.IOException;
 import java.util.Collection;
 import java.util.Comparator;
@@ -181,15 +180,9 @@ public class BlockTermsReader extends FieldsProducer {
         }
       }
     } finally {
-      try {
         if (postingsReader != null) {
           postingsReader.close();
         }
-      } finally {
-        for(FieldReader field : fields.values()) {
-          field.close();
-        }
-      }
     }
   }

@@ -238,7 +231,7 @@ public class BlockTermsReader extends FieldsProducer {
     }
   }

-  private class FieldReader extends Terms implements Closeable {
+  private class FieldReader extends Terms {
     final long numTerms;
     final FieldInfo fieldInfo;
     final long termsStartPointer;
@@ -261,11 +254,6 @@ public class BlockTermsReader extends FieldsProducer {
       return BytesRef.getUTF8SortedAsUnicodeComparator();
     }

-    @Override
-    public void close() {
-      super.close();
-    }
-
     @Override
     public TermsEnum iterator(TermsEnum reuse) throws IOException {
       return new SegmentTermsEnum();
@@ -18,7 +18,6 @@ package org.apache.lucene.index.codecs;
  */

 import java.io.ByteArrayOutputStream;
-import java.io.Closeable;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.util.Collection;
@@ -194,9 +193,6 @@ public class BlockTreeTermsReader extends FieldsProducer {
     try {
       IOUtils.close(in, postingsReader);
     } finally {
-      for(FieldReader field : fields.values()) {
-        field.close();
-      }
       // Clear so refs to terms index is GCable even if
       // app hangs onto us:
       fields.clear();
@@ -392,7 +388,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
   final Outputs<BytesRef> fstOutputs = ByteSequenceOutputs.getSingleton();
   final BytesRef NO_OUTPUT = fstOutputs.getNoOutput();

-  public final class FieldReader extends Terms implements Closeable {
+  public final class FieldReader extends Terms {
     final long numTerms;
     final FieldInfo fieldInfo;
     final long sumTotalTermFreq;
@@ -450,11 +446,6 @@ public class BlockTreeTermsReader extends FieldsProducer {
       return BytesRef.getUTF8SortedAsUnicodeComparator();
     }

-    @Override
-    public void close() {
-      super.close();
-    }
-
     @Override
     public TermsEnum iterator(TermsEnum reuse) throws IOException {
       return new SegmentTermsEnum();
@@ -744,7 +735,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
       }
     }

-    private final BytesRef savedStartTerm;
+    private BytesRef savedStartTerm;

     // TODO: in some cases we can filter by length?  eg
     // regexp foo*bar must be at least length 6 bytes
@@ -784,7 +775,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
       f.load(rootCode);

       // for assert:
-      savedStartTerm = startTerm == null ? null : new BytesRef(startTerm);
+      assert setSavedStartTerm(startTerm);

       currentFrame = f;
       if (startTerm != null) {
@@ -792,6 +783,12 @@ public class BlockTreeTermsReader extends FieldsProducer {
       }
     }

+    // only for assert:
+    private boolean setSavedStartTerm(BytesRef startTerm) {
+      savedStartTerm = startTerm == null ? null : new BytesRef(startTerm);
+      return true;
+    }
+
     @Override
     public TermState termState() throws IOException {
       currentFrame.decodeMetaData();
@@ -1163,7 +1160,7 @@ public class BlockTreeTermsReader extends FieldsProducer {

   // Iterates through terms in this field
   private final class SegmentTermsEnum extends TermsEnum {
-    private final IndexInput in;
+    private IndexInput in;

     private Frame[] stack;
     private final Frame staticFrame;
@@ -1182,29 +1179,21 @@ public class BlockTreeTermsReader extends FieldsProducer {

     final BytesRef term = new BytesRef();

-    @SuppressWarnings("unchecked") private FST.Arc<BytesRef>[] arcs = new FST.Arc[5];
+    @SuppressWarnings("unchecked") private FST.Arc<BytesRef>[] arcs = new FST.Arc[1];

     public SegmentTermsEnum() throws IOException {
       //if (DEBUG) System.out.println("BTTR.init seg=" + segment);
-      in = (IndexInput) BlockTreeTermsReader.this.in.clone();
-      stack = new Frame[5];
-      for(int stackOrd=0;stackOrd<stack.length;stackOrd++) {
-        stack[stackOrd] = new Frame(stackOrd);
-      }
+      stack = new Frame[0];

       // Used to hold seek by TermState, or cached seek
       staticFrame = new Frame(-1);

-      // Init w/ root block; don't use index since it may
-      // not (and need not) have been loaded
       for(int arcIdx=0;arcIdx<arcs.length;arcIdx++) {
         arcs[arcIdx] = new FST.Arc<BytesRef>();
       }

+      // Init w/ root block; don't use index since it may
+      // not (and need not) have been loaded
+      //final FST.Arc<BytesRef> arc = index.getFirstArc(arcs[0]);
+
+      // Empty string prefix must have an output in the index!
+      //assert arc.isFinal();
+
+      currentFrame = staticFrame;
       final FST.Arc<BytesRef> arc;
       if (index != null) {
@@ -1214,8 +1203,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
       } else {
         arc = null;
       }
-      currentFrame = pushFrame(arc, rootCode, 0);
-      currentFrame.loadBlock();
+      currentFrame = staticFrame;
+      //currentFrame = pushFrame(arc, rootCode, 0);
+      //currentFrame.loadBlock();
       validIndexPrefix = 0;
       // if (DEBUG) {
       //   System.out.println("init frame state " + currentFrame.ord);
@@ -1226,6 +1216,12 @@ public class BlockTreeTermsReader extends FieldsProducer {
       //   computeBlockStats().print(System.out);
     }

+    private void initIndexInput() {
+      if (this.in == null) {
+        this.in = (IndexInput) BlockTreeTermsReader.this.in.clone();
+      }
+    }
+
     /** Runs next() through the entire terms dict,
      *  computing aggregate statistics. */
     public Stats computeBlockStats() throws IOException {
@@ -1975,6 +1971,20 @@ public class BlockTreeTermsReader extends FieldsProducer {
     @Override
     public BytesRef next() throws IOException {

+      if (in == null) {
+        // Fresh TermsEnum; seek to first term:
+        final FST.Arc<BytesRef> arc;
+        if (index != null) {
+          arc = index.getFirstArc(arcs[0]);
+          // Empty string prefix must have an output in the index!
+          assert arc.isFinal();
+        } else {
+          arc = null;
+        }
+        currentFrame = pushFrame(arc, rootCode, 0);
+        currentFrame.loadBlock();
+      }
+
       targetBeforeCurrentLength = currentFrame.ord;

       assert !eof;
@@ -2242,6 +2252,11 @@ public class BlockTreeTermsReader extends FieldsProducer {
      *  use. */
     void loadBlock() throws IOException {

+      // Clone the IndexInput lazily, so that consumers
+      // that just pull a TermsEnum to
+      // seekExact(TermState) don't pay this cost:
+      initIndexInput();
+
       if (nextEnt != -1) {
         // Already loaded
         return;
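Editorial note (not part of the commit): beyond the API removal, this file also makes SegmentTermsEnum lazier -- the IndexInput clone and the initial root-block push now happen on first use instead of in the constructor, so enums that are only ever positioned via seekExact(term, state) never pay for them. A reduced sketch of the idiom under that assumption (the surrounding reader plumbing and class name are illustrative):

import java.io.IOException;

import org.apache.lucene.store.IndexInput;

class LazyCloneSketch {
  private final IndexInput shared; // the reader-wide terms-dict input
  private IndexInput in;           // per-enum clone, created on demand

  LazyCloneSketch(IndexInput shared) {
    this.shared = shared;          // constructor stays cheap: no clone here
  }

  private void initIndexInput() {
    if (in == null) {
      in = (IndexInput) shared.clone(); // same cast the diff uses
    }
  }

  void loadBlock() throws IOException {
    initIndexInput();              // clone lazily, right before the first real read
    // ... decode the block from 'in' ...
  }
}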
@@ -20,20 +20,23 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.*;

-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader.ReaderContext;
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
+import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PriorityQueue;
 import org.apache.lucene.util.TermContext;
 import org.apache.lucene.util.ToStringUtils;
-import org.apache.lucene.util.PriorityQueue;
-import org.apache.lucene.util.Bits;

 /**
  * MultiPhraseQuery is a generalized version of PhraseQuery, with an added
@@ -134,6 +137,7 @@ public class MultiPhraseQuery extends Query {
   private class MultiPhraseWeight extends Weight {
     private final Similarity similarity;
     private final Similarity.Stats stats;
+    private final Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();

     public MultiPhraseWeight(IndexSearcher searcher)
       throws IOException {
@@ -144,7 +148,11 @@ public class MultiPhraseQuery extends Query {
       ArrayList<TermStatistics> allTermStats = new ArrayList<TermStatistics>();
       for(final Term[] terms: termArrays) {
         for (Term term: terms) {
-          TermContext termContext = TermContext.build(context, term, true);
+          TermContext termContext = termContexts.get(term);
+          if (termContext == null) {
+            termContext = TermContext.build(context, term, true);
+            termContexts.put(term, termContext);
+          }
           allTermStats.add(searcher.termStatistics(term, termContext));
         }
       }
@@ -174,6 +182,14 @@ public class MultiPhraseQuery extends Query {

       PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[termArrays.size()];

+      final Terms fieldTerms = reader.terms(field);
+      if (fieldTerms == null) {
+        return null;
+      }
+
+      // Reuse single TermsEnum below:
+      final TermsEnum termsEnum = fieldTerms.iterator(null);
+
       for (int pos=0; pos<postingsFreqs.length; pos++) {
         Term[] terms = termArrays.get(pos);

@@ -181,31 +197,43 @@ public class MultiPhraseQuery extends Query {
         int docFreq;

         if (terms.length > 1) {
-          postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, reader, terms);
+          postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum);

           // coarse -- this overcounts since a given doc can
-          // have more than one terms:
+          // have more than one term:
           docFreq = 0;
           for(int termIdx=0;termIdx<terms.length;termIdx++) {
-            docFreq += reader.docFreq(terms[termIdx]);
+            final Term term = terms[termIdx];
+            TermState termState = termContexts.get(term).get(context.ord);
+            if (termState == null) {
+              // Term not in reader
+              continue;
+            }
+            termsEnum.seekExact(term.bytes(), termState);
+            docFreq += termsEnum.docFreq();
           }
+
+          if (docFreq == 0) {
+            // None of the terms are in this reader
+            return null;
+          }
         } else {
           final Term term = terms[0];
-          postingsEnum = reader.termPositionsEnum(liveDocs,
-                                                  term.field(),
-                                                  term.bytes());
-
-          if (postingsEnum == null) {
-            if (reader.termDocsEnum(liveDocs, term.field(), term.bytes()) != null) {
-              // term does exist, but has no positions
-              throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
-            } else {
-              // term does not exist
+          TermState termState = termContexts.get(term).get(context.ord);
+          if (termState == null) {
+            // Term not in reader
             return null;
           }
+          termsEnum.seekExact(term.bytes(), termState);
+          postingsEnum = termsEnum.docsAndPositions(liveDocs, null);
+
+          if (postingsEnum == null) {
+            // term does exist, but has no positions
+            assert termsEnum.docs(liveDocs, null) != null: "termstate found but no term exists in reader";
+            throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
          }

-          docFreq = reader.docFreq(term.field(), term.bytes());
+          docFreq = termsEnum.docFreq();
         }

         postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]);
@@ -437,20 +465,22 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
   private DocsQueue _queue;
   private IntQueue _posList;

-  public UnionDocsAndPositionsEnum(Bits liveDocs, IndexReader indexReader, Term[] terms) throws IOException {
+  public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
     List<DocsAndPositionsEnum> docsEnums = new LinkedList<DocsAndPositionsEnum>();
     for (int i = 0; i < terms.length; i++) {
-      DocsAndPositionsEnum postings = indexReader.termPositionsEnum(liveDocs,
-                                                                    terms[i].field(),
-                                                                    terms[i].bytes());
-      if (postings != null) {
-        docsEnums.add(postings);
-      } else {
-        if (indexReader.termDocsEnum(liveDocs, terms[i].field(), terms[i].bytes()) != null) {
+      final Term term = terms[i];
+      TermState termState = termContexts.get(term).get(context.ord);
+      if (termState == null) {
+        // Term doesn't exist in reader
+        continue;
+      }
+      termsEnum.seekExact(term.bytes(), termState);
+      DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null);
+      if (postings == null) {
         // term does exist, but has no positions
-          throw new IllegalStateException("field \"" + terms[i].field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + terms[i].text() + ")");
-        }
+        throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
       }
+      docsEnums.add(postings);
     }

     _queue = new DocsQueue(docsEnums);
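Editorial note (not part of the commit): MultiPhraseQuery now resolves each term once into a TermContext during Weight construction and then, per segment, seeks by the recorded TermState instead of re-walking the terms dictionary. A condensed sketch of that per-segment step (the call names follow the diff; the class, the method, and the setup of readerOrd and liveDocs are assumptions for illustration):

import java.io.IOException;

import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.TermContext;

class SeekByState {
  // Positions a reused TermsEnum via a previously recorded TermState and
  // returns positional postings, or null when the term is absent from the
  // segment identified by readerOrd.
  static DocsAndPositionsEnum postings(TermsEnum termsEnum, Term term, TermContext termContext,
                                       int readerOrd, Bits liveDocs) throws IOException {
    final TermState termState = termContext.get(readerOrd);
    if (termState == null) {
      return null; // term not in this segment
    }
    termsEnum.seekExact(term.bytes(), termState); // no terms-dict lookup needed
    return termsEnum.docsAndPositions(liveDocs, null);
  }
}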
@@ -18,24 +18,24 @@ package org.apache.lucene.search;
  */

 import java.io.IOException;
-import java.util.Set;
 import java.util.ArrayList;
+import java.util.Set;

+import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader.ReaderContext;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.TermContext;
 import org.apache.lucene.util.ToStringUtils;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.Bits;

 /** A Query that matches documents containing a particular sequence of terms.
  *  A PhraseQuery is built by QueryParser for input like <code>"new york"</code>.
@@ -222,27 +222,32 @@ public class PhraseQuery extends Query {
       final IndexReader reader = context.reader;
       final Bits liveDocs = acceptDocs;
       PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.size()];

+      final Terms fieldTerms = reader.terms(field);
+      if (fieldTerms == null) {
+        return null;
+      }
+
+      // Reuse single TermsEnum below:
+      final TermsEnum te = fieldTerms.iterator(null);
+
       for (int i = 0; i < terms.size(); i++) {
         final Term t = terms.get(i);
         final TermState state = states[i].get(context.ord);
         if (state == null) { /* term doesnt exist in this segment */
-          assert termNotInReader(reader, field, t.bytes()) : "no termstate found but term exists in reader";
+          assert termNotInReader(reader, field, t.bytes()): "no termstate found but term exists in reader";
           return null;
         }
-        DocsAndPositionsEnum postingsEnum = reader.termPositionsEnum(liveDocs,
-                                                                     t.field(),
-                                                                     t.bytes(),
-                                                                     state);
+        te.seekExact(t.bytes(), state);
+        DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null);

         // PhraseQuery on a field that did not index
         // positions.
         if (postingsEnum == null) {
-          assert (reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null) : "termstate found but no term exists in reader";
+          assert reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null: "termstate found but no term exists in reader";
           // term does exist, but has no positions
           throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
         }
-        // get the docFreq without seeking
-        TermsEnum te = reader.fields().terms(field).getThreadTermsEnum();
-        te.seekExact(t.bytes(), state);
         postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i).intValue(), t);
       }

@@ -264,10 +269,9 @@ public class PhraseQuery extends Query {
       }
     }

-    private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException {
-      // only called from assert
-      final Terms terms = reader.terms(field);
-      return terms == null || terms.docFreq(bytes) == 0;
+    private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException {
+      return reader.docFreq(field, bytes) == 0;
     }

     @Override
@@ -23,7 +23,6 @@ import java.util.Set;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.TermState;
-import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader.ReaderContext;
@@ -41,13 +40,13 @@ import org.apache.lucene.util.ToStringUtils;
  */
 public class TermQuery extends Query {
   private final Term term;
-  private int docFreq;
-  private transient TermContext perReaderTermState;
+  private final int docFreq;
+  private final TermContext perReaderTermState;

   final class TermWeight extends Weight {
     private final Similarity similarity;
     private final Similarity.Stats stats;
-    private transient TermContext termStates;
+    private final TermContext termStates;

     public TermWeight(IndexSearcher searcher, TermContext termStates)
       throws IOException {
@@ -108,7 +107,7 @@ public class TermQuery extends Query {
         return null;
       }
       //System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
-      final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum();
+      final TermsEnum termsEnum = context.reader.terms(term.field()).iterator(null);
       termsEnum.seekExact(term.bytes(), state);
       return termsEnum;
     }
@@ -116,8 +115,7 @@ public class TermQuery extends Query {
     private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException {
       // only called from assert
       //System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString());
-      final Terms terms = reader.terms(field);
-      return terms == null || terms.docFreq(bytes) == 0;
+      return reader.docFreq(field, bytes) == 0;
     }

     @Override
@@ -17,7 +17,6 @@ package org.apache.lucene.search.spans;
  * limitations under the License.
  */

-import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.Term;
@@ -26,7 +25,6 @@ import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util.TermContext;
 import org.apache.lucene.util.ToStringUtils;

@@ -99,7 +97,7 @@ public class SpanTermQuery extends SpanQuery {
     if (fields != null) {
       final Terms terms = fields.terms(term.field());
       if (terms != null) {
-        final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share!
+        final TermsEnum termsEnum = terms.iterator(null);
         if (termsEnum.seekExact(term.bytes(), true)) {
           state = termsEnum.termState();
         } else {
@@ -119,7 +117,7 @@ public class SpanTermQuery extends SpanQuery {
       return TermSpans.EMPTY_TERM_SPANS;
     }

-    final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum();
+    final TermsEnum termsEnum = context.reader.terms(term.field()).iterator(null);
     termsEnum.seekExact(term.bytes(), state);

     final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null);
@@ -46,7 +46,6 @@ public final class TermContext {

   //public static boolean DEBUG = BlockTreeTermsWriter.DEBUG;

-
   /**
    * Creates an empty {@link TermContext} from a {@link ReaderContext}
    */
@@ -94,7 +93,7 @@ public final class TermContext {
       if (fields != null) {
         final Terms terms = fields.terms(field);
         if (terms != null) {
-          final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share!
+          final TermsEnum termsEnum = terms.iterator(null);
           if (termsEnum.seekExact(bytes, cache)) {
             final TermState termState = termsEnum.termState();
             //if (DEBUG) System.out.println("    found");
@@ -349,7 +349,9 @@ public class TestDocsAndPositions extends LuceneTestCase {
       assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

       // now reuse and check again
-      disi = r.terms("foo").docs(null, new BytesRef("bar"), disi);
+      TermsEnum te = r.terms("foo").iterator(null);
+      assertTrue(te.seekExact(new BytesRef("bar"), true));
+      disi = te.docs(null, disi);
       docid = disi.docID();
       assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
       assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
@@ -372,7 +374,9 @@ public class TestDocsAndPositions extends LuceneTestCase {
       assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

       // now reuse and check again
-      disi = r.terms("foo").docsAndPositions(null, new BytesRef("bar"), disi);
+      TermsEnum te = r.terms("foo").iterator(null);
+      assertTrue(te.seekExact(new BytesRef("bar"), true));
+      disi = te.docsAndPositions(null, disi);
       docid = disi.docID();
       assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
       assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
@@ -199,7 +199,7 @@ public class TestDocumentWriter extends LuceneTestCase {
     writer.close();
     SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random));

-    DocsAndPositionsEnum termPositions = reader.fields().terms("f1").docsAndPositions(reader.getLiveDocs(), new BytesRef("a"), null);
+    DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, reader.getLiveDocs(), "f1", new BytesRef("a"));
     assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
     int freq = termPositions.freq();
     assertEquals(3, freq);
@@ -243,18 +243,18 @@ public class TestDocumentWriter extends LuceneTestCase {
     writer.close();
     SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random));

-    DocsAndPositionsEnum termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getLiveDocs(), new BytesRef("term1"), null);
+    DocsAndPositionsEnum termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term1"));
     assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
     assertEquals(1, termPositions.freq());
     assertEquals(0, termPositions.nextPosition());

-    termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getLiveDocs(), new BytesRef("term2"), null);
+    termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term2"));
     assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
     assertEquals(2, termPositions.freq());
     assertEquals(1, termPositions.nextPosition());
     assertEquals(3, termPositions.nextPosition());

-    termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getLiveDocs(), new BytesRef("term3"), null);
+    termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term3"));
     assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
     assertEquals(1, termPositions.freq());
     assertEquals(2, termPositions.nextPosition());
@@ -1340,13 +1340,12 @@ public class TestIndexReader extends LuceneTestCase
     writer.addDocument(d);
     IndexReader r = writer.getReader();
     writer.close();
-    Terms terms = MultiFields.getTerms(r, "f");
     try {
       // Make sure codec impls totalTermFreq (eg PreFlex doesn't)
-      Assume.assumeTrue(terms.totalTermFreq(new BytesRef("b")) != -1);
-      assertEquals(1, terms.totalTermFreq(new BytesRef("b")));
-      assertEquals(2, terms.totalTermFreq(new BytesRef("a")));
-      assertEquals(1, terms.totalTermFreq(new BytesRef("b")));
+      Assume.assumeTrue(MultiFields.totalTermFreq(r, "f", new BytesRef("b")) != -1);
+      assertEquals(1, MultiFields.totalTermFreq(r, "f", new BytesRef("b")));
+      assertEquals(2, MultiFields.totalTermFreq(r, "f", new BytesRef("a")));
+      assertEquals(1, MultiFields.totalTermFreq(r, "f", new BytesRef("b")));
     } finally {
       r.close();
       dir.close();
@@ -113,7 +113,6 @@ public class TestMultiFields extends LuceneTestCase {
       for(int delDoc : deleted) {
        assertFalse(liveDocs.get(delDoc));
      }
-      Terms terms2 = MultiFields.getTerms(reader, "field");

      for(int i=0;i<100;i++) {
        BytesRef term = terms.get(random.nextInt(terms.size()));
@@ -121,7 +120,7 @@ public class TestMultiFields extends LuceneTestCase {
          System.out.println("TEST: seek term="+ UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
        }

-        DocsEnum docsEnum = terms2.docs(liveDocs, term, null);
+        DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, liveDocs, "field", term);
        assertNotNull(docsEnum);

        for(int docID : docs.get(term)) {
@@ -447,7 +447,7 @@ public class TestOmitTf extends LuceneTestCase {
     IndexReader ir = iw.getReader();
     iw.close();
     Terms terms = MultiFields.getTerms(ir, "foo");
-    assertEquals(-1, terms.totalTermFreq(new BytesRef("bar")));
+    assertEquals(-1, MultiFields.totalTermFreq(ir, "foo", new BytesRef("bar")));
     assertEquals(-1, terms.getSumTotalTermFreq());
     ir.close();
     dir.close();
@@ -342,6 +342,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
       return;
     }
     Terms terms2 = fields.terms(idField);
+    TermsEnum termsEnum2 = terms2.iterator(null);

     DocsEnum termDocs1 = null;
     DocsEnum termDocs2 = null;
@@ -354,7 +355,11 @@ public class TestStressIndexing2 extends LuceneTestCase {
       }

       termDocs1 = termsEnum.docs(liveDocs1, termDocs1);
-      termDocs2 = terms2.docs(liveDocs2, term, termDocs2);
+      if (termsEnum2.seekExact(term, false)) {
+        termDocs2 = termsEnum2.docs(liveDocs2, termDocs2);
+      } else {
+        termDocs2 = null;
+      }

       if (termDocs1.nextDoc() == DocsEnum.NO_MORE_DOCS) {
         // This doc is deleted and wasn't replaced
@@ -397,11 +402,11 @@ public class TestStressIndexing2 extends LuceneTestCase {
         System.out.println("  " + field + ":");
         Terms terms3 = fieldsEnum.terms();
         assertNotNull(terms3);
-        TermsEnum termsEnum2 = terms3.iterator(null);
+        TermsEnum termsEnum3 = terms3.iterator(null);
         BytesRef term2;
-        while((term2 = termsEnum2.next()) != null) {
-          System.out.println("    " + term2.utf8ToString() + ": freq=" + termsEnum2.totalTermFreq());
-          dpEnum = termsEnum2.docsAndPositions(null, dpEnum);
+        while((term2 = termsEnum3.next()) != null) {
+          System.out.println("    " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq());
+          dpEnum = termsEnum3.docsAndPositions(null, dpEnum);
           if (dpEnum != null) {
             assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
             final int freq = dpEnum.freq();
@@ -410,7 +415,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
               System.out.println("      pos=" + dpEnum.nextPosition());
             }
           } else {
-            dEnum = termsEnum2.docs(null, dEnum);
+            dEnum = termsEnum3.docs(null, dEnum);
             assertNotNull(dEnum);
             assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
             final int freq = dEnum.freq();
@@ -431,11 +436,11 @@ public class TestStressIndexing2 extends LuceneTestCase {
         System.out.println("  " + field + ":");
         Terms terms3 = fieldsEnum.terms();
         assertNotNull(terms3);
-        TermsEnum termsEnum2 = terms3.iterator(null);
+        TermsEnum termsEnum3 = terms3.iterator(null);
         BytesRef term2;
-        while((term2 = termsEnum2.next()) != null) {
-          System.out.println("    " + term2.utf8ToString() + ": freq=" + termsEnum2.totalTermFreq());
-          dpEnum = termsEnum2.docsAndPositions(null, dpEnum);
+        while((term2 = termsEnum3.next()) != null) {
+          System.out.println("    " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq());
+          dpEnum = termsEnum3.docsAndPositions(null, dpEnum);
           if (dpEnum != null) {
             assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
             final int freq = dpEnum.freq();
@@ -444,7 +449,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
               System.out.println("      pos=" + dpEnum.nextPosition());
             }
           } else {
-            dEnum = termsEnum2.docs(null, dEnum);
+            dEnum = termsEnum3.docs(null, dEnum);
             assertNotNull(dEnum);
             assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
             final int freq = dEnum.freq();
@@ -467,7 +472,7 @@ public class TestStressIndexing2 extends LuceneTestCase {

     String field1=null, field2=null;
     TermsEnum termsEnum1 = null;
-    TermsEnum termsEnum2 = null;
+    termsEnum2 = null;
     DocsEnum docs1=null, docs2=null;

     // pack both doc and freq into single element for easy sorting
@@ -59,7 +59,17 @@ public class TFValueSource extends TermFreqValueSource {

         public void reset() throws IOException {
           // no one should call us for deleted docs?
-          docs = terms==null ? null : terms.docs(null, indexedBytes, null);
+          if (terms != null) {
+            final TermsEnum termsEnum = terms.iterator(null);
+            if (termsEnum.seekExact(indexedBytes, false)) {
+              docs = termsEnum.docs(null, null);
+            } else {
+              docs = null;
+            }
+          } else {
+            docs = null;
+          }
+
           if (docs == null) {
             docs = new DocsEnum() {
               @Override
@@ -51,7 +51,18 @@ public class TermFreqValueSource extends DocFreqValueSource {

         public void reset() throws IOException {
           // no one should call us for deleted docs?
-          docs = terms == null ? null : terms.docs(null, indexedBytes, null);
+
+          if (terms != null) {
+            final TermsEnum termsEnum = terms.iterator(null);
+            if (termsEnum.seekExact(indexedBytes, false)) {
+              docs = termsEnum.docs(null, null);
+            } else {
+              docs = null;
+            }
+          } else {
+            docs = null;
+          }
+
           if (docs == null) {
             docs = new DocsEnum() {
               @Override
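Editorial note (not part of the commit): both value sources above perform the same one-shot lookup, passing useCache=false to seekExact since the seek happens once per reader, and substituting a dummy DocsEnum when nothing comes back. A compact sketch of that helper shape (the class and method names are illustrative; the real code inlines this logic):

import java.io.IOException;

import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

class OneShotLookup {
  // Returns postings for indexedBytes, or null when the field or term is
  // missing from this reader.
  static DocsEnum docs(Terms terms, BytesRef indexedBytes) throws IOException {
    if (terms == null) {
      return null;
    }
    final TermsEnum termsEnum = terms.iterator(null);
    if (termsEnum.seekExact(indexedBytes, false)) { // false: skip the terms cache for a one-off seek
      return termsEnum.docs(null, null);            // no liveDocs filtering here
    }
    return null;
  }
}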
@@ -555,7 +555,11 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
     Terms terms = fields.terms(t.field());
     if (terms == null) return -1;
     BytesRef termBytes = t.bytes();
-    DocsEnum docs = terms.docs(MultiFields.getLiveDocs(reader), termBytes, null);
+    final TermsEnum termsEnum = terms.iterator(null);
+    if (!termsEnum.seekExact(termBytes, false)) {
+      return -1;
+    }
+    DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(reader), null);
     if (docs == null) return -1;
     int id = docs.nextDoc();
     return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
@@ -947,7 +951,13 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
     BytesRef termBytes = t.bytes();

     Bits liveDocs = reader.getLiveDocs();
-    DocsEnum docsEnum = terms==null ? null : terms.docs(liveDocs, termBytes, null);
+    DocsEnum docsEnum = null;
+    if (terms != null) {
+      final TermsEnum termsEnum = terms.iterator(null);
+      if (termsEnum.seekExact(termBytes, false)) {
+        docsEnum = termsEnum.docs(MultiFields.getLiveDocs(reader), null);
+      }
+    }

     if (docsEnum != null) {
       DocsEnum.BulkReadResult readResult = docsEnum.getBulkResult();
@@ -723,8 +723,11 @@ public class TestRealTimeGet extends SolrTestCaseJ4 {
     Terms terms = fields.terms(t.field());
     if (terms == null) return -1;
     BytesRef termBytes = t.bytes();
-    DocsEnum docs = terms.docs(MultiFields.getLiveDocs(r), termBytes, null);
-    if (docs == null) return -1;
+    final TermsEnum termsEnum = terms.iterator(null);
+    if (!termsEnum.seekExact(termBytes, false)) {
+      return -1;
+    }
+    DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null);
     int id = docs.nextDoc();
     if (id != DocIdSetIterator.NO_MORE_DOCS) {
       int next = docs.nextDoc();