mirror of https://github.com/apache/lucene.git
LUCENE-3562: stop caching thread-private TermsEnums in Terms
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1203294 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5a3b635239
commit
9e27723b37
|
@ -21,9 +21,7 @@ import java.io.File;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.FSDirectory;
|
import org.apache.lucene.store.FSDirectory;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.index.Terms;
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.MultiFields;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Utility to get document frequency and total number of occurrences (sum of the tf for each doc) of a term.
|
* Utility to get document frequency and total number of occurrences (sum of the tf for each doc) of a term.
|
||||||
|
@ -50,10 +48,9 @@ public class GetTermInfo {
|
||||||
|
|
||||||
public static void getTermInfo(Directory dir, String field, BytesRef termtext) throws Exception {
|
public static void getTermInfo(Directory dir, String field, BytesRef termtext) throws Exception {
|
||||||
IndexReader reader = IndexReader.open(dir);
|
IndexReader reader = IndexReader.open(dir);
|
||||||
Terms terms =MultiFields.getTerms(reader, field);
|
|
||||||
long totalTF = HighFreqTerms.getTotalTermFreq(reader, field, termtext);
|
long totalTF = HighFreqTerms.getTotalTermFreq(reader, field, termtext);
|
||||||
System.out.printf("%s:%s \t totalTF = %,d \t doc freq = %,d \n",
|
System.out.printf("%s:%s \t totalTF = %,d \t doc freq = %,d \n",
|
||||||
field, termtext.utf8ToString(), totalTF, terms.docFreq(termtext));
|
field, termtext.utf8ToString(), totalTF, reader.docFreq(field, termtext));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void usage() {
|
private static void usage() {
|
||||||
|
|
|
@ -84,21 +84,6 @@ public class FilterIndexReader extends IndexReader {
|
||||||
return in.getComparator();
|
return in.getComparator();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public int docFreq(BytesRef text) throws IOException {
|
|
||||||
return in.docFreq(text);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocsEnum docs(Bits liveDocs, BytesRef text, DocsEnum reuse) throws IOException {
|
|
||||||
return in.docs(liveDocs, text, reuse);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef text, DocsAndPositionsEnum reuse) throws IOException {
|
|
||||||
return in.docsAndPositions(liveDocs, text, reuse);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getUniqueTermCount() throws IOException {
|
public long getUniqueTermCount() throws IOException {
|
||||||
return in.getUniqueTermCount();
|
return in.getUniqueTermCount();
|
||||||
|
|
|
@ -991,7 +991,12 @@ public abstract class IndexReader implements Cloneable,Closeable {
|
||||||
if (terms == null) {
|
if (terms == null) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return terms.docFreq(term);
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
|
if (termsEnum.seekExact(term, true)) {
|
||||||
|
return termsEnum.docFreq();
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the number of documents containing the term
|
/** Returns the number of documents containing the term
|
||||||
|
@ -1008,7 +1013,12 @@ public abstract class IndexReader implements Cloneable,Closeable {
|
||||||
if (terms == null) {
|
if (terms == null) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return terms.totalTermFreq(term);
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
|
if (termsEnum.seekExact(term, true)) {
|
||||||
|
return termsEnum.totalTermFreq();
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** This may return null if the field does not exist.*/
|
/** This may return null if the field does not exist.*/
|
||||||
|
@ -1027,15 +1037,16 @@ public abstract class IndexReader implements Cloneable,Closeable {
|
||||||
assert field != null;
|
assert field != null;
|
||||||
assert term != null;
|
assert term != null;
|
||||||
final Fields fields = fields();
|
final Fields fields = fields();
|
||||||
if (fields == null) {
|
if (fields != null) {
|
||||||
return null;
|
final Terms terms = fields.terms(field);
|
||||||
}
|
if (terms != null) {
|
||||||
final Terms terms = fields.terms(field);
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
if (terms != null) {
|
if (termsEnum.seekExact(term, true)) {
|
||||||
return terms.docs(liveDocs, term, null);
|
return termsEnum.docs(liveDocs, null);
|
||||||
} else {
|
}
|
||||||
return null;
|
}
|
||||||
}
|
}
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns {@link DocsAndPositionsEnum} for the specified
|
/** Returns {@link DocsAndPositionsEnum} for the specified
|
||||||
|
@ -1046,15 +1057,16 @@ public abstract class IndexReader implements Cloneable,Closeable {
|
||||||
assert field != null;
|
assert field != null;
|
||||||
assert term != null;
|
assert term != null;
|
||||||
final Fields fields = fields();
|
final Fields fields = fields();
|
||||||
if (fields == null) {
|
if (fields != null) {
|
||||||
return null;
|
final Terms terms = fields.terms(field);
|
||||||
}
|
if (terms != null) {
|
||||||
final Terms terms = fields.terms(field);
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
if (terms != null) {
|
if (termsEnum.seekExact(term, true)) {
|
||||||
return terms.docsAndPositions(liveDocs, term, null);
|
return termsEnum.docsAndPositions(liveDocs, null);
|
||||||
} else {
|
}
|
||||||
return null;
|
}
|
||||||
}
|
}
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1066,15 +1078,15 @@ public abstract class IndexReader implements Cloneable,Closeable {
|
||||||
assert state != null;
|
assert state != null;
|
||||||
assert field != null;
|
assert field != null;
|
||||||
final Fields fields = fields();
|
final Fields fields = fields();
|
||||||
if (fields == null) {
|
if (fields != null) {
|
||||||
return null;
|
final Terms terms = fields.terms(field);
|
||||||
}
|
if (terms != null) {
|
||||||
final Terms terms = fields.terms(field);
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
if (terms != null) {
|
termsEnum.seekExact(term, state);
|
||||||
return terms.docs(liveDocs, term, state, null);
|
return termsEnum.docs(liveDocs, null);
|
||||||
} else {
|
}
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1086,15 +1098,15 @@ public abstract class IndexReader implements Cloneable,Closeable {
|
||||||
assert state != null;
|
assert state != null;
|
||||||
assert field != null;
|
assert field != null;
|
||||||
final Fields fields = fields();
|
final Fields fields = fields();
|
||||||
if (fields == null) {
|
if (fields != null) {
|
||||||
return null;
|
final Terms terms = fields.terms(field);
|
||||||
}
|
if (terms != null) {
|
||||||
final Terms terms = fields.terms(field);
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
if (terms != null) {
|
termsEnum.seekExact(term, state);
|
||||||
return terms.docsAndPositions(liveDocs, term, state, null);
|
return termsEnum.docsAndPositions(liveDocs, null);
|
||||||
} else {
|
}
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -156,10 +156,12 @@ public final class MultiFields extends Fields {
|
||||||
assert term != null;
|
assert term != null;
|
||||||
final Terms terms = getTerms(r, field);
|
final Terms terms = getTerms(r, field);
|
||||||
if (terms != null) {
|
if (terms != null) {
|
||||||
return terms.docs(liveDocs, term, null);
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
} else {
|
if (termsEnum.seekExact(term, true)) {
|
||||||
return null;
|
return termsEnum.docs(liveDocs, null);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns {@link DocsAndPositionsEnum} for the specified
|
/** Returns {@link DocsAndPositionsEnum} for the specified
|
||||||
|
@ -170,10 +172,12 @@ public final class MultiFields extends Fields {
|
||||||
assert term != null;
|
assert term != null;
|
||||||
final Terms terms = getTerms(r, field);
|
final Terms terms = getTerms(r, field);
|
||||||
if (terms != null) {
|
if (terms != null) {
|
||||||
return terms.docsAndPositions(liveDocs, term, null);
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
} else {
|
if (termsEnum.seekExact(term, true)) {
|
||||||
return null;
|
return termsEnum.docsAndPositions(liveDocs, null);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public MultiFields(Fields[] subs, ReaderUtil.Slice[] subSlices) {
|
public MultiFields(Fields[] subs, ReaderUtil.Slice[] subSlices) {
|
||||||
|
@ -233,6 +237,17 @@ public final class MultiFields extends Fields {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static long totalTermFreq(IndexReader r, String field, BytesRef text) throws IOException {
|
||||||
|
final Terms terms = getTerms(r, field);
|
||||||
|
if (terms != null) {
|
||||||
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
|
if (termsEnum.seekExact(text, true)) {
|
||||||
|
return termsEnum.totalTermFreq();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getUniqueFieldCount() {
|
public int getUniqueFieldCount() {
|
||||||
return terms.size();
|
return terms.size();
|
||||||
|
|
|
@ -36,7 +36,6 @@ import org.apache.lucene.index.codecs.TermVectorsReader;
|
||||||
import org.apache.lucene.store.IOContext;
|
import org.apache.lucene.store.IOContext;
|
||||||
import org.apache.lucene.util.BitVector;
|
import org.apache.lucene.util.BitVector;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.BytesRef;
|
|
||||||
import org.apache.lucene.util.CloseableThreadLocal;
|
import org.apache.lucene.util.CloseableThreadLocal;
|
||||||
import org.apache.lucene.util.StringHelper;
|
import org.apache.lucene.util.StringHelper;
|
||||||
|
|
||||||
|
@ -473,17 +472,6 @@ public class SegmentReader extends IndexReader implements Cloneable {
|
||||||
return core.fields;
|
return core.fields;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public int docFreq(String field, BytesRef term) throws IOException {
|
|
||||||
ensureOpen();
|
|
||||||
Terms terms = core.fields.terms(field);
|
|
||||||
if (terms != null) {
|
|
||||||
return terms.docFreq(term);
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int numDocs() {
|
public int numDocs() {
|
||||||
// Don't call ensureOpen() here (it could affect performance)
|
// Don't call ensureOpen() here (it could affect performance)
|
||||||
|
|
|
@ -20,9 +20,7 @@ package org.apache.lucene.index;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
|
||||||
import org.apache.lucene.util.Bits;
|
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.CloseableThreadLocal;
|
|
||||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -32,10 +30,6 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||||
|
|
||||||
public abstract class Terms {
|
public abstract class Terms {
|
||||||
|
|
||||||
// Privately cache a TermsEnum per-thread for looking up
|
|
||||||
// docFreq and getting a private DocsEnum
|
|
||||||
private final CloseableThreadLocal<TermsEnum> threadEnums = new CloseableThreadLocal<TermsEnum>();
|
|
||||||
|
|
||||||
/** Returns an iterator that will step through all
|
/** Returns an iterator that will step through all
|
||||||
* terms. This method will not return null. If you have
|
* terms. This method will not return null. If you have
|
||||||
* a previous TermsEnum, for example from a different
|
* a previous TermsEnum, for example from a different
|
||||||
|
@ -83,81 +77,6 @@ public abstract class Terms {
|
||||||
* reuse it. */
|
* reuse it. */
|
||||||
public abstract Comparator<BytesRef> getComparator() throws IOException;
|
public abstract Comparator<BytesRef> getComparator() throws IOException;
|
||||||
|
|
||||||
/** Returns the number of documents containing the
|
|
||||||
* specified term text. Returns 0 if the term does not
|
|
||||||
* exist. */
|
|
||||||
public int docFreq(BytesRef text) throws IOException {
|
|
||||||
final TermsEnum termsEnum = getThreadTermsEnum();
|
|
||||||
if (termsEnum.seekExact(text, true)) {
|
|
||||||
return termsEnum.docFreq();
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns the total number of occurrences of this term
|
|
||||||
* across all documents (the sum of the freq() for each
|
|
||||||
* doc that has this term). This will be -1 if the
|
|
||||||
* codec doesn't support this measure. Note that, like
|
|
||||||
* other term measures, this measure does not take
|
|
||||||
* deleted documents into account. */
|
|
||||||
public long totalTermFreq(BytesRef text) throws IOException {
|
|
||||||
final TermsEnum termsEnum = getThreadTermsEnum();
|
|
||||||
if (termsEnum.seekExact(text, true)) {
|
|
||||||
return termsEnum.totalTermFreq();
|
|
||||||
} else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Get {@link DocsEnum} for the specified term. This
|
|
||||||
* method may return null if the term does not exist. */
|
|
||||||
public DocsEnum docs(Bits liveDocs, BytesRef text, DocsEnum reuse) throws IOException {
|
|
||||||
final TermsEnum termsEnum = getThreadTermsEnum();
|
|
||||||
if (termsEnum.seekExact(text, true)) {
|
|
||||||
return termsEnum.docs(liveDocs, reuse);
|
|
||||||
} else {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Get {@link DocsEnum} for the specified term. This
|
|
||||||
* method will may return null if the term does not
|
|
||||||
* exists, or positions were not indexed. */
|
|
||||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef text, DocsAndPositionsEnum reuse) throws IOException {
|
|
||||||
final TermsEnum termsEnum = getThreadTermsEnum();
|
|
||||||
if (termsEnum.seekExact(text, true)) {
|
|
||||||
return termsEnum.docsAndPositions(liveDocs, reuse);
|
|
||||||
} else {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Expert: Get {@link DocsEnum} for the specified {@link TermState}.
|
|
||||||
* This method may return <code>null</code> if the term does not exist.
|
|
||||||
*
|
|
||||||
* @see TermsEnum#termState()
|
|
||||||
* @see TermsEnum#seekExact(BytesRef, TermState) */
|
|
||||||
public DocsEnum docs(Bits liveDocs, BytesRef term, TermState termState, DocsEnum reuse) throws IOException {
|
|
||||||
final TermsEnum termsEnum = getThreadTermsEnum();
|
|
||||||
termsEnum.seekExact(term, termState);
|
|
||||||
return termsEnum.docs(liveDocs, reuse);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get {@link DocsEnum} for the specified {@link TermState}. This
|
|
||||||
* method will may return <code>null</code> if the term does not exists, or positions were
|
|
||||||
* not indexed.
|
|
||||||
*
|
|
||||||
* @see TermsEnum#termState()
|
|
||||||
* @see TermsEnum#seekExact(BytesRef, TermState) */
|
|
||||||
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, BytesRef term, TermState termState, DocsAndPositionsEnum reuse) throws IOException {
|
|
||||||
final TermsEnum termsEnum = getThreadTermsEnum();
|
|
||||||
termsEnum.seekExact(term, termState);
|
|
||||||
return termsEnum.docsAndPositions(liveDocs, reuse);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Returns the number of terms for this field, or -1 if this
|
/** Returns the number of terms for this field, or -1 if this
|
||||||
* measure isn't stored by the codec. Note that, just like
|
* measure isn't stored by the codec. Note that, just like
|
||||||
* other term measures, this measure does not take deleted
|
* other term measures, this measure does not take deleted
|
||||||
|
@ -172,7 +91,7 @@ public abstract class Terms {
|
||||||
* into account. */
|
* into account. */
|
||||||
public abstract long getSumTotalTermFreq() throws IOException;
|
public abstract long getSumTotalTermFreq() throws IOException;
|
||||||
|
|
||||||
/** Returns the sum of {@link #docFreq(BytesRef)} for
|
/** Returns the sum of {@link TermsEnum#docFreq()} for
|
||||||
* all terms in this field, or -1 if this measure isn't
|
* all terms in this field, or -1 if this measure isn't
|
||||||
* stored by the codec. Note that, just like other term
|
* stored by the codec. Note that, just like other term
|
||||||
* measures, this measure does not take deleted documents
|
* measures, this measure does not take deleted documents
|
||||||
|
@ -185,34 +104,6 @@ public abstract class Terms {
|
||||||
* measures, this measure does not take deleted documents
|
* measures, this measure does not take deleted documents
|
||||||
* into account. */
|
* into account. */
|
||||||
public abstract int getDocCount() throws IOException;
|
public abstract int getDocCount() throws IOException;
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a thread-private {@link TermsEnum} instance. Obtaining
|
|
||||||
* {@link TermsEnum} from this method might be more efficient than using
|
|
||||||
* {@link #iterator(TermsEnum)} directly since this method doesn't necessarily create a
|
|
||||||
* new {@link TermsEnum} instance.
|
|
||||||
* <p>
|
|
||||||
* NOTE: {@link TermsEnum} instances obtained from this method must not be
|
|
||||||
* shared across threads. The enum should only be used within a local context
|
|
||||||
* where other threads can't access it.
|
|
||||||
*
|
|
||||||
* @return a thread-private {@link TermsEnum} instance
|
|
||||||
* @throws IOException
|
|
||||||
* if an IOException occurs
|
|
||||||
* @lucene.internal
|
|
||||||
*/
|
|
||||||
public TermsEnum getThreadTermsEnum() throws IOException {
|
|
||||||
TermsEnum termsEnum = threadEnums.get();
|
|
||||||
if (termsEnum == null) {
|
|
||||||
termsEnum = iterator(null);
|
|
||||||
threadEnums.set(termsEnum);
|
|
||||||
}
|
|
||||||
return termsEnum;
|
|
||||||
}
|
|
||||||
|
|
||||||
// subclass must close when done:
|
|
||||||
protected void close() {
|
|
||||||
threadEnums.close();
|
|
||||||
}
|
|
||||||
public final static Terms[] EMPTY_ARRAY = new Terms[0];
|
public final static Terms[] EMPTY_ARRAY = new Terms[0];
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,6 @@ package org.apache.lucene.index.codecs;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.Closeable;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
@ -181,14 +180,8 @@ public class BlockTermsReader extends FieldsProducer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
try {
|
if (postingsReader != null) {
|
||||||
if (postingsReader != null) {
|
postingsReader.close();
|
||||||
postingsReader.close();
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
for(FieldReader field : fields.values()) {
|
|
||||||
field.close();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -238,7 +231,7 @@ public class BlockTermsReader extends FieldsProducer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class FieldReader extends Terms implements Closeable {
|
private class FieldReader extends Terms {
|
||||||
final long numTerms;
|
final long numTerms;
|
||||||
final FieldInfo fieldInfo;
|
final FieldInfo fieldInfo;
|
||||||
final long termsStartPointer;
|
final long termsStartPointer;
|
||||||
|
@ -261,11 +254,6 @@ public class BlockTermsReader extends FieldsProducer {
|
||||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() {
|
|
||||||
super.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TermsEnum iterator(TermsEnum reuse) throws IOException {
|
public TermsEnum iterator(TermsEnum reuse) throws IOException {
|
||||||
return new SegmentTermsEnum();
|
return new SegmentTermsEnum();
|
||||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.index.codecs;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
import java.io.Closeable;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
@ -194,9 +193,6 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
||||||
try {
|
try {
|
||||||
IOUtils.close(in, postingsReader);
|
IOUtils.close(in, postingsReader);
|
||||||
} finally {
|
} finally {
|
||||||
for(FieldReader field : fields.values()) {
|
|
||||||
field.close();
|
|
||||||
}
|
|
||||||
// Clear so refs to terms index is GCable even if
|
// Clear so refs to terms index is GCable even if
|
||||||
// app hangs onto us:
|
// app hangs onto us:
|
||||||
fields.clear();
|
fields.clear();
|
||||||
|
@ -392,7 +388,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
||||||
final Outputs<BytesRef> fstOutputs = ByteSequenceOutputs.getSingleton();
|
final Outputs<BytesRef> fstOutputs = ByteSequenceOutputs.getSingleton();
|
||||||
final BytesRef NO_OUTPUT = fstOutputs.getNoOutput();
|
final BytesRef NO_OUTPUT = fstOutputs.getNoOutput();
|
||||||
|
|
||||||
public final class FieldReader extends Terms implements Closeable {
|
public final class FieldReader extends Terms {
|
||||||
final long numTerms;
|
final long numTerms;
|
||||||
final FieldInfo fieldInfo;
|
final FieldInfo fieldInfo;
|
||||||
final long sumTotalTermFreq;
|
final long sumTotalTermFreq;
|
||||||
|
@ -450,11 +446,6 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
||||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() {
|
|
||||||
super.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TermsEnum iterator(TermsEnum reuse) throws IOException {
|
public TermsEnum iterator(TermsEnum reuse) throws IOException {
|
||||||
return new SegmentTermsEnum();
|
return new SegmentTermsEnum();
|
||||||
|
@ -744,7 +735,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final BytesRef savedStartTerm;
|
private BytesRef savedStartTerm;
|
||||||
|
|
||||||
// TODO: in some cases we can filter by length? eg
|
// TODO: in some cases we can filter by length? eg
|
||||||
// regexp foo*bar must be at least length 6 bytes
|
// regexp foo*bar must be at least length 6 bytes
|
||||||
|
@ -784,7 +775,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
||||||
f.load(rootCode);
|
f.load(rootCode);
|
||||||
|
|
||||||
// for assert:
|
// for assert:
|
||||||
savedStartTerm = startTerm == null ? null : new BytesRef(startTerm);
|
assert setSavedStartTerm(startTerm);
|
||||||
|
|
||||||
currentFrame = f;
|
currentFrame = f;
|
||||||
if (startTerm != null) {
|
if (startTerm != null) {
|
||||||
|
@ -792,6 +783,12 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// only for assert:
|
||||||
|
private boolean setSavedStartTerm(BytesRef startTerm) {
|
||||||
|
savedStartTerm = startTerm == null ? null : new BytesRef(startTerm);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TermState termState() throws IOException {
|
public TermState termState() throws IOException {
|
||||||
currentFrame.decodeMetaData();
|
currentFrame.decodeMetaData();
|
||||||
|
@ -1163,7 +1160,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
||||||
|
|
||||||
// Iterates through terms in this field
|
// Iterates through terms in this field
|
||||||
private final class SegmentTermsEnum extends TermsEnum {
|
private final class SegmentTermsEnum extends TermsEnum {
|
||||||
private final IndexInput in;
|
private IndexInput in;
|
||||||
|
|
||||||
private Frame[] stack;
|
private Frame[] stack;
|
||||||
private final Frame staticFrame;
|
private final Frame staticFrame;
|
||||||
|
@ -1182,29 +1179,21 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
||||||
|
|
||||||
final BytesRef term = new BytesRef();
|
final BytesRef term = new BytesRef();
|
||||||
|
|
||||||
@SuppressWarnings("unchecked") private FST.Arc<BytesRef>[] arcs = new FST.Arc[5];
|
@SuppressWarnings("unchecked") private FST.Arc<BytesRef>[] arcs = new FST.Arc[1];
|
||||||
|
|
||||||
public SegmentTermsEnum() throws IOException {
|
public SegmentTermsEnum() throws IOException {
|
||||||
//if (DEBUG) System.out.println("BTTR.init seg=" + segment);
|
//if (DEBUG) System.out.println("BTTR.init seg=" + segment);
|
||||||
in = (IndexInput) BlockTreeTermsReader.this.in.clone();
|
stack = new Frame[0];
|
||||||
stack = new Frame[5];
|
|
||||||
for(int stackOrd=0;stackOrd<stack.length;stackOrd++) {
|
|
||||||
stack[stackOrd] = new Frame(stackOrd);
|
|
||||||
}
|
|
||||||
// Used to hold seek by TermState, or cached seek
|
// Used to hold seek by TermState, or cached seek
|
||||||
staticFrame = new Frame(-1);
|
staticFrame = new Frame(-1);
|
||||||
|
|
||||||
|
// Init w/ root block; don't use index since it may
|
||||||
|
// not (and need not) have been loaded
|
||||||
for(int arcIdx=0;arcIdx<arcs.length;arcIdx++) {
|
for(int arcIdx=0;arcIdx<arcs.length;arcIdx++) {
|
||||||
arcs[arcIdx] = new FST.Arc<BytesRef>();
|
arcs[arcIdx] = new FST.Arc<BytesRef>();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Init w/ root block; don't use index since it may
|
|
||||||
// not (and need not) have been loaded
|
|
||||||
//final FST.Arc<BytesRef> arc = index.getFirstArc(arcs[0]);
|
|
||||||
|
|
||||||
// Empty string prefix must have an output in the index!
|
|
||||||
//assert arc.isFinal();
|
|
||||||
|
|
||||||
currentFrame = staticFrame;
|
currentFrame = staticFrame;
|
||||||
final FST.Arc<BytesRef> arc;
|
final FST.Arc<BytesRef> arc;
|
||||||
if (index != null) {
|
if (index != null) {
|
||||||
|
@ -1214,8 +1203,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
||||||
} else {
|
} else {
|
||||||
arc = null;
|
arc = null;
|
||||||
}
|
}
|
||||||
currentFrame = pushFrame(arc, rootCode, 0);
|
currentFrame = staticFrame;
|
||||||
currentFrame.loadBlock();
|
//currentFrame = pushFrame(arc, rootCode, 0);
|
||||||
|
//currentFrame.loadBlock();
|
||||||
validIndexPrefix = 0;
|
validIndexPrefix = 0;
|
||||||
// if (DEBUG) {
|
// if (DEBUG) {
|
||||||
// System.out.println("init frame state " + currentFrame.ord);
|
// System.out.println("init frame state " + currentFrame.ord);
|
||||||
|
@ -1226,6 +1216,12 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
||||||
// computeBlockStats().print(System.out);
|
// computeBlockStats().print(System.out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void initIndexInput() {
|
||||||
|
if (this.in == null) {
|
||||||
|
this.in = (IndexInput) BlockTreeTermsReader.this.in.clone();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** Runs next() through the entire terms dict,
|
/** Runs next() through the entire terms dict,
|
||||||
* computing aggregate statistics. */
|
* computing aggregate statistics. */
|
||||||
public Stats computeBlockStats() throws IOException {
|
public Stats computeBlockStats() throws IOException {
|
||||||
|
@ -1975,6 +1971,20 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
||||||
@Override
|
@Override
|
||||||
public BytesRef next() throws IOException {
|
public BytesRef next() throws IOException {
|
||||||
|
|
||||||
|
if (in == null) {
|
||||||
|
// Fresh TermsEnum; seek to first term:
|
||||||
|
final FST.Arc<BytesRef> arc;
|
||||||
|
if (index != null) {
|
||||||
|
arc = index.getFirstArc(arcs[0]);
|
||||||
|
// Empty string prefix must have an output in the index!
|
||||||
|
assert arc.isFinal();
|
||||||
|
} else {
|
||||||
|
arc = null;
|
||||||
|
}
|
||||||
|
currentFrame = pushFrame(arc, rootCode, 0);
|
||||||
|
currentFrame.loadBlock();
|
||||||
|
}
|
||||||
|
|
||||||
targetBeforeCurrentLength = currentFrame.ord;
|
targetBeforeCurrentLength = currentFrame.ord;
|
||||||
|
|
||||||
assert !eof;
|
assert !eof;
|
||||||
|
@ -2242,6 +2252,11 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
||||||
use. */
|
use. */
|
||||||
void loadBlock() throws IOException {
|
void loadBlock() throws IOException {
|
||||||
|
|
||||||
|
// Clone the IndexInput lazily, so that consumers
|
||||||
|
// that just pull a TermsEnum to
|
||||||
|
// seekExact(TermState) don't pay this cost:
|
||||||
|
initIndexInput();
|
||||||
|
|
||||||
if (nextEnt != -1) {
|
if (nextEnt != -1) {
|
||||||
// Already loaded
|
// Already loaded
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -20,20 +20,23 @@ package org.apache.lucene.search;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
|
import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
import org.apache.lucene.index.IndexReader.ReaderContext;
|
import org.apache.lucene.index.IndexReader.ReaderContext;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.TermState;
|
||||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
|
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
import org.apache.lucene.util.TermContext;
|
import org.apache.lucene.util.TermContext;
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
|
||||||
import org.apache.lucene.util.Bits;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* MultiPhraseQuery is a generalized version of PhraseQuery, with an added
|
* MultiPhraseQuery is a generalized version of PhraseQuery, with an added
|
||||||
|
@ -134,6 +137,7 @@ public class MultiPhraseQuery extends Query {
|
||||||
private class MultiPhraseWeight extends Weight {
|
private class MultiPhraseWeight extends Weight {
|
||||||
private final Similarity similarity;
|
private final Similarity similarity;
|
||||||
private final Similarity.Stats stats;
|
private final Similarity.Stats stats;
|
||||||
|
private final Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
|
||||||
|
|
||||||
public MultiPhraseWeight(IndexSearcher searcher)
|
public MultiPhraseWeight(IndexSearcher searcher)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
@ -144,7 +148,11 @@ public class MultiPhraseQuery extends Query {
|
||||||
ArrayList<TermStatistics> allTermStats = new ArrayList<TermStatistics>();
|
ArrayList<TermStatistics> allTermStats = new ArrayList<TermStatistics>();
|
||||||
for(final Term[] terms: termArrays) {
|
for(final Term[] terms: termArrays) {
|
||||||
for (Term term: terms) {
|
for (Term term: terms) {
|
||||||
TermContext termContext = TermContext.build(context, term, true);
|
TermContext termContext = termContexts.get(term);
|
||||||
|
if (termContext == null) {
|
||||||
|
termContext = TermContext.build(context, term, true);
|
||||||
|
termContexts.put(term, termContext);
|
||||||
|
}
|
||||||
allTermStats.add(searcher.termStatistics(term, termContext));
|
allTermStats.add(searcher.termStatistics(term, termContext));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -174,6 +182,14 @@ public class MultiPhraseQuery extends Query {
|
||||||
|
|
||||||
PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[termArrays.size()];
|
PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[termArrays.size()];
|
||||||
|
|
||||||
|
final Terms fieldTerms = reader.terms(field);
|
||||||
|
if (fieldTerms == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reuse single TermsEnum below:
|
||||||
|
final TermsEnum termsEnum = fieldTerms.iterator(null);
|
||||||
|
|
||||||
for (int pos=0; pos<postingsFreqs.length; pos++) {
|
for (int pos=0; pos<postingsFreqs.length; pos++) {
|
||||||
Term[] terms = termArrays.get(pos);
|
Term[] terms = termArrays.get(pos);
|
||||||
|
|
||||||
|
@ -181,31 +197,43 @@ public class MultiPhraseQuery extends Query {
|
||||||
int docFreq;
|
int docFreq;
|
||||||
|
|
||||||
if (terms.length > 1) {
|
if (terms.length > 1) {
|
||||||
postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, reader, terms);
|
postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum);
|
||||||
|
|
||||||
// coarse -- this overcounts since a given doc can
|
// coarse -- this overcounts since a given doc can
|
||||||
// have more than one terms:
|
// have more than one term:
|
||||||
docFreq = 0;
|
docFreq = 0;
|
||||||
for(int termIdx=0;termIdx<terms.length;termIdx++) {
|
for(int termIdx=0;termIdx<terms.length;termIdx++) {
|
||||||
docFreq += reader.docFreq(terms[termIdx]);
|
final Term term = terms[termIdx];
|
||||||
|
TermState termState = termContexts.get(term).get(context.ord);
|
||||||
|
if (termState == null) {
|
||||||
|
// Term not in reader
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
termsEnum.seekExact(term.bytes(), termState);
|
||||||
|
docFreq += termsEnum.docFreq();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (docFreq == 0) {
|
||||||
|
// None of the terms are in this reader
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
final Term term = terms[0];
|
final Term term = terms[0];
|
||||||
postingsEnum = reader.termPositionsEnum(liveDocs,
|
TermState termState = termContexts.get(term).get(context.ord);
|
||||||
term.field(),
|
if (termState == null) {
|
||||||
term.bytes());
|
// Term not in reader
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
termsEnum.seekExact(term.bytes(), termState);
|
||||||
|
postingsEnum = termsEnum.docsAndPositions(liveDocs, null);
|
||||||
|
|
||||||
if (postingsEnum == null) {
|
if (postingsEnum == null) {
|
||||||
if (reader.termDocsEnum(liveDocs, term.field(), term.bytes()) != null) {
|
// term does exist, but has no positions
|
||||||
// term does exist, but has no positions
|
assert termsEnum.docs(liveDocs, null) != null: "termstate found but no term exists in reader";
|
||||||
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
|
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
|
||||||
} else {
|
|
||||||
// term does not exist
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
docFreq = reader.docFreq(term.field(), term.bytes());
|
docFreq = termsEnum.docFreq();
|
||||||
}
|
}
|
||||||
|
|
||||||
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]);
|
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]);
|
||||||
|
@ -437,20 +465,22 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
|
||||||
private DocsQueue _queue;
|
private DocsQueue _queue;
|
||||||
private IntQueue _posList;
|
private IntQueue _posList;
|
||||||
|
|
||||||
public UnionDocsAndPositionsEnum(Bits liveDocs, IndexReader indexReader, Term[] terms) throws IOException {
|
public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
|
||||||
List<DocsAndPositionsEnum> docsEnums = new LinkedList<DocsAndPositionsEnum>();
|
List<DocsAndPositionsEnum> docsEnums = new LinkedList<DocsAndPositionsEnum>();
|
||||||
for (int i = 0; i < terms.length; i++) {
|
for (int i = 0; i < terms.length; i++) {
|
||||||
DocsAndPositionsEnum postings = indexReader.termPositionsEnum(liveDocs,
|
final Term term = terms[i];
|
||||||
terms[i].field(),
|
TermState termState = termContexts.get(term).get(context.ord);
|
||||||
terms[i].bytes());
|
if (termState == null) {
|
||||||
if (postings != null) {
|
// Term doesn't exist in reader
|
||||||
docsEnums.add(postings);
|
continue;
|
||||||
} else {
|
|
||||||
if (indexReader.termDocsEnum(liveDocs, terms[i].field(), terms[i].bytes()) != null) {
|
|
||||||
// term does exist, but has no positions
|
|
||||||
throw new IllegalStateException("field \"" + terms[i].field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + terms[i].text() + ")");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
termsEnum.seekExact(term.bytes(), termState);
|
||||||
|
DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null);
|
||||||
|
if (postings == null) {
|
||||||
|
// term does exist, but has no positions
|
||||||
|
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
|
||||||
|
}
|
||||||
|
docsEnums.add(postings);
|
||||||
}
|
}
|
||||||
|
|
||||||
_queue = new DocsQueue(docsEnums);
|
_queue = new DocsQueue(docsEnums);
|
||||||
|
|
|
@ -18,24 +18,24 @@ package org.apache.lucene.search;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Set;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
import org.apache.lucene.index.IndexReader.ReaderContext;
|
import org.apache.lucene.index.IndexReader.ReaderContext;
|
||||||
import org.apache.lucene.index.Term;
|
|
||||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermState;
|
import org.apache.lucene.index.TermState;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
|
||||||
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
|
import org.apache.lucene.search.similarities.Similarity.SloppyDocScorer;
|
||||||
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.TermContext;
|
import org.apache.lucene.util.TermContext;
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
import org.apache.lucene.util.ArrayUtil;
|
|
||||||
import org.apache.lucene.util.Bits;
|
|
||||||
|
|
||||||
/** A Query that matches documents containing a particular sequence of terms.
|
/** A Query that matches documents containing a particular sequence of terms.
|
||||||
* A PhraseQuery is built by QueryParser for input like <code>"new york"</code>.
|
* A PhraseQuery is built by QueryParser for input like <code>"new york"</code>.
|
||||||
|
@ -222,27 +222,32 @@ public class PhraseQuery extends Query {
|
||||||
final IndexReader reader = context.reader;
|
final IndexReader reader = context.reader;
|
||||||
final Bits liveDocs = acceptDocs;
|
final Bits liveDocs = acceptDocs;
|
||||||
PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.size()];
|
PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[terms.size()];
|
||||||
|
|
||||||
|
final Terms fieldTerms = reader.terms(field);
|
||||||
|
if (fieldTerms == null) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reuse single TermsEnum below:
|
||||||
|
final TermsEnum te = fieldTerms.iterator(null);
|
||||||
|
|
||||||
for (int i = 0; i < terms.size(); i++) {
|
for (int i = 0; i < terms.size(); i++) {
|
||||||
final Term t = terms.get(i);
|
final Term t = terms.get(i);
|
||||||
final TermState state = states[i].get(context.ord);
|
final TermState state = states[i].get(context.ord);
|
||||||
if (state == null) { /* term doesnt exist in this segment */
|
if (state == null) { /* term doesnt exist in this segment */
|
||||||
assert termNotInReader(reader, field, t.bytes()) : "no termstate found but term exists in reader";
|
assert termNotInReader(reader, field, t.bytes()): "no termstate found but term exists in reader";
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
DocsAndPositionsEnum postingsEnum = reader.termPositionsEnum(liveDocs,
|
te.seekExact(t.bytes(), state);
|
||||||
t.field(),
|
DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null);
|
||||||
t.bytes(),
|
|
||||||
state);
|
|
||||||
// PhraseQuery on a field that did not index
|
// PhraseQuery on a field that did not index
|
||||||
// positions.
|
// positions.
|
||||||
if (postingsEnum == null) {
|
if (postingsEnum == null) {
|
||||||
assert (reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null) : "termstate found but no term exists in reader";
|
assert reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null: "termstate found but no term exists in reader";
|
||||||
// term does exist, but has no positions
|
// term does exist, but has no positions
|
||||||
throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
|
throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
|
||||||
}
|
}
|
||||||
// get the docFreq without seeking
|
|
||||||
TermsEnum te = reader.fields().terms(field).getThreadTermsEnum();
|
|
||||||
te.seekExact(t.bytes(), state);
|
|
||||||
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i).intValue(), t);
|
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i).intValue(), t);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -264,10 +269,9 @@ public class PhraseQuery extends Query {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// only called from assert
|
||||||
private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException {
|
private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException {
|
||||||
// only called from assert
|
return reader.docFreq(field, bytes) == 0;
|
||||||
final Terms terms = reader.terms(field);
|
|
||||||
return terms == null || terms.docFreq(bytes) == 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -23,7 +23,6 @@ import java.util.Set;
|
||||||
import org.apache.lucene.index.DocsEnum;
|
import org.apache.lucene.index.DocsEnum;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.TermState;
|
import org.apache.lucene.index.TermState;
|
||||||
import org.apache.lucene.index.Terms;
|
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
import org.apache.lucene.index.IndexReader.ReaderContext;
|
import org.apache.lucene.index.IndexReader.ReaderContext;
|
||||||
|
@ -41,13 +40,13 @@ import org.apache.lucene.util.ToStringUtils;
|
||||||
*/
|
*/
|
||||||
public class TermQuery extends Query {
|
public class TermQuery extends Query {
|
||||||
private final Term term;
|
private final Term term;
|
||||||
private int docFreq;
|
private final int docFreq;
|
||||||
private transient TermContext perReaderTermState;
|
private final TermContext perReaderTermState;
|
||||||
|
|
||||||
final class TermWeight extends Weight {
|
final class TermWeight extends Weight {
|
||||||
private final Similarity similarity;
|
private final Similarity similarity;
|
||||||
private final Similarity.Stats stats;
|
private final Similarity.Stats stats;
|
||||||
private transient TermContext termStates;
|
private final TermContext termStates;
|
||||||
|
|
||||||
public TermWeight(IndexSearcher searcher, TermContext termStates)
|
public TermWeight(IndexSearcher searcher, TermContext termStates)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
@ -108,7 +107,7 @@ public class TermQuery extends Query {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
//System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
|
//System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
|
||||||
final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum();
|
final TermsEnum termsEnum = context.reader.terms(term.field()).iterator(null);
|
||||||
termsEnum.seekExact(term.bytes(), state);
|
termsEnum.seekExact(term.bytes(), state);
|
||||||
return termsEnum;
|
return termsEnum;
|
||||||
}
|
}
|
||||||
|
@ -116,8 +115,7 @@ public class TermQuery extends Query {
|
||||||
private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException {
|
private boolean termNotInReader(IndexReader reader, String field, BytesRef bytes) throws IOException {
|
||||||
// only called from assert
|
// only called from assert
|
||||||
//System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString());
|
//System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString());
|
||||||
final Terms terms = reader.terms(field);
|
return reader.docFreq(field, bytes) == 0;
|
||||||
return terms == null || terms.docFreq(bytes) == 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -17,7 +17,6 @@ package org.apache.lucene.search.spans;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.index.IndexReader;
|
|
||||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||||
import org.apache.lucene.index.Fields;
|
import org.apache.lucene.index.Fields;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
|
@ -26,7 +25,6 @@ import org.apache.lucene.index.TermState;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.TermsEnum;
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.util.Bits;
|
import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.ReaderUtil;
|
|
||||||
import org.apache.lucene.util.TermContext;
|
import org.apache.lucene.util.TermContext;
|
||||||
import org.apache.lucene.util.ToStringUtils;
|
import org.apache.lucene.util.ToStringUtils;
|
||||||
|
|
||||||
|
@ -99,7 +97,7 @@ public class SpanTermQuery extends SpanQuery {
|
||||||
if (fields != null) {
|
if (fields != null) {
|
||||||
final Terms terms = fields.terms(term.field());
|
final Terms terms = fields.terms(term.field());
|
||||||
if (terms != null) {
|
if (terms != null) {
|
||||||
final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share!
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
if (termsEnum.seekExact(term.bytes(), true)) {
|
if (termsEnum.seekExact(term.bytes(), true)) {
|
||||||
state = termsEnum.termState();
|
state = termsEnum.termState();
|
||||||
} else {
|
} else {
|
||||||
|
@ -119,7 +117,7 @@ public class SpanTermQuery extends SpanQuery {
|
||||||
return TermSpans.EMPTY_TERM_SPANS;
|
return TermSpans.EMPTY_TERM_SPANS;
|
||||||
}
|
}
|
||||||
|
|
||||||
final TermsEnum termsEnum = context.reader.terms(term.field()).getThreadTermsEnum();
|
final TermsEnum termsEnum = context.reader.terms(term.field()).iterator(null);
|
||||||
termsEnum.seekExact(term.bytes(), state);
|
termsEnum.seekExact(term.bytes(), state);
|
||||||
|
|
||||||
final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null);
|
final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null);
|
||||||
|
|
|
@ -46,7 +46,6 @@ public final class TermContext {
|
||||||
|
|
||||||
//public static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
|
//public static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates an empty {@link TermContext} from a {@link ReaderContext}
|
* Creates an empty {@link TermContext} from a {@link ReaderContext}
|
||||||
*/
|
*/
|
||||||
|
@ -94,7 +93,7 @@ public final class TermContext {
|
||||||
if (fields != null) {
|
if (fields != null) {
|
||||||
final Terms terms = fields.terms(field);
|
final Terms terms = fields.terms(field);
|
||||||
if (terms != null) {
|
if (terms != null) {
|
||||||
final TermsEnum termsEnum = terms.getThreadTermsEnum(); // thread-private don't share!
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
if (termsEnum.seekExact(bytes, cache)) {
|
if (termsEnum.seekExact(bytes, cache)) {
|
||||||
final TermState termState = termsEnum.termState();
|
final TermState termState = termsEnum.termState();
|
||||||
//if (DEBUG) System.out.println(" found");
|
//if (DEBUG) System.out.println(" found");
|
||||||
|
|
|
@ -349,7 +349,9 @@ public class TestDocsAndPositions extends LuceneTestCase {
|
||||||
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||||
|
|
||||||
// now reuse and check again
|
// now reuse and check again
|
||||||
disi = r.terms("foo").docs(null, new BytesRef("bar"), disi);
|
TermsEnum te = r.terms("foo").iterator(null);
|
||||||
|
assertTrue(te.seekExact(new BytesRef("bar"), true));
|
||||||
|
disi = te.docs(null, disi);
|
||||||
docid = disi.docID();
|
docid = disi.docID();
|
||||||
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
|
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
|
||||||
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||||
|
@ -372,7 +374,9 @@ public class TestDocsAndPositions extends LuceneTestCase {
|
||||||
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||||
|
|
||||||
// now reuse and check again
|
// now reuse and check again
|
||||||
disi = r.terms("foo").docsAndPositions(null, new BytesRef("bar"), disi);
|
TermsEnum te = r.terms("foo").iterator(null);
|
||||||
|
assertTrue(te.seekExact(new BytesRef("bar"), true));
|
||||||
|
disi = te.docsAndPositions(null, disi);
|
||||||
docid = disi.docID();
|
docid = disi.docID();
|
||||||
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
|
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
|
||||||
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||||
|
|
|
@ -199,7 +199,7 @@ public class TestDocumentWriter extends LuceneTestCase {
|
||||||
writer.close();
|
writer.close();
|
||||||
SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random));
|
SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random));
|
||||||
|
|
||||||
DocsAndPositionsEnum termPositions = reader.fields().terms("f1").docsAndPositions(reader.getLiveDocs(), new BytesRef("a"), null);
|
DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, reader.getLiveDocs(), "f1", new BytesRef("a"));
|
||||||
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
|
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
|
||||||
int freq = termPositions.freq();
|
int freq = termPositions.freq();
|
||||||
assertEquals(3, freq);
|
assertEquals(3, freq);
|
||||||
|
@ -243,18 +243,18 @@ public class TestDocumentWriter extends LuceneTestCase {
|
||||||
writer.close();
|
writer.close();
|
||||||
SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random));
|
SegmentReader reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random));
|
||||||
|
|
||||||
DocsAndPositionsEnum termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getLiveDocs(), new BytesRef("term1"), null);
|
DocsAndPositionsEnum termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term1"));
|
||||||
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
|
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
|
||||||
assertEquals(1, termPositions.freq());
|
assertEquals(1, termPositions.freq());
|
||||||
assertEquals(0, termPositions.nextPosition());
|
assertEquals(0, termPositions.nextPosition());
|
||||||
|
|
||||||
termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getLiveDocs(), new BytesRef("term2"), null);
|
termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term2"));
|
||||||
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
|
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
|
||||||
assertEquals(2, termPositions.freq());
|
assertEquals(2, termPositions.freq());
|
||||||
assertEquals(1, termPositions.nextPosition());
|
assertEquals(1, termPositions.nextPosition());
|
||||||
assertEquals(3, termPositions.nextPosition());
|
assertEquals(3, termPositions.nextPosition());
|
||||||
|
|
||||||
termPositions = reader.fields().terms("preanalyzed").docsAndPositions(reader.getLiveDocs(), new BytesRef("term3"), null);
|
termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term3"));
|
||||||
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
|
assertTrue(termPositions.nextDoc() != termPositions.NO_MORE_DOCS);
|
||||||
assertEquals(1, termPositions.freq());
|
assertEquals(1, termPositions.freq());
|
||||||
assertEquals(2, termPositions.nextPosition());
|
assertEquals(2, termPositions.nextPosition());
|
||||||
|
|
|
@ -1340,13 +1340,12 @@ public class TestIndexReader extends LuceneTestCase
|
||||||
writer.addDocument(d);
|
writer.addDocument(d);
|
||||||
IndexReader r = writer.getReader();
|
IndexReader r = writer.getReader();
|
||||||
writer.close();
|
writer.close();
|
||||||
Terms terms = MultiFields.getTerms(r, "f");
|
|
||||||
try {
|
try {
|
||||||
// Make sure codec impls totalTermFreq (eg PreFlex doesn't)
|
// Make sure codec impls totalTermFreq (eg PreFlex doesn't)
|
||||||
Assume.assumeTrue(terms.totalTermFreq(new BytesRef("b")) != -1);
|
Assume.assumeTrue(MultiFields.totalTermFreq(r, "f", new BytesRef("b")) != -1);
|
||||||
assertEquals(1, terms.totalTermFreq(new BytesRef("b")));
|
assertEquals(1, MultiFields.totalTermFreq(r, "f", new BytesRef("b")));
|
||||||
assertEquals(2, terms.totalTermFreq(new BytesRef("a")));
|
assertEquals(2, MultiFields.totalTermFreq(r, "f", new BytesRef("a")));
|
||||||
assertEquals(1, terms.totalTermFreq(new BytesRef("b")));
|
assertEquals(1, MultiFields.totalTermFreq(r, "f", new BytesRef("b")));
|
||||||
} finally {
|
} finally {
|
||||||
r.close();
|
r.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
|
|
|
@ -113,7 +113,6 @@ public class TestMultiFields extends LuceneTestCase {
|
||||||
for(int delDoc : deleted) {
|
for(int delDoc : deleted) {
|
||||||
assertFalse(liveDocs.get(delDoc));
|
assertFalse(liveDocs.get(delDoc));
|
||||||
}
|
}
|
||||||
Terms terms2 = MultiFields.getTerms(reader, "field");
|
|
||||||
|
|
||||||
for(int i=0;i<100;i++) {
|
for(int i=0;i<100;i++) {
|
||||||
BytesRef term = terms.get(random.nextInt(terms.size()));
|
BytesRef term = terms.get(random.nextInt(terms.size()));
|
||||||
|
@ -121,7 +120,7 @@ public class TestMultiFields extends LuceneTestCase {
|
||||||
System.out.println("TEST: seek term="+ UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
|
System.out.println("TEST: seek term="+ UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
|
||||||
}
|
}
|
||||||
|
|
||||||
DocsEnum docsEnum = terms2.docs(liveDocs, term, null);
|
DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, liveDocs, "field", term);
|
||||||
assertNotNull(docsEnum);
|
assertNotNull(docsEnum);
|
||||||
|
|
||||||
for(int docID : docs.get(term)) {
|
for(int docID : docs.get(term)) {
|
||||||
|
|
|
@ -447,7 +447,7 @@ public class TestOmitTf extends LuceneTestCase {
|
||||||
IndexReader ir = iw.getReader();
|
IndexReader ir = iw.getReader();
|
||||||
iw.close();
|
iw.close();
|
||||||
Terms terms = MultiFields.getTerms(ir, "foo");
|
Terms terms = MultiFields.getTerms(ir, "foo");
|
||||||
assertEquals(-1, terms.totalTermFreq(new BytesRef("bar")));
|
assertEquals(-1, MultiFields.totalTermFreq(ir, "foo", new BytesRef("bar")));
|
||||||
assertEquals(-1, terms.getSumTotalTermFreq());
|
assertEquals(-1, terms.getSumTotalTermFreq());
|
||||||
ir.close();
|
ir.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
|
|
|
@ -342,6 +342,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Terms terms2 = fields.terms(idField);
|
Terms terms2 = fields.terms(idField);
|
||||||
|
TermsEnum termsEnum2 = terms2.iterator(null);
|
||||||
|
|
||||||
DocsEnum termDocs1 = null;
|
DocsEnum termDocs1 = null;
|
||||||
DocsEnum termDocs2 = null;
|
DocsEnum termDocs2 = null;
|
||||||
|
@ -354,7 +355,11 @@ public class TestStressIndexing2 extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
termDocs1 = termsEnum.docs(liveDocs1, termDocs1);
|
termDocs1 = termsEnum.docs(liveDocs1, termDocs1);
|
||||||
termDocs2 = terms2.docs(liveDocs2, term, termDocs2);
|
if (termsEnum2.seekExact(term, false)) {
|
||||||
|
termDocs2 = termsEnum2.docs(liveDocs2, termDocs2);
|
||||||
|
} else {
|
||||||
|
termDocs2 = null;
|
||||||
|
}
|
||||||
|
|
||||||
if (termDocs1.nextDoc() == DocsEnum.NO_MORE_DOCS) {
|
if (termDocs1.nextDoc() == DocsEnum.NO_MORE_DOCS) {
|
||||||
// This doc is deleted and wasn't replaced
|
// This doc is deleted and wasn't replaced
|
||||||
|
@ -397,11 +402,11 @@ public class TestStressIndexing2 extends LuceneTestCase {
|
||||||
System.out.println(" " + field + ":");
|
System.out.println(" " + field + ":");
|
||||||
Terms terms3 = fieldsEnum.terms();
|
Terms terms3 = fieldsEnum.terms();
|
||||||
assertNotNull(terms3);
|
assertNotNull(terms3);
|
||||||
TermsEnum termsEnum2 = terms3.iterator(null);
|
TermsEnum termsEnum3 = terms3.iterator(null);
|
||||||
BytesRef term2;
|
BytesRef term2;
|
||||||
while((term2 = termsEnum2.next()) != null) {
|
while((term2 = termsEnum3.next()) != null) {
|
||||||
System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum2.totalTermFreq());
|
System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq());
|
||||||
dpEnum = termsEnum2.docsAndPositions(null, dpEnum);
|
dpEnum = termsEnum3.docsAndPositions(null, dpEnum);
|
||||||
if (dpEnum != null) {
|
if (dpEnum != null) {
|
||||||
assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||||
final int freq = dpEnum.freq();
|
final int freq = dpEnum.freq();
|
||||||
|
@ -410,7 +415,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
|
||||||
System.out.println(" pos=" + dpEnum.nextPosition());
|
System.out.println(" pos=" + dpEnum.nextPosition());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
dEnum = termsEnum2.docs(null, dEnum);
|
dEnum = termsEnum3.docs(null, dEnum);
|
||||||
assertNotNull(dEnum);
|
assertNotNull(dEnum);
|
||||||
assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||||
final int freq = dEnum.freq();
|
final int freq = dEnum.freq();
|
||||||
|
@ -431,11 +436,11 @@ public class TestStressIndexing2 extends LuceneTestCase {
|
||||||
System.out.println(" " + field + ":");
|
System.out.println(" " + field + ":");
|
||||||
Terms terms3 = fieldsEnum.terms();
|
Terms terms3 = fieldsEnum.terms();
|
||||||
assertNotNull(terms3);
|
assertNotNull(terms3);
|
||||||
TermsEnum termsEnum2 = terms3.iterator(null);
|
TermsEnum termsEnum3 = terms3.iterator(null);
|
||||||
BytesRef term2;
|
BytesRef term2;
|
||||||
while((term2 = termsEnum2.next()) != null) {
|
while((term2 = termsEnum3.next()) != null) {
|
||||||
System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum2.totalTermFreq());
|
System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq());
|
||||||
dpEnum = termsEnum2.docsAndPositions(null, dpEnum);
|
dpEnum = termsEnum3.docsAndPositions(null, dpEnum);
|
||||||
if (dpEnum != null) {
|
if (dpEnum != null) {
|
||||||
assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
assertTrue(dpEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||||
final int freq = dpEnum.freq();
|
final int freq = dpEnum.freq();
|
||||||
|
@ -444,7 +449,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
|
||||||
System.out.println(" pos=" + dpEnum.nextPosition());
|
System.out.println(" pos=" + dpEnum.nextPosition());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
dEnum = termsEnum2.docs(null, dEnum);
|
dEnum = termsEnum3.docs(null, dEnum);
|
||||||
assertNotNull(dEnum);
|
assertNotNull(dEnum);
|
||||||
assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||||
final int freq = dEnum.freq();
|
final int freq = dEnum.freq();
|
||||||
|
@ -467,7 +472,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
|
||||||
|
|
||||||
String field1=null, field2=null;
|
String field1=null, field2=null;
|
||||||
TermsEnum termsEnum1 = null;
|
TermsEnum termsEnum1 = null;
|
||||||
TermsEnum termsEnum2 = null;
|
termsEnum2 = null;
|
||||||
DocsEnum docs1=null, docs2=null;
|
DocsEnum docs1=null, docs2=null;
|
||||||
|
|
||||||
// pack both doc and freq into single element for easy sorting
|
// pack both doc and freq into single element for easy sorting
|
||||||
|
|
|
@ -59,7 +59,17 @@ public class TFValueSource extends TermFreqValueSource {
|
||||||
|
|
||||||
public void reset() throws IOException {
|
public void reset() throws IOException {
|
||||||
// no one should call us for deleted docs?
|
// no one should call us for deleted docs?
|
||||||
docs = terms==null ? null : terms.docs(null, indexedBytes, null);
|
if (terms != null) {
|
||||||
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
|
if (termsEnum.seekExact(indexedBytes, false)) {
|
||||||
|
docs = termsEnum.docs(null, null);
|
||||||
|
} else {
|
||||||
|
docs = null;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
docs = null;
|
||||||
|
}
|
||||||
|
|
||||||
if (docs == null) {
|
if (docs == null) {
|
||||||
docs = new DocsEnum() {
|
docs = new DocsEnum() {
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -51,7 +51,18 @@ public class TermFreqValueSource extends DocFreqValueSource {
|
||||||
|
|
||||||
public void reset() throws IOException {
|
public void reset() throws IOException {
|
||||||
// no one should call us for deleted docs?
|
// no one should call us for deleted docs?
|
||||||
docs = terms == null ? null : terms.docs(null, indexedBytes, null);
|
|
||||||
|
if (terms != null) {
|
||||||
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
|
if (termsEnum.seekExact(indexedBytes, false)) {
|
||||||
|
docs = termsEnum.docs(null, null);
|
||||||
|
} else {
|
||||||
|
docs = null;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
docs = null;
|
||||||
|
}
|
||||||
|
|
||||||
if (docs == null) {
|
if (docs == null) {
|
||||||
docs = new DocsEnum() {
|
docs = new DocsEnum() {
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -555,7 +555,11 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
||||||
Terms terms = fields.terms(t.field());
|
Terms terms = fields.terms(t.field());
|
||||||
if (terms == null) return -1;
|
if (terms == null) return -1;
|
||||||
BytesRef termBytes = t.bytes();
|
BytesRef termBytes = t.bytes();
|
||||||
DocsEnum docs = terms.docs(MultiFields.getLiveDocs(reader), termBytes, null);
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
|
if (!termsEnum.seekExact(termBytes, false)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(reader), null);
|
||||||
if (docs == null) return -1;
|
if (docs == null) return -1;
|
||||||
int id = docs.nextDoc();
|
int id = docs.nextDoc();
|
||||||
return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
|
return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
|
||||||
|
@ -947,7 +951,13 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
||||||
BytesRef termBytes = t.bytes();
|
BytesRef termBytes = t.bytes();
|
||||||
|
|
||||||
Bits liveDocs = reader.getLiveDocs();
|
Bits liveDocs = reader.getLiveDocs();
|
||||||
DocsEnum docsEnum = terms==null ? null : terms.docs(liveDocs, termBytes, null);
|
DocsEnum docsEnum = null;
|
||||||
|
if (terms != null) {
|
||||||
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
|
if (termsEnum.seekExact(termBytes, false)) {
|
||||||
|
docsEnum = termsEnum.docs(MultiFields.getLiveDocs(reader), null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (docsEnum != null) {
|
if (docsEnum != null) {
|
||||||
DocsEnum.BulkReadResult readResult = docsEnum.getBulkResult();
|
DocsEnum.BulkReadResult readResult = docsEnum.getBulkResult();
|
||||||
|
|
|
@ -723,8 +723,11 @@ public class TestRealTimeGet extends SolrTestCaseJ4 {
|
||||||
Terms terms = fields.terms(t.field());
|
Terms terms = fields.terms(t.field());
|
||||||
if (terms == null) return -1;
|
if (terms == null) return -1;
|
||||||
BytesRef termBytes = t.bytes();
|
BytesRef termBytes = t.bytes();
|
||||||
DocsEnum docs = terms.docs(MultiFields.getLiveDocs(r), termBytes, null);
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
if (docs == null) return -1;
|
if (!termsEnum.seekExact(termBytes, false)) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null);
|
||||||
int id = docs.nextDoc();
|
int id = docs.nextDoc();
|
||||||
if (id != DocIdSetIterator.NO_MORE_DOCS) {
|
if (id != DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
int next = docs.nextDoc();
|
int next = docs.nextDoc();
|
||||||
|
|
Loading…
Reference in New Issue