LUCENE-2929: specify up front if you need freqs from DocsEnum

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1210176 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2011-12-04 18:50:58 +00:00
parent 3408defc8d
commit 961b820e53
88 changed files with 953 additions and 520 deletions

View File

@ -936,7 +936,7 @@ public class MemoryIndex {
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) {
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) {
if (reuse == null || !(reuse instanceof MemoryDocsEnum)) {
reuse = new MemoryDocsEnum();
}

View File

@ -188,7 +188,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
MemoryIndex memory = new MemoryIndex();
memory.addField("foo", "bar", analyzer);
IndexReader reader = memory.createSearcher().getIndexReader();
DocsEnum disi = reader.termDocsEnum(null, "foo", new BytesRef("bar"));
DocsEnum disi = _TestUtil.docs(random, reader, "foo", new BytesRef("bar"), null, null, false);
int docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
@ -196,7 +196,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
// now reuse and check again
TermsEnum te = reader.terms("foo").iterator(null);
assertTrue(te.seekExact(new BytesRef("bar"), true));
disi = te.docs(null, disi);
disi = te.docs(null, disi, false);
docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

View File

@ -129,12 +129,19 @@ public class FieldNormModifier {
if (terms != null) {
TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null;
DocsEnum docsAndFreqs = null;
while(termsEnum.next() != null) {
docs = termsEnum.docs(liveDocs, docs);
docsAndFreqs = termsEnum.docs(liveDocs, docsAndFreqs, true);
final DocsEnum docs2;
if (docsAndFreqs != null) {
docs2 = docsAndFreqs;
} else {
docs2 = docs = termsEnum.docs(liveDocs, docs, false);
}
while(true) {
int docID = docs.nextDoc();
int docID = docs2.nextDoc();
if (docID != docs.NO_MORE_DOCS) {
termCounts[docID] += docs.freq();
termCounts[docID] += docsAndFreqs == null ? 1 : docsAndFreqs.freq();
} else {
break;
}

View File

@ -201,7 +201,7 @@ public class HighFreqTerms {
return;
}
}
DocsEnum de = r.termDocsEnum(liveDocs, field, termText);
DocsEnum de = r.termDocsEnum(liveDocs, field, termText, true);
if (de != null) {
while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)
totalTF[0] += de.freq();

View File

@ -29,11 +29,11 @@ import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexOutput;
@ -139,7 +139,7 @@ public class TestAppendingCodec extends LuceneTestCase {
assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("lazy")));
assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("dog")));
assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("the")));
DocsEnum de = te.docs(null, null);
DocsEnum de = te.docs(null, null, true);
assertTrue(de.advance(0) != DocsEnum.NO_MORE_DOCS);
assertEquals(2, de.freq());
assertTrue(de.advance(1) != DocsEnum.NO_MORE_DOCS);

View File

@ -93,7 +93,7 @@ public class DuplicateFilter extends Filter {
if (currTerm == null) {
break;
} else {
docs = termsEnum.docs(acceptDocs, docs);
docs = termsEnum.docs(acceptDocs, docs, false);
int doc = docs.nextDoc();
if (doc != DocsEnum.NO_MORE_DOCS) {
if (keepMode == KeepMode.KM_USE_FIRST_OCCURRENCE) {
@ -133,7 +133,7 @@ public class DuplicateFilter extends Filter {
} else {
if (termsEnum.docFreq() > 1) {
// unset potential duplicates
docs = termsEnum.docs(acceptDocs, docs);
docs = termsEnum.docs(acceptDocs, docs, false);
int doc = docs.nextDoc();
if (doc != DocsEnum.NO_MORE_DOCS) {
if (keepMode == KeepMode.KM_USE_FIRST_OCCURRENCE) {

View File

@ -17,6 +17,9 @@ package org.apache.lucene.sandbox.queries;
* limitations under the License.
*/
import java.io.IOException;
import java.util.HashSet;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StringField;
@ -28,9 +31,7 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
import java.util.HashSet;
import org.apache.lucene.util._TestUtil;
public class DuplicateFilterTest extends LuceneTestCase {
private static final String KEY_FIELD = "url";
@ -134,10 +135,13 @@ public class DuplicateFilterTest extends LuceneTestCase {
for (ScoreDoc hit : hits) {
Document d = searcher.doc(hit.doc);
String url = d.get(KEY_FIELD);
DocsEnum td = MultiFields.getTermDocsEnum(reader,
MultiFields.getLiveDocs(reader),
KEY_FIELD,
new BytesRef(url));
DocsEnum td = _TestUtil.docs(random, reader,
KEY_FIELD,
new BytesRef(url),
MultiFields.getLiveDocs(reader),
null,
false);
int lastDoc = 0;
while (td.nextDoc() != DocsEnum.NO_MORE_DOCS) {
lastDoc = td.docID();
@ -155,10 +159,13 @@ public class DuplicateFilterTest extends LuceneTestCase {
for (ScoreDoc hit : hits) {
Document d = searcher.doc(hit.doc);
String url = d.get(KEY_FIELD);
DocsEnum td = MultiFields.getTermDocsEnum(reader,
MultiFields.getLiveDocs(reader),
KEY_FIELD,
new BytesRef(url));
DocsEnum td = _TestUtil.docs(random, reader,
KEY_FIELD,
new BytesRef(url),
MultiFields.getLiveDocs(reader),
null,
false);
int lastDoc = 0;
td.nextDoc();
lastDoc = td.docID();

View File

@ -57,7 +57,7 @@ public class CartesianShapeFilter extends Filter {
return new DocIdSet() {
@Override
public DocIdSetIterator iterator() throws IOException {
return context.reader.termDocsEnum(acceptDocs, fieldName, bytesRef);
return context.reader.termDocsEnum(acceptDocs, fieldName, bytesRef, false);
}
@Override
@ -70,7 +70,7 @@ public class CartesianShapeFilter extends Filter {
for (int i =0; i< sz; i++) {
double boxId = area.get(i).doubleValue();
NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(boxId), 0, bytesRef);
final DocsEnum docsEnum = context.reader.termDocsEnum(acceptDocs, fieldName, bytesRef);
final DocsEnum docsEnum = context.reader.termDocsEnum(acceptDocs, fieldName, bytesRef, false);
if (docsEnum == null) continue;
// iterate through all documents
// which have this boxId

View File

@ -387,7 +387,7 @@ class BufferedDeletesStream {
// System.out.println(" term=" + term);
if (termsEnum.seekExact(term.bytes(), false)) {
DocsEnum docsEnum = termsEnum.docs(reader.getLiveDocs(), docs);
DocsEnum docsEnum = termsEnum.docs(reader.getLiveDocs(), docs, false);
//System.out.println("BDS: got docsEnum=" + docsEnum);
if (docsEnum != null) {

View File

@ -683,6 +683,7 @@ public class CheckIndex {
}
DocsEnum docs = null;
DocsEnum docsAndFreqs = null;
DocsAndPositionsEnum postings = null;
final FieldsEnum fieldsEnum = fields.iterator();
@ -740,7 +741,8 @@ public class CheckIndex {
status.totFreq += docFreq;
sumDocFreq += docFreq;
docs = termsEnum.docs(liveDocs, docs);
docs = termsEnum.docs(liveDocs, docs, false);
docsAndFreqs = termsEnum.docs(liveDocs, docsAndFreqs, true);
postings = termsEnum.docsAndPositions(liveDocs, postings);
if (hasOrd) {
@ -762,13 +764,24 @@ public class CheckIndex {
status.termCount++;
final DocsEnum docs2;
final DocsEnum docsAndFreqs2;
final boolean hasPositions;
final boolean hasFreqs;
if (postings != null) {
docs2 = postings;
docsAndFreqs2 = postings;
hasPositions = true;
hasFreqs = true;
} else if (docsAndFreqs != null) {
docs2 = docsAndFreqs;
docsAndFreqs2 = docsAndFreqs;
hasPositions = false;
hasFreqs = true;
} else {
docs2 = docs;
docsAndFreqs2 = null;
hasPositions = false;
hasFreqs = false;
}
int lastDoc = -1;
@ -780,9 +793,15 @@ public class CheckIndex {
break;
}
visitedDocs.set(doc);
final int freq = docs2.freq();
status.totPos += freq;
totalTermFreq += freq;
int freq = -1;
if (hasFreqs) {
freq = docsAndFreqs2.freq();
if (freq <= 0) {
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
}
status.totPos += freq;
totalTermFreq += freq;
}
docCount++;
if (doc <= lastDoc) {
@ -793,12 +812,9 @@ public class CheckIndex {
}
lastDoc = doc;
if (freq <= 0) {
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
}
int lastPos = -1;
if (postings != null) {
if (hasPositions) {
for(int j=0;j<freq;j++) {
final int pos = postings.nextPosition();
if (pos < -1) {
@ -820,13 +836,23 @@ public class CheckIndex {
// Re-count if there are deleted docs:
if (reader.hasDeletions()) {
final DocsEnum docsNoDel = termsEnum.docs(null, docs);
docCount = 0;
totalTermFreq = 0;
while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
visitedDocs.set(docsNoDel.docID());
docCount++;
totalTermFreq += docsNoDel.freq();
if (hasFreqs) {
final DocsEnum docsNoDel = termsEnum.docs(null, docsAndFreqs, true);
docCount = 0;
totalTermFreq = 0;
while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
visitedDocs.set(docsNoDel.docID());
docCount++;
totalTermFreq += docsNoDel.freq();
}
} else {
final DocsEnum docsNoDel = termsEnum.docs(null, docs, false);
docCount = 0;
totalTermFreq = -1;
while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
visitedDocs.set(docsNoDel.docID());
docCount++;
}
}
}
@ -883,7 +909,7 @@ public class CheckIndex {
} else {
for(int idx=0;idx<7;idx++) {
final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
docs = termsEnum.docs(liveDocs, docs);
docs = termsEnum.docs(liveDocs, docs, false);
final int docID = docs.advance(skipDocID);
if (docID == DocsEnum.NO_MORE_DOCS) {
break;
@ -986,7 +1012,7 @@ public class CheckIndex {
throw new RuntimeException("seek to existing term " + seekTerms[i] + " failed");
}
docs = termsEnum.docs(liveDocs, docs);
docs = termsEnum.docs(liveDocs, docs, false);
if (docs == null) {
throw new RuntimeException("null DocsEnum from to existing term " + seekTerms[i]);
}
@ -1168,6 +1194,7 @@ public class CheckIndex {
// TODO: maybe we can factor out testTermIndex and reuse here?
DocsEnum docs = null;
DocsEnum docsAndFreqs = null;
DocsAndPositionsEnum postings = null;
final Bits liveDocs = reader.getLiveDocs();
for (int j = 0; j < info.docCount; ++j) {
@ -1210,50 +1237,53 @@ public class CheckIndex {
if (totalTermFreq != -1 && totalTermFreq <= 0) {
throw new RuntimeException("totalTermFreq: " + totalTermFreq + " is out of bounds");
}
DocsEnum docsEnum;
DocsAndPositionsEnum dp = termsEnum.docsAndPositions(null, postings);
if (dp == null) {
DocsEnum d = termsEnum.docs(null, docs);
docsEnum = docs = d;
postings = termsEnum.docsAndPositions(null, postings);
if (postings == null) {
docsAndFreqs = termsEnum.docs(null, docsAndFreqs, true);
if (docsAndFreqs == null) {
docs = termsEnum.docs(null, docs, false);
} else {
docs = docsAndFreqs;
}
} else {
docsEnum = postings = dp;
docs = docsAndFreqs = postings;
}
final int doc = docsEnum.nextDoc();
final int doc = docs.nextDoc();
if (doc != 0) {
throw new RuntimeException("vector for doc " + j + " didn't return docID=0: got docID=" + doc);
}
final int tf = docsEnum.freq();
tfvComputedSumTotalTermFreq += tf;
if (docsAndFreqs != null) {
final int tf = docsAndFreqs.freq();
if (tf <= 0) {
throw new RuntimeException("vector freq " + tf + " is out of bounds");
}
if (totalTermFreq != -1 && totalTermFreq != tf) {
throw new RuntimeException("vector totalTermFreq " + totalTermFreq + " != tf " + tf);
}
tfvComputedSumTotalTermFreq += tf;
if (tf <= 0) {
throw new RuntimeException("vector freq " + tf + " is out of bounds");
}
if (totalTermFreq != -1 && totalTermFreq != tf) {
throw new RuntimeException("vector totalTermFreq " + totalTermFreq + " != tf " + tf);
}
if (dp != null) {
int lastPosition = -1;
for (int i = 0; i < tf; i++) {
int pos = dp.nextPosition();
if (pos != -1 && pos < 0) {
throw new RuntimeException("vector position " + pos + " is out of bounds");
}
if (postings != null) {
int lastPosition = -1;
for (int i = 0; i < tf; i++) {
int pos = postings.nextPosition();
if (pos != -1 && pos < 0) {
throw new RuntimeException("vector position " + pos + " is out of bounds");
}
if (pos < lastPosition) {
throw new RuntimeException("vector position " + pos + " < lastPos " + lastPosition);
}
if (pos < lastPosition) {
throw new RuntimeException("vector position " + pos + " < lastPos " + lastPosition);
}
lastPosition = pos;
lastPosition = pos;
}
}
}
if (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
throw new RuntimeException("vector for doc " + j + " references multiple documents!");
}
}

View File

@ -314,7 +314,7 @@ public class DocTermOrds {
final int df = te.docFreq();
if (df <= maxTermDocFreq) {
docsEnum = te.docs(liveDocs, docsEnum);
docsEnum = te.docs(liveDocs, docsEnum, false);
// dF, but takes deletions into account
int actualDF = 0;
@ -650,8 +650,8 @@ public class DocTermOrds {
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
return termsEnum.docs(liveDocs, reuse);
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
return termsEnum.docs(liveDocs, reuse, needsFreqs);
}
@Override

View File

@ -20,7 +20,7 @@ package org.apache.lucene.index;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.AttributeSource;
/** Iterates through the documents, term freq and positions.
/** Iterates through the documents and term freqs.
* NOTE: you must first call {@link #nextDoc} before using
* any of the per-doc methods. */
public abstract class DocsEnum extends DocIdSetIterator {

View File

@ -44,6 +44,7 @@ public final class FieldInfo {
*/
public static enum IndexOptions {
/** only documents are indexed: term frequencies and positions are omitted */
// TODO: maybe rename to just DOCS?
DOCS_ONLY,
/** only documents and term frequencies are indexed: positions are omitted */
DOCS_AND_FREQS,

View File

@ -175,8 +175,8 @@ public class FilterIndexReader extends IndexReader {
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
return in.docs(liveDocs, reuse);
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
return in.docs(liveDocs, reuse, needsFreqs);
}
@Override

View File

@ -166,8 +166,8 @@ public abstract class FilteredTermsEnum extends TermsEnum {
}
@Override
public DocsEnum docs(Bits bits, DocsEnum reuse) throws IOException {
return tenum.docs(bits, reuse);
public DocsEnum docs(Bits bits, DocsEnum reuse, boolean needsFreqs) throws IOException {
return tenum.docs(bits, reuse, needsFreqs);
}
@Override

View File

@ -1033,7 +1033,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
/** Returns {@link DocsEnum} for the specified field &
* term. This may return null, if either the field or
* term does not exist. */
public DocsEnum termDocsEnum(Bits liveDocs, String field, BytesRef term) throws IOException {
public DocsEnum termDocsEnum(Bits liveDocs, String field, BytesRef term, boolean needsFreqs) throws IOException {
assert field != null;
assert term != null;
final Fields fields = fields();
@ -1042,7 +1042,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
return termsEnum.docs(liveDocs, null);
return termsEnum.docs(liveDocs, null, needsFreqs);
}
}
}
@ -1052,7 +1052,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
/** Returns {@link DocsAndPositionsEnum} for the specified
* field & term. This may return null, if either the
* field or term does not exist, or, positions were not
* stored for this term. */
* indexed for this field. */
public DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term) throws IOException {
assert field != null;
assert term != null;
@ -1074,7 +1074,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
* {@link TermState}. This may return null, if either the field or the term
* does not exists or the {@link TermState} is invalid for the underlying
* implementation.*/
public DocsEnum termDocsEnum(Bits liveDocs, String field, BytesRef term, TermState state) throws IOException {
public DocsEnum termDocsEnum(Bits liveDocs, String field, BytesRef term, TermState state, boolean needsFreqs) throws IOException {
assert state != null;
assert field != null;
final Fields fields = fields();
@ -1083,7 +1083,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
termsEnum.seekExact(term, state);
return termsEnum.docs(liveDocs, null);
return termsEnum.docs(liveDocs, null, needsFreqs);
}
}
return null;
@ -1093,7 +1093,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
* Returns {@link DocsAndPositionsEnum} for the specified field and
* {@link TermState}. This may return null, if either the field or the term
* does not exists, the {@link TermState} is invalid for the underlying
* implementation, or positions were not stored for this term.*/
* implementation, or positions were not indexed for this field. */
public DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term, TermState state) throws IOException {
assert state != null;
assert field != null;
@ -1161,8 +1161,11 @@ public abstract class IndexReader implements Cloneable,Closeable {
DocsEnum docs = MultiFields.getTermDocsEnum(this,
MultiFields.getLiveDocs(this),
term.field(),
term.bytes());
if (docs == null) return 0;
term.bytes(),
false);
if (docs == null) {
return 0;
}
int n = 0;
int doc;
while ((doc = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) {

View File

@ -151,14 +151,14 @@ public final class MultiFields extends Fields {
/** Returns {@link DocsEnum} for the specified field &
* term. This may return null if the term does not
* exist. */
public static DocsEnum getTermDocsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term) throws IOException {
public static DocsEnum getTermDocsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, boolean needsFreqs) throws IOException {
assert field != null;
assert term != null;
final Terms terms = getTerms(r, field);
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term, true)) {
return termsEnum.docs(liveDocs, null);
return termsEnum.docs(liveDocs, null, needsFreqs);
}
}
return null;

View File

@ -347,7 +347,7 @@ public final class MultiTermsEnum extends TermsEnum {
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
MultiDocsEnum docsEnum;
// Can only reuse if incoming enum is also a MultiDocsEnum
if (reuse != null && reuse instanceof MultiDocsEnum) {
@ -397,14 +397,16 @@ public final class MultiTermsEnum extends TermsEnum {
}
assert entry.index < docsEnum.subDocsEnum.length: entry.index + " vs " + docsEnum.subDocsEnum.length + "; " + subs.length;
final DocsEnum subDocsEnum = entry.terms.docs(b, docsEnum.subDocsEnum[entry.index]);
final DocsEnum subDocsEnum = entry.terms.docs(b, docsEnum.subDocsEnum[entry.index], needsFreqs);
if (subDocsEnum != null) {
docsEnum.subDocsEnum[entry.index] = subDocsEnum;
subDocs[upto].docsEnum = subDocsEnum;
subDocs[upto].slice = entry.subSlice;
upto++;
} else {
// One of our subs cannot provide freqs:
assert needsFreqs;
return null;
}
}
@ -475,7 +477,7 @@ public final class MultiTermsEnum extends TermsEnum {
subDocsAndPositions[upto].slice = entry.subSlice;
upto++;
} else {
if (entry.terms.docs(b, null) != null) {
if (entry.terms.docs(b, null, false) != null) {
// At least one of our subs does not store
// positions -- we can't correctly produce a
// MultiDocsAndPositions enum

View File

@ -147,12 +147,16 @@ public abstract class TermsEnum {
/** Get {@link DocsEnum} for the current term. Do not
* call this when the enum is unpositioned. This method
* will not return null.
* may return null (if needsFreqs is true but freqs were
* not indexed for this field).
*
* @param liveDocs unset bits are documents that should not
* be returned
* @param reuse pass a prior DocsEnum for possible reuse */
public abstract DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException;
* @param reuse pass a prior DocsEnum for possible reuse
* @param needsFreqs true if the caller intends to call
* {@link DocsEnum#freq}. If you pass false you must not
* call {@link DocsEnum#freq} in the returned DocsEnum. */
public abstract DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException;
/** Get {@link DocsAndPositionsEnum} for the current term.
* Do not call this when the enum is unpositioned.
@ -229,7 +233,7 @@ public abstract class TermsEnum {
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) {
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) {
throw new IllegalStateException("this method should never be called");
}

View File

@ -25,8 +25,8 @@ import java.util.TreeMap;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
@ -689,14 +689,11 @@ public class BlockTermsReader extends FieldsProducer {
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
//System.out.println("BTR.docs this=" + this);
decodeMetaData();
//System.out.println("BTR.docs: state.docFreq=" + state.docFreq);
final DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, liveDocs, reuse);
assert docsEnum != null;
//System.out.println("BTR.docs: return docsEnum=" + docsEnum);
return docsEnum;
return postingsReader.docs(fieldInfo, state, liveDocs, reuse, needsFreqs);
}
@Override

View File

@ -875,9 +875,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
}
@Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
public DocsEnum docs(Bits skipDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
currentFrame.decodeMetaData();
return postingsReader.docs(fieldInfo, currentFrame.termState, skipDocs, reuse);
return postingsReader.docs(fieldInfo, currentFrame.termState, skipDocs, reuse, needsFreqs);
}
@Override
@ -2082,7 +2082,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
}
@Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
public DocsEnum docs(Bits skipDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
assert !eof;
//if (DEBUG) {
//System.out.println("BTTR.docs seg=" + segment);
@ -2091,10 +2091,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
//if (DEBUG) {
//System.out.println(" state=" + currentFrame.state);
//}
final DocsEnum docsEnum = postingsReader.docs(fieldInfo, currentFrame.state, skipDocs, reuse);
assert docsEnum != null;
return docsEnum;
return postingsReader.docs(fieldInfo, currentFrame.state, skipDocs, reuse, needsFreqs);
}
@Override

View File

@ -63,7 +63,19 @@ public abstract class PostingsConsumer {
int df = 0;
long totTF = 0;
if (mergeState.fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
if (mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_ONLY) {
while(true) {
final int doc = postings.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
visitedDocs.set(doc);
this.startDoc(doc, 0);
this.finishDoc();
df++;
}
totTF = -1;
} else if (mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS) {
while(true) {
final int doc = postings.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {

View File

@ -51,7 +51,7 @@ public abstract class PostingsReaderBase implements Closeable {
/** Must fully consume state, since after this call that
* TermState may be reused. */
public abstract DocsEnum docs(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsEnum reuse) throws IOException;
public abstract DocsEnum docs(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsEnum reuse, boolean needsFreqs) throws IOException;
/** Must fully consume state, since after this call that
* TermState may be reused. */

View File

@ -51,8 +51,9 @@ public abstract class TermsConsumer {
public abstract Comparator<BytesRef> getComparator() throws IOException;
/** Default merge impl */
private MappingMultiDocsEnum docsEnum = null;
private MappingMultiDocsAndPositionsEnum postingsEnum = null;
private MappingMultiDocsEnum docsEnum;
private MappingMultiDocsEnum docsAndFreqsEnum;
private MappingMultiDocsAndPositionsEnum postingsEnum;
public void merge(MergeState mergeState, TermsEnum termsEnum) throws IOException {
@ -63,7 +64,7 @@ public abstract class TermsConsumer {
long sumDFsinceLastAbortCheck = 0;
FixedBitSet visitedDocs = new FixedBitSet(mergeState.mergedDocCount);
if (mergeState.fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
if (mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_ONLY) {
if (docsEnum == null) {
docsEnum = new MappingMultiDocsEnum();
}
@ -74,14 +75,14 @@ public abstract class TermsConsumer {
while((term = termsEnum.next()) != null) {
// We can pass null for liveDocs, because the
// mapping enum will skip the non-live docs:
docsEnumIn = (MultiDocsEnum) termsEnum.docs(null, docsEnumIn);
docsEnumIn = (MultiDocsEnum) termsEnum.docs(null, docsEnumIn, false);
if (docsEnumIn != null) {
docsEnum.reset(docsEnumIn);
final PostingsConsumer postingsConsumer = startTerm(term);
final TermStats stats = postingsConsumer.merge(mergeState, docsEnum, visitedDocs);
if (stats.docFreq > 0) {
finishTerm(term, stats);
sumTotalTermFreq += stats.totalTermFreq;
sumTotalTermFreq += stats.docFreq;
sumDFsinceLastAbortCheck += stats.docFreq;
sumDocFreq += stats.docFreq;
if (sumDFsinceLastAbortCheck > 60000) {
@ -91,7 +92,35 @@ public abstract class TermsConsumer {
}
}
}
} else if (mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS) {
if (docsAndFreqsEnum == null) {
docsAndFreqsEnum = new MappingMultiDocsEnum();
}
docsAndFreqsEnum.setMergeState(mergeState);
MultiDocsEnum docsAndFreqsEnumIn = null;
while((term = termsEnum.next()) != null) {
// We can pass null for liveDocs, because the
// mapping enum will skip the non-live docs:
docsAndFreqsEnumIn = (MultiDocsEnum) termsEnum.docs(null, docsAndFreqsEnumIn, true);
assert docsAndFreqsEnumIn != null;
docsAndFreqsEnum.reset(docsAndFreqsEnumIn);
final PostingsConsumer postingsConsumer = startTerm(term);
final TermStats stats = postingsConsumer.merge(mergeState, docsAndFreqsEnum, visitedDocs);
if (stats.docFreq > 0) {
finishTerm(term, stats);
sumTotalTermFreq += stats.totalTermFreq;
sumDFsinceLastAbortCheck += stats.docFreq;
sumDocFreq += stats.docFreq;
if (sumDFsinceLastAbortCheck > 60000) {
mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0);
sumDFsinceLastAbortCheck = 0;
}
}
}
} else {
assert mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
if (postingsEnum == null) {
postingsEnum = new MappingMultiDocsAndPositionsEnum();
}
@ -101,27 +130,26 @@ public abstract class TermsConsumer {
// We can pass null for liveDocs, because the
// mapping enum will skip the non-live docs:
postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn);
if (postingsEnumIn != null) {
postingsEnum.reset(postingsEnumIn);
// set PayloadProcessor
if (mergeState.payloadProcessorProvider != null) {
for (int i = 0; i < mergeState.readers.size(); i++) {
if (mergeState.dirPayloadProcessor[i] != null) {
mergeState.currentPayloadProcessor[i] = mergeState.dirPayloadProcessor[i].getProcessor(mergeState.fieldInfo.name, term);
}
assert postingsEnumIn != null;
postingsEnum.reset(postingsEnumIn);
// set PayloadProcessor
if (mergeState.payloadProcessorProvider != null) {
for (int i = 0; i < mergeState.readers.size(); i++) {
if (mergeState.dirPayloadProcessor[i] != null) {
mergeState.currentPayloadProcessor[i] = mergeState.dirPayloadProcessor[i].getProcessor(mergeState.fieldInfo.name, term);
}
}
final PostingsConsumer postingsConsumer = startTerm(term);
final TermStats stats = postingsConsumer.merge(mergeState, postingsEnum, visitedDocs);
if (stats.docFreq > 0) {
finishTerm(term, stats);
sumTotalTermFreq += stats.totalTermFreq;
sumDFsinceLastAbortCheck += stats.docFreq;
sumDocFreq += stats.docFreq;
if (sumDFsinceLastAbortCheck > 60000) {
mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0);
sumDFsinceLastAbortCheck = 0;
}
}
final PostingsConsumer postingsConsumer = startTerm(term);
final TermStats stats = postingsConsumer.merge(mergeState, postingsEnum, visitedDocs);
if (stats.docFreq > 0) {
finishTerm(term, stats);
sumTotalTermFreq += stats.totalTermFreq;
sumDFsinceLastAbortCheck += stats.docFreq;
sumDocFreq += stats.docFreq;
if (sumDFsinceLastAbortCheck > 60000) {
mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0);
sumDFsinceLastAbortCheck = 0;
}
}
}

View File

@ -27,8 +27,8 @@ import java.util.TreeMap;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
@ -950,9 +950,11 @@ public class Lucene3xFields extends FieldsProducer {
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
PreDocsEnum docsEnum;
if (reuse == null || !(reuse instanceof PreDocsEnum)) {
if (needsFreqs && fieldInfo.indexOptions == IndexOptions.DOCS_ONLY) {
return null;
} else if (reuse == null || !(reuse instanceof PreDocsEnum)) {
docsEnum = new PreDocsEnum();
} else {
docsEnum = (PreDocsEnum) reuse;

View File

@ -112,7 +112,10 @@ public class SegmentTermDocs {
}
public final int doc() { return doc; }
public final int freq() { return freq; }
public final int freq() {
assert indexOptions != IndexOptions.DOCS_ONLY;
return freq;
}
protected void skippingDoc() throws IOException {
}
@ -125,7 +128,6 @@ public class SegmentTermDocs {
if (indexOptions == IndexOptions.DOCS_ONLY) {
doc += docCode;
freq = 1;
} else {
doc += docCode >>> 1; // shift off low bit
if ((docCode & 1) != 0) // if low bit is set

View File

@ -18,7 +18,6 @@ package org.apache.lucene.index.codecs.lucene40;
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import org.apache.lucene.index.DocsAndPositionsEnum;
@ -209,9 +208,11 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
}
@Override
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse) throws IOException {
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
SegmentDocsEnum docsEnum;
if (reuse == null || !(reuse instanceof SegmentDocsEnum)) {
if (needsFreqs && fieldInfo.indexOptions == IndexOptions.DOCS_ONLY) {
return null;
} else if (reuse == null || !(reuse instanceof SegmentDocsEnum)) {
docsEnum = new SegmentDocsEnum(freqIn);
} else {
docsEnum = (SegmentDocsEnum) reuse;
@ -277,7 +278,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
final IndexInput freqIn;
final IndexInput startFreqIn;
boolean omitTF; // does current field omit term freq?
boolean indexOmitsTF; // does current field omit term freq?
boolean storePayloads; // does current field store payloads?
int limit; // number of docs in this posting
@ -300,12 +301,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
}
public SegmentDocsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
omitTF = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY;
if (omitTF) {
freq = 1;
Arrays.fill(freqs, 1);
}
indexOmitsTF = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY;
storePayloads = fieldInfo.storePayloads;
this.liveDocs = liveDocs;
freqOffset = termState.freqOffset;
@ -331,6 +327,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
@Override
public int freq() {
assert !indexOmitsTF;
return freq;
}
@ -389,7 +386,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
count = bufferSize;
ord += bufferSize;
if (omitTF)
if (indexOmitsTF)
fillDocs(bufferSize);
else
fillDocsAndFreqs(bufferSize);
@ -400,7 +397,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
private int scanTo(int target) throws IOException {
while (ord++ < limit) {
int code = freqIn.readVInt();
if (omitTF) {
if (indexOmitsTF) {
accum += code;
} else {
accum += code >>> 1; // shift off low bit

View File

@ -505,7 +505,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs /* ignored */) throws IOException {
TVDocsEnum docsEnum;
if (reuse != null && reuse instanceof TVDocsEnum) {
docsEnum = (TVDocsEnum) reuse;

View File

@ -26,8 +26,8 @@ import java.util.TreeMap;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
@ -36,10 +36,10 @@ import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.codecs.PostingsFormat;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.codecs.PostingsConsumer;
import org.apache.lucene.index.codecs.PostingsFormat;
import org.apache.lucene.index.codecs.TermStats;
import org.apache.lucene.index.codecs.TermsConsumer;
import org.apache.lucene.store.ByteArrayDataInput;
@ -317,7 +317,6 @@ public class MemoryPostingsFormat extends PostingsFormat {
docUpto++;
if (indexOptions == IndexOptions.DOCS_ONLY) {
accum += in.readVInt();
freq = 1;
} else {
final int code = in.readVInt();
accum += code >>> 1;
@ -371,6 +370,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
@Override
public int freq() {
assert indexOptions != IndexOptions.DOCS_ONLY;
return freq;
}
}
@ -600,10 +600,13 @@ public class MemoryPostingsFormat extends PostingsFormat {
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
decodeMetaData();
FSTDocsEnum docsEnum;
if (reuse == null || !(reuse instanceof FSTDocsEnum)) {
if (needsFreqs && field.indexOptions == IndexOptions.DOCS_ONLY) {
return null;
} else if (reuse == null || !(reuse instanceof FSTDocsEnum)) {
docsEnum = new FSTDocsEnum(field.indexOptions, field.storePayloads);
} else {
docsEnum = (FSTDocsEnum) reuse;

View File

@ -178,7 +178,10 @@ public class PulsingPostingsReader extends PostingsReaderBase {
}
@Override
public DocsEnum docs(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsEnum reuse) throws IOException {
public DocsEnum docs(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
if (needsFreqs && field.indexOptions == IndexOptions.DOCS_ONLY) {
return null;
}
PulsingTermState termState = (PulsingTermState) _termState;
if (termState.postingsSize != -1) {
PulsingDocsEnum postings;
@ -202,11 +205,11 @@ public class PulsingPostingsReader extends PostingsReaderBase {
return postings.reset(liveDocs, termState);
} else {
if (reuse instanceof PulsingDocsEnum) {
DocsEnum wrapped = wrappedPostingsReader.docs(field, termState.wrappedTermState, liveDocs, getOther(reuse));
DocsEnum wrapped = wrappedPostingsReader.docs(field, termState.wrappedTermState, liveDocs, getOther(reuse), needsFreqs);
setOther(wrapped, reuse); // wrapped.other = reuse
return wrapped;
} else {
return wrappedPostingsReader.docs(field, termState.wrappedTermState, liveDocs, reuse);
return wrappedPostingsReader.docs(field, termState.wrappedTermState, liveDocs, reuse, needsFreqs);
}
}
}
@ -283,7 +286,6 @@ public class PulsingPostingsReader extends PostingsReaderBase {
docID = -1;
accum = 0;
payloadLength = 0;
freq = 1;
this.liveDocs = liveDocs;
return this;
}
@ -342,6 +344,7 @@ public class PulsingPostingsReader extends PostingsReaderBase {
@Override
public int freq() {
assert indexOptions != IndexOptions.DOCS_ONLY;
return freq;
}

View File

@ -272,7 +272,10 @@ public class SepPostingsReader extends PostingsReaderBase {
}
@Override
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsEnum reuse) throws IOException {
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
if (needsFreqs && fieldInfo.indexOptions == IndexOptions.DOCS_ONLY) {
return null;
}
final SepTermState termState = (SepTermState) _termState;
SepDocsEnum docsEnum;
if (reuse == null || !(reuse instanceof SepDocsEnum)) {
@ -369,8 +372,6 @@ public class SepPostingsReader extends PostingsReaderBase {
if (!omitTF) {
freqIndex.set(termState.freqIndex);
freqIndex.seek(freqReader);
} else {
freq = 1;
}
docFreq = termState.docFreq;
@ -412,6 +413,7 @@ public class SepPostingsReader extends PostingsReaderBase {
@Override
public int freq() {
assert !omitTF;
return freq;
}

View File

@ -17,19 +17,24 @@ package org.apache.lucene.index.codecs.simpletext;
* limitations under the License.
*/
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.DocsEnum;
import java.io.IOException;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.StringHelper;
@ -37,13 +42,8 @@ import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.PairOutputs;
import java.io.IOException;
import java.util.Comparator;
import java.util.Map;
import java.util.HashMap;
import org.apache.lucene.util.fst.PositiveIntOutputs;
class SimpleTextFieldsReader extends FieldsProducer {
@ -190,14 +190,17 @@ class SimpleTextFieldsReader extends FieldsProducer {
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
if (needsFreqs && indexOptions == IndexOptions.DOCS_ONLY) {
return null;
}
SimpleTextDocsEnum docsEnum;
if (reuse != null && reuse instanceof SimpleTextDocsEnum && ((SimpleTextDocsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
docsEnum = (SimpleTextDocsEnum) reuse;
} else {
docsEnum = new SimpleTextDocsEnum();
}
return docsEnum.reset(docsStart, liveDocs, indexOptions == IndexOptions.DOCS_ONLY);
return docsEnum.reset(docsStart, liveDocs, !needsFreqs);
}
@Override
@ -245,9 +248,6 @@ class SimpleTextFieldsReader extends FieldsProducer {
in.seek(fp);
this.omitTF = omitTF;
docID = -1;
if (omitTF) {
tf = 1;
}
return this;
}
@ -258,6 +258,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
@Override
public int freq() {
assert !omitTF;
return tf;
}

View File

@ -365,10 +365,10 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
// TODO: reuse
SimpleTVDocsEnum e = new SimpleTVDocsEnum();
e.reset(liveDocs, current.getValue().freq);
e.reset(liveDocs, needsFreqs ? current.getValue().freq : -1);
return e;
}
@ -399,6 +399,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
@Override
public int freq() {
assert freq != -1;
return freq;
}

View File

@ -17,21 +17,22 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.ConjunctionTermScorer.DocsAndFreqs;
import org.apache.lucene.search.similarities.SimilarityProvider;
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
import org.apache.lucene.search.TermQuery.TermWeight;
import java.io.IOException;
import java.util.*;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.ConjunctionTermScorer.DocsAndFreqs;
import org.apache.lucene.search.TermQuery.TermWeight;
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
import org.apache.lucene.search.similarities.SimilarityProvider;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
/** A Query that matches documents matching boolean combinations of other
* queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other
* BooleanQuerys.
@ -349,6 +350,11 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
private Scorer createConjunctionTermScorer(AtomicReaderContext context, Bits acceptDocs)
throws IOException {
// TODO: fix scorer API to specify "needsScores" up
// front, so we can do match-only if caller doesn't
// need scores
final DocsAndFreqs[] docsAndFreqs = new DocsAndFreqs[weights.size()];
for (int i = 0; i < docsAndFreqs.length; i++) {
final TermWeight weight = (TermWeight) weights.get(i);
@ -357,12 +363,46 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
return null;
}
final ExactDocScorer docScorer = weight.createDocScorer(context);
docsAndFreqs[i] = new DocsAndFreqs(termsEnum.docs(
acceptDocs, null), termsEnum.docFreq(), docScorer);
final DocsEnum docsAndFreqsEnum = termsEnum.docs(acceptDocs, null, true);
if (docsAndFreqsEnum == null) {
// TODO: we could carry over TermState from the
// terms we already seek'd to, to save re-seeking
// to make the match-only scorer, but it's likely
// rare that BQ mixes terms from omitTf and
// non-omitTF fields:
// At least one sub cannot provide freqs; abort
// and fallback to full match-only scorer:
return createMatchOnlyConjunctionTermScorer(context, acceptDocs);
}
docsAndFreqs[i] = new DocsAndFreqs(docsAndFreqsEnum,
docsAndFreqsEnum,
termsEnum.docFreq(), docScorer);
}
return new ConjunctionTermScorer(this, disableCoord ? 1.0f : coord(
docsAndFreqs.length, docsAndFreqs.length), docsAndFreqs);
}
private Scorer createMatchOnlyConjunctionTermScorer(AtomicReaderContext context, Bits acceptDocs)
throws IOException {
final DocsAndFreqs[] docsAndFreqs = new DocsAndFreqs[weights.size()];
for (int i = 0; i < docsAndFreqs.length; i++) {
final TermWeight weight = (TermWeight) weights.get(i);
final TermsEnum termsEnum = weight.getTermsEnum(context);
if (termsEnum == null) {
return null;
}
final ExactDocScorer docScorer = weight.createDocScorer(context);
docsAndFreqs[i] = new DocsAndFreqs(null,
termsEnum.docs(acceptDocs, null, false),
termsEnum.docFreq(), docScorer);
}
return new MatchOnlyConjunctionTermScorer(this, disableCoord ? 1.0f : coord(
docsAndFreqs.length, docsAndFreqs.length), docsAndFreqs);
}
@Override
public boolean scoresDocsOutOfOrder() {

View File

@ -17,17 +17,18 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
import org.apache.lucene.util.ArrayUtil;
import java.io.IOException;
import java.util.Comparator;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
import org.apache.lucene.util.ArrayUtil;
/** Scorer for conjunctions, sets of terms, all of which are required. */
final class ConjunctionTermScorer extends Scorer {
private final float coord;
private int lastDoc = -1;
private final DocsAndFreqs[] docsAndFreqs;
class ConjunctionTermScorer extends Scorer {
protected final float coord;
protected int lastDoc = -1;
protected final DocsAndFreqs[] docsAndFreqs;
private final DocsAndFreqs lead;
ConjunctionTermScorer(Weight weight, float coord,
@ -39,7 +40,7 @@ final class ConjunctionTermScorer extends Scorer {
// lead the matching.
ArrayUtil.mergeSort(docsAndFreqs, new Comparator<DocsAndFreqs>() {
public int compare(DocsAndFreqs o1, DocsAndFreqs o2) {
return o1.freq - o2.freq;
return o1.docFreq - o2.docFreq;
}
});
@ -96,14 +97,16 @@ final class ConjunctionTermScorer extends Scorer {
}
static final class DocsAndFreqs {
final DocsEnum docsAndFreqs;
final DocsEnum docs;
final int freq;
final int docFreq;
final ExactDocScorer docScorer;
int doc = -1;
DocsAndFreqs(DocsEnum docs, int freq, ExactDocScorer docScorer) {
DocsAndFreqs(DocsEnum docsAndFreqs, DocsEnum docs, int docFreq, ExactDocScorer docScorer) {
this.docsAndFreqs = docsAndFreqs;
this.docs = docs;
this.freq = freq;
this.docFreq = docFreq;
this.docScorer = docScorer;
}
}

View File

@ -342,7 +342,7 @@ class FieldCacheImpl implements FieldCache {
break;
}
final byte termval = parser.parseByte(term);
docs = termsEnum.docs(null, docs);
docs = termsEnum.docs(null, docs, false);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocsEnum.NO_MORE_DOCS) {
@ -415,7 +415,7 @@ class FieldCacheImpl implements FieldCache {
break;
}
final short termval = parser.parseShort(term);
docs = termsEnum.docs(null, docs);
docs = termsEnum.docs(null, docs, false);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocsEnum.NO_MORE_DOCS) {
@ -519,7 +519,7 @@ class FieldCacheImpl implements FieldCache {
retArray = new int[maxDoc];
}
docs = termsEnum.docs(null, docs);
docs = termsEnum.docs(null, docs, false);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocsEnum.NO_MORE_DOCS) {
@ -586,7 +586,7 @@ class FieldCacheImpl implements FieldCache {
res = new FixedBitSet(maxDoc);
}
docs = termsEnum.docs(null, docs);
docs = termsEnum.docs(null, docs, false);
// TODO: use bulk API
while (true) {
final int docID = docs.nextDoc();
@ -669,7 +669,7 @@ class FieldCacheImpl implements FieldCache {
retArray = new float[maxDoc];
}
docs = termsEnum.docs(null, docs);
docs = termsEnum.docs(null, docs, false);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocsEnum.NO_MORE_DOCS) {
@ -757,7 +757,7 @@ class FieldCacheImpl implements FieldCache {
retArray = new long[maxDoc];
}
docs = termsEnum.docs(null, docs);
docs = termsEnum.docs(null, docs, false);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocsEnum.NO_MORE_DOCS) {
@ -846,7 +846,7 @@ class FieldCacheImpl implements FieldCache {
retArray = new double[maxDoc];
}
docs = termsEnum.docs(null, docs);
docs = termsEnum.docs(null, docs, false);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocsEnum.NO_MORE_DOCS) {
@ -1020,7 +1020,7 @@ class FieldCacheImpl implements FieldCache {
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
throw new UnsupportedOperationException();
}
@ -1147,7 +1147,7 @@ class FieldCacheImpl implements FieldCache {
termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1+termOrd, 1));
}
termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term));
docs = termsEnum.docs(null, docs);
docs = termsEnum.docs(null, docs, false);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocsEnum.NO_MORE_DOCS) {
@ -1268,7 +1268,7 @@ class FieldCacheImpl implements FieldCache {
break;
}
final long pointer = bytes.copyUsingLengthPrefix(term);
docs = termsEnum.docs(null, docs);
docs = termsEnum.docs(null, docs, false);
while (true) {
final int docID = docs.nextDoc();
if (docID == DocsEnum.NO_MORE_DOCS) {

View File

@ -259,8 +259,8 @@ public final class FuzzyTermsEnum extends TermsEnum {
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
return actualEnum.docs(liveDocs, reuse);
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
return actualEnum.docs(liveDocs, reuse, needsFreqs);
}
@Override

View File

@ -0,0 +1,37 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
/** Scorer for conjunctions, sets of terms, all of which are required.
 *  Match-only variant: scores every matching term as if its term
 *  frequency were 1 (used when the index omits freqs). */
final class MatchOnlyConjunctionTermScorer extends ConjunctionTermScorer {
  MatchOnlyConjunctionTermScorer(Weight weight, float coord,
      DocsAndFreqs[] docsAndFreqs) throws IOException {
    super(weight, coord, docsAndFreqs);
  }

  @Override
  public float score() throws IOException {
    // Accumulate each sub-term's score at the current doc with tf=1.
    float total = 0.0f;
    for (int i = 0; i < docsAndFreqs.length; i++) {
      total += docsAndFreqs[i].docScorer.score(lastDoc, 1);
    }
    return total * coord;
  }
}

View File

@ -0,0 +1,94 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.similarities.Similarity;
/** Expert: A <code>Scorer</code> for documents matching a
 * <code>Term</code>. It treats all documents as having
 * one occurrence (tf=1) for the term.
 */
final class MatchOnlyTermScorer extends Scorer {
  // Iterator over the matching documents (freqs not read; tf is fixed at 1).
  private final DocsEnum docsEnum;
  private final Similarity.ExactDocScorer docScorer;

  /**
   * Construct a <code>MatchOnlyTermScorer</code>.
   *
   * @param weight
   *          The weight of the <code>Term</code> in the query.
   * @param td
   *          An iterator over the documents matching the <code>Term</code>.
   * @param docScorer
   *          The <code>Similarity.ExactDocScorer</code> implementation
   *          to be used for score computations.
   */
  MatchOnlyTermScorer(Weight weight, DocsEnum td, Similarity.ExactDocScorer docScorer) throws IOException {
    super(weight);
    this.docScorer = docScorer;
    this.docsEnum = td;
  }

  @Override
  public int docID() {
    return docsEnum.docID();
  }

  @Override
  public float freq() {
    // Match-only: every matching document is treated as tf=1.
    return 1.0f;
  }

  /**
   * Advances to the next document matching the query. <br>
   *
   * @return the document matching the query or NO_MORE_DOCS if there are no more documents.
   */
  @Override
  public int nextDoc() throws IOException {
    return docsEnum.nextDoc();
  }

  @Override
  public float score() {
    assert docID() != NO_MORE_DOCS;
    // Score with a constant freq of 1 — the index omitted term freqs.
    return docScorer.score(docsEnum.docID(), 1);
  }

  /**
   * Advances to the first match beyond the current whose document number is
   * greater than or equal to a given target. <br>
   * The implementation uses {@link DocsEnum#advance(int)}.
   *
   * @param target
   *          The target document number.
   * @return the matching document or NO_MORE_DOCS if none exist.
   */
  @Override
  public int advance(int target) throws IOException {
    return docsEnum.advance(target);
  }

  /** Returns a string representation of this <code>MatchOnlyTermScorer</code>. */
  @Override
  public String toString() { return "scorer(" + weight + ")"; }
}

View File

@ -229,7 +229,7 @@ public class MultiPhraseQuery extends Query {
if (postingsEnum == null) {
// term does exist, but has no positions
assert termsEnum.docs(liveDocs, null) != null: "termstate found but no term exists in reader";
assert termsEnum.docs(liveDocs, null, false) != null: "termstate found but no term exists in reader";
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
}

View File

@ -105,7 +105,7 @@ public class MultiTermQueryWrapperFilter<Q extends MultiTermQuery> extends Filte
do {
// System.out.println(" iter termCount=" + termCount + " term=" +
// enumerator.term().toBytesString());
docsEnum = termsEnum.docs(acceptDocs, docsEnum);
docsEnum = termsEnum.docs(acceptDocs, docsEnum, false);
int docid;
while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
bitSet.set(docid);

View File

@ -244,7 +244,7 @@ public class PhraseQuery extends Query {
// PhraseQuery on a field that did not index
// positions.
if (postingsEnum == null) {
assert reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null: "termstate found but no term exists in reader";
assert reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state, false) != null: "termstate found but no term exists in reader";
// term does exist, but has no positions
throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
}

View File

@ -21,18 +21,18 @@ import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.TermContext;
import org.apache.lucene.util.ToStringUtils;
/** A Query that matches documents containing a term.
@ -83,10 +83,15 @@ public class TermQuery extends Query {
if (termsEnum == null) {
return null;
}
// TODO should we reuse the DocsEnum here?
final DocsEnum docs = termsEnum.docs(acceptDocs, null);
assert docs != null;
return new TermScorer(this, docs, createDocScorer(context));
DocsEnum docs = termsEnum.docs(acceptDocs, null, true);
if (docs != null) {
return new TermScorer(this, docs, createDocScorer(context));
} else {
// Index does not store freq info
docs = termsEnum.docs(acceptDocs, null, false);
assert docs != null;
return new MatchOnlyTermScorer(this, docs, createDocScorer(context));
}
}
/**
@ -120,12 +125,11 @@ public class TermQuery extends Query {
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
IndexReader reader = context.reader;
DocsEnum docs = reader.termDocsEnum(context.reader.getLiveDocs(), term.field(), term.bytes());
if (docs != null) {
int newDoc = docs.advance(doc);
Scorer scorer = scorer(context, true, false, context.reader.getLiveDocs());
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
int freq = docs.freq();
float freq = scorer.freq();
ExactDocScorer docScorer = similarity.exactDocScorer(stats, term.field(), context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
@ -136,8 +140,7 @@ public class TermQuery extends Query {
return result;
}
}
return new ComplexExplanation(false, 0.0f, "no matching term");
return new ComplexExplanation(false, 0.0f, "no matching term");
}
}

View File

@ -383,7 +383,7 @@ public class RAMOnlyPostingsFormat extends PostingsFormat {
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) {
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) {
return new RAMDocsEnum(ramField.termToDocs.get(current), liveDocs);
}

View File

@ -37,12 +37,18 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.PostingsFormat;
@ -520,4 +526,51 @@ public class _TestUtil {
return doc2;
}
// Returns a DocsEnum, but randomly sometimes uses a
// DocsAndFreqsEnum, DocsAndPositionsEnum.  Returns null
// if field/term doesn't exist:
public static DocsEnum docs(Random random, IndexReader r, String field, BytesRef term, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
  final Terms terms = MultiFields.getTerms(r, field);
  if (terms == null) {
    return null;
  }
  final TermsEnum termsEnum = terms.iterator(null);
  // Randomly toggle the useCache flag on seek to cover both code paths:
  if (!termsEnum.seekExact(term, random.nextBoolean())) {
    return null;
  }
  if (random.nextBoolean()) {
    if (random.nextBoolean()) {
      // TODO: cast re-use to D&PE if we can...?
      final DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null);
      if (docsAndPositions != null) {
        return docsAndPositions;
      }
    }
    // Try asking for freqs; a null return means the codec omits them
    // (e.g. DOCS_ONLY field), so fall through to the caller's request:
    final DocsEnum docsAndFreqs = termsEnum.docs(liveDocs, reuse, true);
    if (docsAndFreqs != null) {
      return docsAndFreqs;
    }
  }
  return termsEnum.docs(liveDocs, reuse, needsFreqs);
}
// Returns a DocsEnum from a positioned TermsEnum, but
// randomly sometimes uses a DocsAndFreqsEnum, DocsAndPositionsEnum.
public static DocsEnum docs(Random random, TermsEnum termsEnum, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
  if (random.nextBoolean()) {
    if (random.nextBoolean()) {
      // TODO: cast re-use to D&PE if we can...?
      final DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null);
      if (docsAndPositions != null) {
        return docsAndPositions;
      }
    }
    // Ask for freqs; a null return means the codec omits them, so fall
    // through to the caller's request.  Pass the caller-provided reuse
    // enum (was previously ignored here, unlike the other overload);
    // codecs type-check reuse themselves, so a mismatched enum is safe.
    final DocsEnum docsAndFreqs = termsEnum.docs(liveDocs, reuse, true);
    if (docsAndFreqs != null) {
      return docsAndFreqs;
    }
  }
  return termsEnum.docs(liveDocs, reuse, needsFreqs);
}
}

View File

@ -530,7 +530,7 @@ public class TestAddIndexes extends LuceneTestCase {
private void verifyTermDocs(Directory dir, Term term, int numDocs)
throws IOException {
IndexReader reader = IndexReader.open(dir, true);
DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, null, term.field, term.bytes);
DocsEnum docsEnum = _TestUtil.docs(random, reader, term.field, term.bytes, null, null, false);
int count = 0;
while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)
count++;

View File

@ -669,7 +669,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
// should be found exactly
assertEquals(TermsEnum.SeekStatus.FOUND,
terms.seekCeil(aaaTerm));
assertEquals(35, countDocs(terms.docs(null, null)));
assertEquals(35, countDocs(_TestUtil.docs(random, terms, null, null, false)));
assertNull(terms.next());
// should hit end of field
@ -681,12 +681,12 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
assertEquals(TermsEnum.SeekStatus.NOT_FOUND,
terms.seekCeil(new BytesRef("a")));
assertTrue(terms.term().bytesEquals(aaaTerm));
assertEquals(35, countDocs(terms.docs(null, null)));
assertEquals(35, countDocs(_TestUtil.docs(random, terms, null, null, false)));
assertNull(terms.next());
assertEquals(TermsEnum.SeekStatus.FOUND,
terms.seekCeil(aaaTerm));
assertEquals(35, countDocs(terms.docs(null, null)));
assertEquals(35, countDocs(_TestUtil.docs(random, terms,null, null, false)));
assertNull(terms.next());
r.close();

View File

@ -279,7 +279,7 @@ public class TestCodecs extends LuceneTestCase {
// make sure it properly fully resets (rewinds) its
// internal state:
for(int iter=0;iter<2;iter++) {
docsEnum = termsEnum.docs(null, docsEnum);
docsEnum = _TestUtil.docs(random, termsEnum, null, docsEnum, false);
assertEquals(terms[i].docs[0], docsEnum.nextDoc());
assertEquals(DocsEnum.NO_MORE_DOCS, docsEnum.nextDoc());
}
@ -479,7 +479,7 @@ public class TestCodecs extends LuceneTestCase {
assertEquals(status, TermsEnum.SeekStatus.FOUND);
assertEquals(term.docs.length, termsEnum.docFreq());
if (field.omitTF) {
this.verifyDocs(term.docs, term.positions, termsEnum.docs(null, null), false);
this.verifyDocs(term.docs, term.positions, _TestUtil.docs(random, termsEnum, null, null, false), false);
} else {
this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null), true);
}
@ -499,7 +499,7 @@ public class TestCodecs extends LuceneTestCase {
assertTrue(termsEnum.term().bytesEquals(new BytesRef(term.text2)));
assertEquals(term.docs.length, termsEnum.docFreq());
if (field.omitTF) {
this.verifyDocs(term.docs, term.positions, termsEnum.docs(null, null), false);
this.verifyDocs(term.docs, term.positions, _TestUtil.docs(random, termsEnum, null, null, false), false);
} else {
this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null), true);
}
@ -549,15 +549,22 @@ public class TestCodecs extends LuceneTestCase {
do {
term = field.terms[upto];
if (TestCodecs.random.nextInt(3) == 1) {
final DocsEnum docs = termsEnum.docs(null, null);
final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(null, null);
final DocsEnum docsEnum;
if (postings != null) {
docsEnum = postings;
final DocsEnum docs;
final DocsEnum docsAndFreqs;
final DocsAndPositionsEnum postings;
if (!field.omitTF) {
postings = termsEnum.docsAndPositions(null, null);
if (postings != null) {
docs = docsAndFreqs = postings;
} else {
docs = docsAndFreqs = _TestUtil.docs(random, termsEnum, null, null, true);
}
} else {
docsEnum = docs;
postings = null;
docsAndFreqs = null;
docs = _TestUtil.docs(random, termsEnum, null, null, false);
}
assertNotNull(docs);
int upto2 = -1;
while(upto2 < term.docs.length-1) {
// Maybe skip:
@ -567,10 +574,10 @@ public class TestCodecs extends LuceneTestCase {
final int inc = 1+TestCodecs.random.nextInt(left-1);
upto2 += inc;
if (TestCodecs.random.nextInt(2) == 1) {
doc = docsEnum.advance(term.docs[upto2]);
doc = docs.advance(term.docs[upto2]);
assertEquals(term.docs[upto2], doc);
} else {
doc = docsEnum.advance(1+term.docs[upto2]);
doc = docs.advance(1+term.docs[upto2]);
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
// skipped past last doc
assert upto2 == term.docs.length-1;
@ -584,20 +591,20 @@ public class TestCodecs extends LuceneTestCase {
}
}
} else {
doc = docsEnum.nextDoc();
doc = docs.nextDoc();
assertTrue(doc != -1);
upto2++;
}
assertEquals(term.docs[upto2], doc);
if (!field.omitTF) {
assertEquals(term.positions[upto2].length, docsEnum.freq());
assertEquals(term.positions[upto2].length, postings.freq());
if (TestCodecs.random.nextInt(2) == 1) {
this.verifyPositions(term.positions[upto2], postings);
}
}
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docs.nextDoc());
}
upto++;

View File

@ -17,7 +17,8 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
@ -25,9 +26,8 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class TestDirectoryReader extends LuceneTestCase {
protected Directory dir;
@ -171,15 +171,18 @@ public class TestDirectoryReader extends LuceneTestCase {
// test mixing up TermDocs and TermEnums from different readers.
TermsEnum te2 = MultiFields.getTerms(mr2, "body").iterator(null);
te2.seekCeil(new BytesRef("wow"));
DocsEnum td = MultiFields.getTermDocsEnum(mr2,
MultiFields.getLiveDocs(mr2),
"body",
te2.term());
DocsEnum td = _TestUtil.docs(random, mr2,
"body",
te2.term(),
MultiFields.getLiveDocs(mr2),
null,
false);
TermsEnum te3 = MultiFields.getTerms(mr3, "body").iterator(null);
te3.seekCeil(new BytesRef("wow"));
td = te3.docs(MultiFields.getLiveDocs(mr3),
td);
td = _TestUtil.docs(random, te3, MultiFields.getLiveDocs(mr3),
td,
false);
int ret = 0;

View File

@ -68,11 +68,14 @@ public class TestDocCount extends LuceneTestCase {
String field;
while ((field = e.next()) != null) {
Terms terms = fields.terms(field);
if (terms == null) {
continue;
}
int docCount = terms.getDocCount();
FixedBitSet visited = new FixedBitSet(ir.maxDoc());
TermsEnum te = terms.iterator(null);
while (te.next() != null) {
DocsEnum de = te.docs(null, null);
DocsEnum de = _TestUtil.docs(random, te, null, null, false);
while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
visited.set(de.docID());
}

View File

@ -22,7 +22,6 @@ import java.util.Arrays;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
@ -34,6 +33,7 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util._TestUtil;
public class TestDocsAndPositions extends LuceneTestCase {
private String fieldName;
@ -99,16 +99,6 @@ public class TestDocsAndPositions extends LuceneTestCase {
return reader.termPositionsEnum(null, fieldName, bytes);
}
public DocsEnum getDocsEnum(IndexReader reader, BytesRef bytes,
boolean freqs, Bits liveDocs) throws IOException {
int randInt = random.nextInt(10);
if (randInt == 0) { // once in a while throw in a positions enum
return getDocsAndPositions(reader, bytes, liveDocs);
} else {
return reader.termDocsEnum(liveDocs, fieldName, bytes);
}
}
/**
* this test indexes random numbers within a range into a field and checks
* their occurrences by searching for a number from that range selected at
@ -232,31 +222,31 @@ public class TestDocsAndPositions extends LuceneTestCase {
AtomicReaderContext[] leaves = ReaderUtil.leaves(topReaderContext);
for (AtomicReaderContext context : leaves) {
int maxDoc = context.reader.maxDoc();
DocsEnum docsAndPosEnum = getDocsEnum(context.reader, bytes, true, null);
DocsEnum docsEnum = _TestUtil.docs(random, context.reader, fieldName, bytes, null, null, true);
if (findNext(freqInDoc, context.docBase, context.docBase + maxDoc) == Integer.MAX_VALUE) {
assertNull(docsAndPosEnum);
assertNull(docsEnum);
continue;
}
assertNotNull(docsAndPosEnum);
docsAndPosEnum.nextDoc();
assertNotNull(docsEnum);
docsEnum.nextDoc();
for (int j = 0; j < maxDoc; j++) {
if (freqInDoc[context.docBase + j] != 0) {
assertEquals(j, docsAndPosEnum.docID());
assertEquals(docsAndPosEnum.freq(), freqInDoc[context.docBase +j]);
assertEquals(j, docsEnum.docID());
assertEquals(docsEnum.freq(), freqInDoc[context.docBase +j]);
if (i % 2 == 0 && random.nextInt(10) == 0) {
int next = findNext(freqInDoc, context.docBase+j+1, context.docBase + maxDoc) - context.docBase;
int advancedTo = docsAndPosEnum.advance(next);
int advancedTo = docsEnum.advance(next);
if (next >= maxDoc) {
assertEquals(DocsEnum.NO_MORE_DOCS, advancedTo);
} else {
assertTrue("advanced to: " +advancedTo + " but should be <= " + next, next >= advancedTo);
}
} else {
docsAndPosEnum.nextDoc();
docsEnum.nextDoc();
}
}
}
assertEquals("docBase: " + context.docBase + " maxDoc: " + maxDoc + " " + docsAndPosEnum.getClass(), DocsEnum.NO_MORE_DOCS, docsAndPosEnum.docID());
assertEquals("docBase: " + context.docBase + " maxDoc: " + maxDoc + " " + docsEnum.getClass(), DocsEnum.NO_MORE_DOCS, docsEnum.docID());
}
}
@ -343,7 +333,7 @@ public class TestDocsAndPositions extends LuceneTestCase {
writer.addDocument(doc);
IndexReader reader = writer.getReader();
IndexReader r = getOnlySegmentReader(reader);
DocsEnum disi = r.termDocsEnum(null, "foo", new BytesRef("bar"));
DocsEnum disi = _TestUtil.docs(random, r, "foo", new BytesRef("bar"), null, null, false);
int docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
@ -351,7 +341,7 @@ public class TestDocsAndPositions extends LuceneTestCase {
// now reuse and check again
TermsEnum te = r.terms("foo").iterator(null);
assertTrue(te.seekExact(new BytesRef("bar"), true));
disi = te.docs(null, disi);
disi = _TestUtil.docs(random, te, null, disi, false);
docid = disi.docID();
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);

View File

@ -270,18 +270,42 @@ public class TestDuelingCodecs extends LuceneTestCase {
assertPositionsSkipping(leftTermsEnum.docFreq(),
leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions),
rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions));
// with freqs:
assertDocsEnum(leftDocs = leftTermsEnum.docs(null, leftDocs, true),
rightDocs = rightTermsEnum.docs(null, rightDocs, true),
true);
assertDocsEnum(leftDocs = leftTermsEnum.docs(randomBits, leftDocs, true),
rightDocs = rightTermsEnum.docs(randomBits, rightDocs, true),
true);
// w/o freqs:
assertDocsEnum(leftDocs = leftTermsEnum.docs(null, leftDocs, false),
rightDocs = rightTermsEnum.docs(null, rightDocs, false),
false);
assertDocsEnum(leftDocs = leftTermsEnum.docs(randomBits, leftDocs, false),
rightDocs = rightTermsEnum.docs(randomBits, rightDocs, false),
false);
assertDocsEnum(leftDocs = leftTermsEnum.docs(null, leftDocs),
rightDocs = rightTermsEnum.docs(null, rightDocs));
assertDocsEnum(leftDocs = leftTermsEnum.docs(randomBits, leftDocs),
rightDocs = rightTermsEnum.docs(randomBits, rightDocs));
// with freqs:
assertDocsSkipping(leftTermsEnum.docFreq(),
leftDocs = leftTermsEnum.docs(null, leftDocs),
rightDocs = rightTermsEnum.docs(null, rightDocs));
leftDocs = leftTermsEnum.docs(null, leftDocs, true),
rightDocs = rightTermsEnum.docs(null, rightDocs, true),
true);
assertDocsSkipping(leftTermsEnum.docFreq(),
leftDocs = leftTermsEnum.docs(randomBits, leftDocs),
rightDocs = rightTermsEnum.docs(randomBits, rightDocs));
leftDocs = leftTermsEnum.docs(randomBits, leftDocs, true),
rightDocs = rightTermsEnum.docs(randomBits, rightDocs, true),
true);
// w/o freqs:
assertDocsSkipping(leftTermsEnum.docFreq(),
leftDocs = leftTermsEnum.docs(null, leftDocs, false),
rightDocs = rightTermsEnum.docs(null, rightDocs, false),
false);
assertDocsSkipping(leftTermsEnum.docFreq(),
leftDocs = leftTermsEnum.docs(randomBits, leftDocs, false),
rightDocs = rightTermsEnum.docs(randomBits, rightDocs, false),
false);
}
}
assertNull(info, rightTermsEnum.next());
@ -327,13 +351,19 @@ public class TestDuelingCodecs extends LuceneTestCase {
/**
* checks docs + freqs, sequentially
*/
public void assertDocsEnum(DocsEnum leftDocs, DocsEnum rightDocs) throws Exception {
public void assertDocsEnum(DocsEnum leftDocs, DocsEnum rightDocs, boolean hasFreqs) throws Exception {
if (leftDocs == null) {
assertNull(rightDocs);
return;
}
assertTrue(info, leftDocs.docID() == -1 || leftDocs.docID() == DocIdSetIterator.NO_MORE_DOCS);
assertTrue(info, rightDocs.docID() == -1 || rightDocs.docID() == DocIdSetIterator.NO_MORE_DOCS);
int docid;
while ((docid = leftDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
assertEquals(info, docid, rightDocs.nextDoc());
assertEquals(info, leftDocs.freq(), rightDocs.freq());
if (hasFreqs) {
assertEquals(info, leftDocs.freq(), rightDocs.freq());
}
}
assertEquals(info, DocIdSetIterator.NO_MORE_DOCS, rightDocs.nextDoc());
}
@ -341,7 +371,11 @@ public class TestDuelingCodecs extends LuceneTestCase {
/**
* checks advancing docs
*/
public void assertDocsSkipping(int docFreq, DocsEnum leftDocs, DocsEnum rightDocs) throws Exception {
public void assertDocsSkipping(int docFreq, DocsEnum leftDocs, DocsEnum rightDocs, boolean hasFreqs) throws Exception {
if (leftDocs == null) {
assertNull(rightDocs);
return;
}
int docid = -1;
int averageGap = leftReader.maxDoc() / (1+docFreq);
int skipInterval = 16;
@ -361,7 +395,9 @@ public class TestDuelingCodecs extends LuceneTestCase {
if (docid == DocIdSetIterator.NO_MORE_DOCS) {
return;
}
assertEquals(info, leftDocs.freq(), rightDocs.freq());
if (hasFreqs) {
assertEquals(info, leftDocs.freq(), rightDocs.freq());
}
}
}

View File

@ -320,18 +320,20 @@ public class TestIndexReader extends LuceneTestCase
Term term,
int expected)
throws IOException {
DocsEnum tdocs = MultiFields.getTermDocsEnum(reader,
MultiFields.getLiveDocs(reader),
term.field(),
new BytesRef(term.text()));
int count = 0;
if (tdocs != null) {
while(tdocs.nextDoc()!= DocIdSetIterator.NO_MORE_DOCS) {
count++;
}
}
assertEquals(msg + ", count mismatch", expected, count);
DocsEnum tdocs = _TestUtil.docs(random, reader,
term.field(),
new BytesRef(term.text()),
MultiFields.getLiveDocs(reader),
null,
false);
int count = 0;
if (tdocs != null) {
while(tdocs.nextDoc()!= DocIdSetIterator.NO_MORE_DOCS) {
count++;
}
}
assertEquals(msg + ", count mismatch", expected, count);
}
public void testBinaryFields() throws IOException {

View File

@ -546,10 +546,12 @@ public class TestIndexWriter extends LuceneTestCase {
assertEquals(1, reader.numDocs());
Term t = new Term("field", "a");
assertEquals(1, reader.docFreq(t));
DocsEnum td = MultiFields.getTermDocsEnum(reader,
MultiFields.getLiveDocs(reader),
"field",
new BytesRef("a"));
DocsEnum td = _TestUtil.docs(random, reader,
"field",
new BytesRef("a"),
MultiFields.getLiveDocs(reader),
null,
true);
td.nextDoc();
assertEquals(128*1024, td.freq());
reader.close();
@ -1338,12 +1340,12 @@ public class TestIndexWriter extends LuceneTestCase {
// test that the terms were indexed.
assertTrue(MultiFields.getTermDocsEnum(ir, null, "binary", new BytesRef("doc1field1")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertTrue(MultiFields.getTermDocsEnum(ir, null, "binary", new BytesRef("doc2field1")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertTrue(MultiFields.getTermDocsEnum(ir, null, "binary", new BytesRef("doc3field1")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertTrue(MultiFields.getTermDocsEnum(ir, null, "string", new BytesRef("doc1field2")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertTrue(MultiFields.getTermDocsEnum(ir, null, "string", new BytesRef("doc2field2")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertTrue(MultiFields.getTermDocsEnum(ir, null, "string", new BytesRef("doc3field2")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertTrue(_TestUtil.docs(random, ir, "binary", new BytesRef("doc1field1"), null, null, false).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertTrue(_TestUtil.docs(random, ir, "binary", new BytesRef("doc2field1"), null, null, false).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertTrue(_TestUtil.docs(random, ir, "binary", new BytesRef("doc3field1"), null, null, false).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertTrue(_TestUtil.docs(random, ir, "string", new BytesRef("doc1field2"), null, null, false).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertTrue(_TestUtil.docs(random, ir, "string", new BytesRef("doc2field2"), null, null, false).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertTrue(_TestUtil.docs(random, ir, "string", new BytesRef("doc3field2"), null, null, false).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
ir.close();
dir.close();
@ -1415,7 +1417,7 @@ public class TestIndexWriter extends LuceneTestCase {
TermsEnum t = r.fields().terms("field").iterator(null);
int count = 0;
while(t.next() != null) {
final DocsEnum docs = t.docs(null, null);
final DocsEnum docs = _TestUtil.docs(random, t, null, null, false);
assertEquals(0, docs.nextDoc());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docs.nextDoc());
count++;

View File

@ -500,10 +500,12 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
// Make sure the doc that hit the exception was marked
// as deleted:
DocsEnum tdocs = MultiFields.getTermDocsEnum(reader,
MultiFields.getLiveDocs(reader),
t.field(),
new BytesRef(t.text()));
DocsEnum tdocs = _TestUtil.docs(random, reader,
t.field(),
new BytesRef(t.text()),
MultiFields.getLiveDocs(reader),
null,
false);
int count = 0;
while(tdocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {

View File

@ -49,9 +49,11 @@ public class TestIndexWriterReader extends LuceneTestCase {
public static int count(Term t, IndexReader r) throws IOException {
int count = 0;
DocsEnum td = MultiFields.getTermDocsEnum(r,
MultiFields.getLiveDocs(r),
t.field(), new BytesRef(t.text()));
DocsEnum td = _TestUtil.docs(random, r,
t.field(), new BytesRef(t.text()),
MultiFields.getLiveDocs(r),
null,
false);
if (td != null) {
while (td.nextDoc() != DocsEnum.NO_MORE_DOCS) {
@ -990,7 +992,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
w.addDocument(doc);
IndexReader r = IndexReader.open(w, true).getSequentialSubReaders()[0];
try {
r.termDocsEnum(null, "f", new BytesRef("val"));
_TestUtil.docs(random, r, "f", new BytesRef("val"), null, null, false);
fail("should have failed to seek since terms index was not loaded.");
} catch (IllegalStateException e) {
// expected - we didn't load the term index

View File

@ -32,6 +32,7 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util._TestUtil;
/**
* MultiThreaded IndexWriter tests
@ -209,10 +210,12 @@ public class TestIndexWriterWithThreads extends LuceneTestCase {
// Quick test to make sure index is not corrupt:
IndexReader reader = IndexReader.open(dir, true);
DocsEnum tdocs = MultiFields.getTermDocsEnum(reader,
MultiFields.getLiveDocs(reader),
"field",
new BytesRef("aaa"));
DocsEnum tdocs = _TestUtil.docs(random, reader,
"field",
new BytesRef("aaa"),
MultiFields.getLiveDocs(reader),
null,
false);
int count = 0;
while(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
count++;

View File

@ -367,7 +367,17 @@ public class TestLongPostings extends LuceneTestCase {
System.out.println("\nTEST: iter=" + iter + " doS1=" + doS1 + " term=" + term);
}
final DocsEnum postings = MultiFields.getTermDocsEnum(r, null, "field", new BytesRef(term));
final DocsEnum docs;
final DocsEnum postings;
if (options == IndexOptions.DOCS_ONLY) {
docs = _TestUtil.docs(random, r, "field", new BytesRef(term), null, null, false);
postings = null;
} else {
docs = postings = _TestUtil.docs(random, r, "field", new BytesRef(term), null, null, true);
assert postings != null;
}
assert docs != null;
int docID = -1;
while(docID < DocsEnum.NO_MORE_DOCS) {
@ -388,7 +398,7 @@ public class TestLongPostings extends LuceneTestCase {
expected++;
}
}
docID = postings.nextDoc();
docID = docs.nextDoc();
if (VERBOSE) {
System.out.println(" got docID=" + docID);
}
@ -397,7 +407,7 @@ public class TestLongPostings extends LuceneTestCase {
break;
}
if (random.nextInt(6) == 3) {
if (random.nextInt(6) == 3 && postings != null) {
final int freq = postings.freq();
assertTrue(freq >=1 && freq <= 4);
}
@ -424,7 +434,7 @@ public class TestLongPostings extends LuceneTestCase {
}
}
docID = postings.advance(targetDocID);
docID = docs.advance(targetDocID);
if (VERBOSE) {
System.out.println(" got docID=" + docID);
}
@ -433,7 +443,7 @@ public class TestLongPostings extends LuceneTestCase {
break;
}
if (random.nextInt(6) == 3) {
if (random.nextInt(6) == 3 && postings != null) {
final int freq = postings.freq();
assertTrue("got invalid freq=" + freq, freq >=1 && freq <= 4);
}

View File

@ -120,7 +120,7 @@ public class TestMultiFields extends LuceneTestCase {
System.out.println("TEST: seek term="+ UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
}
DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, liveDocs, "field", term);
DocsEnum docsEnum = _TestUtil.docs(random, reader, "field", term, liveDocs, null, false);
assertNotNull(docsEnum);
for(int docID : docs.get(term)) {
@ -138,11 +138,12 @@ public class TestMultiFields extends LuceneTestCase {
/*
private void verify(IndexReader r, String term, List<Integer> expected) throws Exception {
DocsEnum docs = MultiFields.getTermDocsEnum(r,
MultiFields.getLiveDocs(r),
"field",
new BytesRef(term));
DocsEnum docs = _TestUtil.docs(random, r,
"field",
new BytesRef(term),
MultiFields.getLiveDocs(r),
null,
false);
for(int docID : expected) {
assertEquals(docID, docs.nextDoc());
}
@ -160,8 +161,8 @@ public class TestMultiFields extends LuceneTestCase {
w.addDocument(d);
IndexReader r = w.getReader();
w.close();
DocsEnum d1 = MultiFields.getTermDocsEnum(r, null, "f", new BytesRef("j"));
DocsEnum d2 = MultiFields.getTermDocsEnum(r, null, "f", new BytesRef("j"));
DocsEnum d1 = _TestUtil.docs(random, r, "f", new BytesRef("j"), null, null, false);
DocsEnum d2 = _TestUtil.docs(random, r, "f", new BytesRef("j"), null, null, false);
assertEquals(0, d1.nextDoc());
assertEquals(0, d2.nextDoc());
r.close();

View File

@ -28,6 +28,7 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
/**
*
@ -52,7 +53,7 @@ public class TestOmitPositions extends LuceneTestCase {
assertNull(MultiFields.getTermPositionsEnum(reader, null, "foo", new BytesRef("test")));
DocsEnum de = MultiFields.getTermDocsEnum(reader, null, "foo", new BytesRef("test"));
DocsEnum de = _TestUtil.docs(random, reader, "foo", new BytesRef("test"), null, null, true);
while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
assertEquals(2, de.freq());
}

View File

@ -19,12 +19,13 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class TestParallelTermEnum extends LuceneTestCase {
private IndexReader ir1;
@ -88,31 +89,31 @@ public class TestParallelTermEnum extends LuceneTestCase {
TermsEnum te = terms.iterator(null);
assertEquals("brown", te.next().utf8ToString());
DocsEnum td = te.docs(liveDocs, null);
DocsEnum td = _TestUtil.docs(random, te, liveDocs, null, false);
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(0, td.docID());
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
assertEquals("fox", te.next().utf8ToString());
td = te.docs(liveDocs, td);
td = _TestUtil.docs(random, te, liveDocs, td, false);
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(0, td.docID());
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
assertEquals("jumps", te.next().utf8ToString());
td = te.docs(liveDocs, td);
td = _TestUtil.docs(random, te, liveDocs, td, false);
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(0, td.docID());
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
assertEquals("quick", te.next().utf8ToString());
td = te.docs(liveDocs, td);
td = _TestUtil.docs(random, te, liveDocs, td, false);
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(0, td.docID());
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
assertEquals("the", te.next().utf8ToString());
td = te.docs(liveDocs, td);
td = _TestUtil.docs(random, te, liveDocs, td, false);
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(0, td.docID());
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
@ -125,31 +126,31 @@ public class TestParallelTermEnum extends LuceneTestCase {
te = terms.iterator(null);
assertEquals("brown", te.next().utf8ToString());
td = te.docs(liveDocs, td);
td = _TestUtil.docs(random, te, liveDocs, td, false);
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(0, td.docID());
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
assertEquals("fox", te.next().utf8ToString());
td = te.docs(liveDocs, td);
td = _TestUtil.docs(random, te, liveDocs, td, false);
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(0, td.docID());
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
assertEquals("jumps", te.next().utf8ToString());
td = te.docs(liveDocs, td);
td = _TestUtil.docs(random, te, liveDocs, td, false);
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(0, td.docID());
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
assertEquals("quick", te.next().utf8ToString());
td = te.docs(liveDocs, td);
td = _TestUtil.docs(random, te, liveDocs, td, false);
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(0, td.docID());
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
assertEquals("the", te.next().utf8ToString());
td = te.docs(liveDocs, td);
td = _TestUtil.docs(random, te, liveDocs, td, false);
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(0, td.docID());
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
@ -162,37 +163,37 @@ public class TestParallelTermEnum extends LuceneTestCase {
te = terms.iterator(null);
assertEquals("dog", te.next().utf8ToString());
td = te.docs(liveDocs, td);
td = _TestUtil.docs(random, te, liveDocs, td, false);
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(0, td.docID());
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
assertEquals("fox", te.next().utf8ToString());
td = te.docs(liveDocs, td);
td = _TestUtil.docs(random, te, liveDocs, td, false);
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(0, td.docID());
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
assertEquals("jumps", te.next().utf8ToString());
td = te.docs(liveDocs, td);
td = _TestUtil.docs(random, te, liveDocs, td, false);
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(0, td.docID());
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
assertEquals("lazy", te.next().utf8ToString());
td = te.docs(liveDocs, td);
td = _TestUtil.docs(random, te, liveDocs, td, false);
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(0, td.docID());
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
assertEquals("over", te.next().utf8ToString());
td = te.docs(liveDocs, td);
td = _TestUtil.docs(random, te, liveDocs, td, false);
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(0, td.docID());
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
assertEquals("the", te.next().utf8ToString());
td = te.docs(liveDocs, td);
td = _TestUtil.docs(random, te, liveDocs, td, false);
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(0, td.docID());
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);

View File

@ -20,8 +20,8 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.Map;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.store.Directory;
@ -31,6 +31,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class TestPerSegmentDeletes extends LuceneTestCase {
public void testDeletes1() throws Exception {
@ -224,7 +225,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
Terms cterms = fields.terms(term.field);
TermsEnum ctermsEnum = cterms.iterator(null);
if (ctermsEnum.seekExact(new BytesRef(term.text()), false)) {
DocsEnum docsEnum = ctermsEnum.docs(bits, null);
DocsEnum docsEnum = _TestUtil.docs(random, ctermsEnum, bits, null, false);
return toArray(docsEnum);
}
return null;

View File

@ -17,18 +17,19 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.store.Directory;
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class TestSegmentMerger extends LuceneTestCase {
//The variables for the new merged segment
@ -98,10 +99,12 @@ public class TestSegmentMerger extends LuceneTestCase {
assertTrue(newDoc2 != null);
assertTrue(DocHelper.numFields(newDoc2) == DocHelper.numFields(doc2) - DocHelper.unstored.size());
DocsEnum termDocs = MultiFields.getTermDocsEnum(mergedReader,
MultiFields.getLiveDocs(mergedReader),
DocHelper.TEXT_FIELD_2_KEY,
new BytesRef("field"));
DocsEnum termDocs = _TestUtil.docs(random, mergedReader,
DocHelper.TEXT_FIELD_2_KEY,
new BytesRef("field"),
MultiFields.getLiveDocs(mergedReader),
null,
false);
assertTrue(termDocs != null);
assertTrue(termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS);

View File

@ -22,12 +22,12 @@ import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class TestSegmentReader extends LuceneTestCase {
private Directory dir;
@ -132,16 +132,20 @@ public class TestSegmentReader extends LuceneTestCase {
}
}
DocsEnum termDocs = MultiFields.getTermDocsEnum(reader,
MultiFields.getLiveDocs(reader),
DocHelper.TEXT_FIELD_1_KEY,
new BytesRef("field"));
DocsEnum termDocs = _TestUtil.docs(random, reader,
DocHelper.TEXT_FIELD_1_KEY,
new BytesRef("field"),
MultiFields.getLiveDocs(reader),
null,
false);
assertTrue(termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
termDocs = MultiFields.getTermDocsEnum(reader,
MultiFields.getLiveDocs(reader),
DocHelper.NO_NORMS_KEY,
new BytesRef(DocHelper.NO_NORMS_TEXT));
termDocs = _TestUtil.docs(random, reader,
DocHelper.NO_NORMS_KEY,
new BytesRef(DocHelper.NO_NORMS_TEXT),
MultiFields.getLiveDocs(reader),
null,
false);
assertTrue(termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS);

View File

@ -17,14 +17,15 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.store.Directory;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class TestSegmentTermDocs extends LuceneTestCase {
private Document testDoc = new Document();
@ -61,7 +62,7 @@ public class TestSegmentTermDocs extends LuceneTestCase {
TermsEnum terms = reader.fields().terms(DocHelper.TEXT_FIELD_2_KEY).iterator(null);
terms.seekCeil(new BytesRef("field"));
DocsEnum termDocs = terms.docs(reader.getLiveDocs(), null);
DocsEnum termDocs = _TestUtil.docs(random, terms, reader.getLiveDocs(), null, true);
if (termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
int docId = termDocs.docID();
assertTrue(docId == 0);
@ -80,9 +81,12 @@ public class TestSegmentTermDocs extends LuceneTestCase {
//After adding the document, we should be able to read it back in
SegmentReader reader = SegmentReader.get(true, info, indexDivisor, newIOContext(random));
assertTrue(reader != null);
DocsEnum termDocs = reader.termDocsEnum(reader.getLiveDocs(),
"textField2",
new BytesRef("bad"));
DocsEnum termDocs = _TestUtil.docs(random, reader,
"textField2",
new BytesRef("bad"),
reader.getLiveDocs(),
null,
false);
assertNull(termDocs);
reader.close();
@ -91,9 +95,12 @@ public class TestSegmentTermDocs extends LuceneTestCase {
//After adding the document, we should be able to read it back in
SegmentReader reader = SegmentReader.get(true, info, indexDivisor, newIOContext(random));
assertTrue(reader != null);
DocsEnum termDocs = reader.termDocsEnum(reader.getLiveDocs(),
"junk",
new BytesRef("bad"));
DocsEnum termDocs = _TestUtil.docs(random, reader,
"junk",
new BytesRef("bad"),
reader.getLiveDocs(),
null,
false);
assertNull(termDocs);
reader.close();
}
@ -125,10 +132,12 @@ public class TestSegmentTermDocs extends LuceneTestCase {
IndexReader reader = IndexReader.open(dir, null, true, indexDivisor);
DocsEnum tdocs = MultiFields.getTermDocsEnum(reader,
MultiFields.getLiveDocs(reader),
ta.field(),
new BytesRef(ta.text()));
DocsEnum tdocs = _TestUtil.docs(random, reader,
ta.field(),
new BytesRef(ta.text()),
MultiFields.getLiveDocs(reader),
null,
true);
// without optimization (assumption skipInterval == 16)
@ -148,10 +157,12 @@ public class TestSegmentTermDocs extends LuceneTestCase {
assertFalse(tdocs.advance(10) != DocsEnum.NO_MORE_DOCS);
// without next
tdocs = MultiFields.getTermDocsEnum(reader,
MultiFields.getLiveDocs(reader),
ta.field(),
new BytesRef(ta.text()));
tdocs = _TestUtil.docs(random, reader,
ta.field(),
new BytesRef(ta.text()),
MultiFields.getLiveDocs(reader),
null,
false);
assertTrue(tdocs.advance(0) != DocsEnum.NO_MORE_DOCS);
assertEquals(0, tdocs.docID());
@ -164,10 +175,12 @@ public class TestSegmentTermDocs extends LuceneTestCase {
// exactly skipInterval documents and therefore with optimization
// with next
tdocs = MultiFields.getTermDocsEnum(reader,
MultiFields.getLiveDocs(reader),
tb.field(),
new BytesRef(tb.text()));
tdocs = _TestUtil.docs(random, reader,
tb.field(),
new BytesRef(tb.text()),
MultiFields.getLiveDocs(reader),
null,
true);
assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(10, tdocs.docID());
@ -186,10 +199,12 @@ public class TestSegmentTermDocs extends LuceneTestCase {
assertFalse(tdocs.advance(26) != DocsEnum.NO_MORE_DOCS);
// without next
tdocs = MultiFields.getTermDocsEnum(reader,
MultiFields.getLiveDocs(reader),
tb.field(),
new BytesRef(tb.text()));
tdocs = _TestUtil.docs(random, reader,
tb.field(),
new BytesRef(tb.text()),
MultiFields.getLiveDocs(reader),
null,
true);
assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS);
assertEquals(10, tdocs.docID());
@ -204,10 +219,12 @@ public class TestSegmentTermDocs extends LuceneTestCase {
// much more than skipInterval documents and therefore with optimization
// with next
tdocs = MultiFields.getTermDocsEnum(reader,
MultiFields.getLiveDocs(reader),
tc.field(),
new BytesRef(tc.text()));
tdocs = _TestUtil.docs(random, reader,
tc.field(),
new BytesRef(tc.text()),
MultiFields.getLiveDocs(reader),
null,
true);
assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
assertEquals(26, tdocs.docID());
@ -228,10 +245,12 @@ public class TestSegmentTermDocs extends LuceneTestCase {
assertFalse(tdocs.advance(76) != DocsEnum.NO_MORE_DOCS);
//without next
tdocs = MultiFields.getTermDocsEnum(reader,
MultiFields.getLiveDocs(reader),
tc.field(),
new BytesRef(tc.text()));
tdocs = _TestUtil.docs(random, reader,
tc.field(),
new BytesRef(tc.text()),
MultiFields.getLiveDocs(reader),
null,
false);
assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS);
assertEquals(26, tdocs.docID());
assertTrue(tdocs.advance(40) != DocsEnum.NO_MORE_DOCS);

View File

@ -75,11 +75,11 @@ public class TestStressAdvance extends LuceneTestCase {
System.out.println("\nTEST: iter=" + iter + " iter2=" + iter2);
}
assertEquals(TermsEnum.SeekStatus.FOUND, te.seekCeil(new BytesRef("a")));
de = te.docs(null, de);
de = _TestUtil.docs(random, te, null, de, false);
testOne(de, aDocIDs);
assertEquals(TermsEnum.SeekStatus.FOUND, te.seekCeil(new BytesRef("b")));
de = te.docs(null, de);
de = _TestUtil.docs(random, te, null, de, false);
testOne(de, bDocIDs);
}

View File

@ -334,7 +334,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
// deleted docs):
DocsEnum docs = null;
while(termsEnum.next() != null) {
docs = termsEnum.docs(liveDocs1, docs);
docs = _TestUtil.docs(random, termsEnum, null, docs, false);
while(docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
fail("r1 is not empty but r2 is");
}
@ -354,9 +354,9 @@ public class TestStressIndexing2 extends LuceneTestCase {
break;
}
termDocs1 = termsEnum.docs(liveDocs1, termDocs1);
termDocs1 = _TestUtil.docs(random, termsEnum, liveDocs1, termDocs1, false);
if (termsEnum2.seekExact(term, false)) {
termDocs2 = termsEnum2.docs(liveDocs2, termDocs2);
termDocs2 = _TestUtil.docs(random, termsEnum2, liveDocs2, termDocs2, false);
} else {
termDocs2 = null;
}
@ -415,7 +415,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
System.out.println(" pos=" + dpEnum.nextPosition());
}
} else {
dEnum = termsEnum3.docs(null, dEnum);
dEnum = _TestUtil.docs(random, termsEnum3, null, dEnum, true);
assertNotNull(dEnum);
assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
final int freq = dEnum.freq();
@ -449,7 +449,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
System.out.println(" pos=" + dpEnum.nextPosition());
}
} else {
dEnum = termsEnum3.docs(null, dEnum);
dEnum = _TestUtil.docs(random, termsEnum3, null, dEnum, true);
assertNotNull(dEnum);
assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
final int freq = dEnum.freq();
@ -506,7 +506,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
}
//System.out.println("TEST: term1=" + term1);
docs1 = termsEnum1.docs(liveDocs1, docs1);
docs1 = _TestUtil.docs(random, termsEnum1, liveDocs1, docs1, true);
while (docs1.nextDoc() != DocsEnum.NO_MORE_DOCS) {
int d = docs1.docID();
int f = docs1.freq();
@ -540,7 +540,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
}
//System.out.println("TEST: term1=" + term1);
docs2 = termsEnum2.docs(liveDocs2, docs2);
docs2 = _TestUtil.docs(random, termsEnum2, liveDocs2, docs2, true);
while (docs2.nextDoc() != DocsEnum.NO_MORE_DOCS) {
int d = r2r1[docs2.docID()];
int f = docs2.freq();
@ -667,8 +667,8 @@ public class TestStressIndexing2 extends LuceneTestCase {
assertEquals(DocsEnum.NO_MORE_DOCS, dpEnum1.nextDoc());
assertEquals(DocsEnum.NO_MORE_DOCS, dpEnum2.nextDoc());
} else {
dEnum1 = termsEnum1.docs(null, dEnum1);
dEnum2 = termsEnum2.docs(null, dEnum2);
dEnum1 = _TestUtil.docs(random, termsEnum1, null, dEnum1, true);
dEnum2 = _TestUtil.docs(random, termsEnum2, null, dEnum2, true);
assertNotNull(dEnum1);
assertNotNull(dEnum2);
int docID1 = dEnum1.nextDoc();

View File

@ -24,9 +24,9 @@ import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@ -37,6 +37,7 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class TestTermVectorsReader extends LuceneTestCase {
//Must be lexicographically sorted, will do in setup, versus trying to maintain here
@ -231,7 +232,7 @@ public class TestTermVectorsReader extends LuceneTestCase {
//System.out.println("Term: " + term);
assertEquals(testTerms[i], term);
docsEnum = termsEnum.docs(null, docsEnum);
docsEnum = _TestUtil.docs(random, termsEnum, null, docsEnum, false);
assertNotNull(docsEnum);
int doc = docsEnum.docID();
assertTrue(doc == -1 || doc == DocIdSetIterator.NO_MORE_DOCS);

View File

@ -28,8 +28,9 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
class RepeatingTokenStream extends Tokenizer {
@ -121,7 +122,7 @@ public class TestTermdocPerf extends LuceneTestCase {
DocsEnum tdocs = null;
for (int i=0; i<iter; i++) {
tenum.seekCeil(new BytesRef("val"));
tdocs = tenum.docs(MultiFields.getLiveDocs(reader), tdocs);
tdocs = _TestUtil.docs(random, tenum, MultiFields.getLiveDocs(reader), tdocs, false);
while (tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
ret += tdocs.docID();
}

View File

@ -331,7 +331,7 @@ public class TestTermsEnum extends LuceneTestCase {
}
assertEquals(expected, actual);
assertEquals(1, te.docFreq());
docsEnum = te.docs(null, docsEnum);
docsEnum = _TestUtil.docs(random, te, null, docsEnum, false);
final int docID = docsEnum.nextDoc();
assertTrue(docID != DocsEnum.NO_MORE_DOCS);
assertEquals(docIDToID[docID], termToID.get(expected).intValue());

View File

@ -85,7 +85,7 @@ public class Test10KPulsings extends LuceneTestCase {
for (int i = 0; i < 10050; i++) {
String expected = df.format(i);
assertEquals(expected, te.next().utf8ToString());
de = te.docs(null, de);
de = _TestUtil.docs(random, te, null, de, false);
assertTrue(de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
}
@ -143,7 +143,7 @@ public class Test10KPulsings extends LuceneTestCase {
for (int i = 0; i < 10050; i++) {
String expected = df.format(i);
assertEquals(expected, te.next().utf8ToString());
de = te.docs(null, de);
de = _TestUtil.docs(random, te, null, de, false);
assertTrue(de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
}

View File

@ -59,7 +59,7 @@ public class TestPulsingReuse extends LuceneTestCase {
Map<DocsEnum,Boolean> allEnums = new IdentityHashMap<DocsEnum,Boolean>();
TermsEnum te = segment.terms("foo").iterator(null);
while (te.next() != null) {
reuse = te.docs(null, reuse);
reuse = te.docs(null, reuse, false);
allEnums.put(reuse, true);
}
@ -101,7 +101,7 @@ public class TestPulsingReuse extends LuceneTestCase {
Map<DocsEnum,Boolean> allEnums = new IdentityHashMap<DocsEnum,Boolean>();
TermsEnum te = segment.terms("foo").iterator(null);
while (te.next() != null) {
reuse = te.docs(null, reuse);
reuse = te.docs(null, reuse, false);
allEnums.put(reuse, true);
}

View File

@ -35,6 +35,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.English;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class TestTermVectors extends LuceneTestCase {
private IndexSearcher searcher;
@ -269,7 +270,7 @@ public class TestTermVectors extends LuceneTestCase {
while (termsEnum.next() != null) {
String text = termsEnum.term().utf8ToString();
docs = termsEnum.docs(MultiFields.getLiveDocs(knownSearcher.reader), docs);
docs = _TestUtil.docs(random, termsEnum, MultiFields.getLiveDocs(knownSearcher.reader), docs, true);
while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
int docId = docs.docID();

View File

@ -26,15 +26,16 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util._TestUtil;
public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
@ -95,15 +96,21 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
writer.close();
IndexReader reader = IndexReader.open(dir, true);
DocsEnum td = MultiFields.getTermDocsEnum(reader,
MultiFields.getLiveDocs(reader),
"partnum",
new BytesRef("Q36"));
DocsEnum td = _TestUtil.docs(random,
reader,
"partnum",
new BytesRef("Q36"),
MultiFields.getLiveDocs(reader),
null,
false);
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
td = MultiFields.getTermDocsEnum(reader,
MultiFields.getLiveDocs(reader),
"partnum",
new BytesRef("Q37"));
td = _TestUtil.docs(random,
reader,
"partnum",
new BytesRef("Q37"),
MultiFields.getLiveDocs(reader),
null,
false);
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
}

View File

@ -57,6 +57,7 @@ import org.apache.lucene.search.FieldCache.DocTermsIndex;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util._TestUtil;
/**
* Test very simply that perf tasks - simple algorithms - are doing what they should.
@ -493,7 +494,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
TermsEnum termsEnum = terms.iterator(null);
DocsEnum docs = null;
while(termsEnum.next() != null) {
docs = termsEnum.docs(MultiFields.getLiveDocs(reader), docs);
docs = _TestUtil.docs(random, termsEnum, MultiFields.getLiveDocs(reader), docs, true);
while(docs.nextDoc() != docs.NO_MORE_DOCS) {
totalTokenCount2 += docs.freq();
}

View File

@ -106,8 +106,10 @@ class TakmiSampleFixer implements SampleFixer {
Term drillDownTerm = DrillDown.term(searchParams, catPath);
// TODO (Facet): avoid Multi*?
Bits liveDocs = MultiFields.getLiveDocs(indexReader);
int updatedCount = countIntersection(MultiFields.getTermDocsEnum(indexReader, liveDocs, drillDownTerm.field(), drillDownTerm.bytes()),
docIds.iterator());
int updatedCount = countIntersection(MultiFields.getTermDocsEnum(indexReader, liveDocs,
drillDownTerm.field(), drillDownTerm.bytes(),
false),
docIds.iterator());
fresNode.setValue(updatedCount);
}

View File

@ -191,7 +191,7 @@ public class DirectoryTaxonomyReader implements TaxonomyReader {
indexReaderLock.readLock().lock();
// TODO (Facet): avoid Multi*?
Bits liveDocs = MultiFields.getLiveDocs(indexReader);
DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, liveDocs, Consts.FULL, new BytesRef(path));
DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, liveDocs, Consts.FULL, new BytesRef(path), false);
if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
ret = docs.docID();
}

View File

@ -405,7 +405,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
// TODO (Facet): avoid Multi*?
Bits liveDocs = MultiFields.getLiveDocs(reader);
DocsEnum docs = MultiFields.getTermDocsEnum(reader, liveDocs, Consts.FULL,
new BytesRef(categoryPath.toString(delimiter)));
new BytesRef(categoryPath.toString(delimiter)),
false);
if (docs == null || docs.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
return -1; // category does not exist in taxonomy
}
@ -441,7 +442,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
}
Bits liveDocs = MultiFields.getLiveDocs(reader);
DocsEnum docs = MultiFields.getTermDocsEnum(reader, liveDocs, Consts.FULL,
new BytesRef(categoryPath.toString(delimiter, prefixLen)));
new BytesRef(categoryPath.toString(delimiter, prefixLen)),
false);
if (docs == null || docs.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
return -1; // category does not exist in taxonomy
}
@ -788,7 +790,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
// hence documents), there are no deletions in the index. Therefore, it
// is sufficient to call next(), and then doc(), exactly once with no
// 'validation' checks.
docsEnum = termsEnum.docs(liveDocs, docsEnum);
docsEnum = termsEnum.docs(liveDocs, docsEnum, false);
docsEnum.nextDoc();
cp.clear();
// TODO (Facet): avoid String creation/use bytes?
@ -925,7 +927,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
// like Lucene's merge works, we hope there are few seeks.
// TODO (Facet): is there a quicker way? E.g., not specifying the
// next term by name every time?
otherdocsEnum[i] = othertes[i].docs(MultiFields.getLiveDocs(otherreaders[i]), otherdocsEnum[i]);
otherdocsEnum[i] = othertes[i].docs(MultiFields.getLiveDocs(otherreaders[i]), otherdocsEnum[i], false);
otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
int origordinal = otherdocsEnum[i].docID();
ordinalMaps[i].addMapping(origordinal, newordinal);
@ -942,7 +944,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
// to be added because it already existed in the main taxonomy.
// TODO (Facet): Again, is there a quicker way?
mainde = mainte.docs(MultiFields.getLiveDocs(mainreader), mainde);
mainde = mainte.docs(MultiFields.getLiveDocs(mainreader), mainde, false);
mainde.nextDoc(); // TODO (Facet): check?
int newordinal = mainde.docID();
@ -950,7 +952,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
for (int i=0; i<taxonomies.length; i++) {
if (first.equals(currentOthers[i])) {
// TODO (Facet): again, is there a quicker way?
otherdocsEnum[i] = othertes[i].docs(MultiFields.getLiveDocs(otherreaders[i]), otherdocsEnum[i]);
otherdocsEnum[i] = othertes[i].docs(MultiFields.getLiveDocs(otherreaders[i]), otherdocsEnum[i], false);
otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
int origordinal = otherdocsEnum[i].docID();
ordinalMaps[i].addMapping(origordinal, newordinal);

View File

@ -285,7 +285,7 @@ public abstract class FacetTestBase extends LuceneTestCase {
TermsEnum te = terms.iterator(null);
DocsEnum de = null;
while (te.next() != null) {
de = te.docs(liveDocs, de);
de = _TestUtil.docs(random, te, liveDocs, de, false);
int cnt = 0;
while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
cnt++;

View File

@ -6,31 +6,10 @@ import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.junit.Test;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.facet.FacetTestUtils;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.index.params.PerDimensionIndexingParams;
import org.apache.lucene.facet.search.FacetsCollector;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
@ -40,6 +19,25 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.junit.Test;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -91,7 +89,7 @@ public class TestMultipleCategoryLists extends LuceneTestCase {
// Obtain facets results and hand-test them
assertCorrectResults(facetsCollector);
DocsEnum td = MultiFields.getTermDocsEnum(ir, MultiFields.getLiveDocs(ir), "$facets", new BytesRef("$fulltree$"));
DocsEnum td = _TestUtil.docs(random, ir, "$facets", new BytesRef("$fulltree$"), MultiFields.getLiveDocs(ir), null, false);
assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
tr.close();
@ -188,7 +186,7 @@ public class TestMultipleCategoryLists extends LuceneTestCase {
}
private void assertPostingListExists(String field, String text, IndexReader ir) throws IOException {
DocsEnum de = MultiFields.getTermDocsEnum(ir, null, field, new BytesRef(text));
DocsEnum de = _TestUtil.docs(random, ir, field, new BytesRef(text), null, null, false);
assertTrue(de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
}

View File

@ -81,7 +81,7 @@ public class TermsFilter extends Filter {
if (terms != null) { // TODO this check doesn't make sense, decide which variable its supposed to be for
br.copyBytes(term.bytes());
if (termsEnum.seekCeil(br) == TermsEnum.SeekStatus.FOUND) {
docs = termsEnum.docs(acceptDocs, docs);
docs = termsEnum.docs(acceptDocs, docs, false);
while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
result.set(docs.docID());
}

View File

@ -62,7 +62,7 @@ public class TFValueSource extends TermFreqValueSource {
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(indexedBytes, false)) {
docs = termsEnum.docs(null, null);
docs = termsEnum.docs(null, null, true);
} else {
docs = null;
}

View File

@ -55,7 +55,7 @@ public class TermFreqValueSource extends DocFreqValueSource {
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(indexedBytes, false)) {
docs = termsEnum.docs(null, null);
docs = termsEnum.docs(null, null, true);
} else {
docs = null;
}

View File

@ -687,7 +687,7 @@ public class SimpleFacets {
// TODO: specialize when base docset is a bitset or hash set (skipDocs)? or does it matter for this?
// TODO: do this per-segment for better efficiency (MultiDocsEnum just uses base class impl)
// TODO: would passing deleted docs lead to better efficiency over checking the fastForRandomSet?
docsEnum = termsEnum.docs(null, docsEnum);
docsEnum = termsEnum.docs(null, docsEnum, false);
c=0;
if (docsEnum instanceof MultiDocsEnum) {

View File

@ -314,7 +314,7 @@ class JoinQuery extends Query {
if (freq < minDocFreqFrom) {
fromTermDirectCount++;
// OK to skip liveDocs, since we check for intersection with docs matching query
fromDeState.docsEnum = fromDeState.termsEnum.docs(null, fromDeState.docsEnum);
fromDeState.docsEnum = fromDeState.termsEnum.docs(null, fromDeState.docsEnum, false);
DocsEnum docsEnum = fromDeState.docsEnum;
if (docsEnum instanceof MultiDocsEnum) {
@ -379,7 +379,7 @@ class JoinQuery extends Query {
toTermDirectCount++;
// need to use liveDocs here so we don't map to any deleted ones
toDeState.docsEnum = toDeState.termsEnum.docs(toDeState.liveDocs, toDeState.docsEnum);
toDeState.docsEnum = toDeState.termsEnum.docs(toDeState.liveDocs, toDeState.docsEnum, false);
DocsEnum docsEnum = toDeState.docsEnum;
if (docsEnum instanceof MultiDocsEnum) {

View File

@ -559,7 +559,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
if (!termsEnum.seekExact(termBytes, false)) {
return -1;
}
DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(reader), null);
DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(reader), null, false);
if (docs == null) return -1;
int id = docs.nextDoc();
return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
@ -861,7 +861,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
int bitsSet = 0;
OpenBitSet obs = null;
DocsEnum docsEnum = deState.termsEnum.docs(deState.liveDocs, deState.docsEnum);
DocsEnum docsEnum = deState.termsEnum.docs(deState.liveDocs, deState.docsEnum, false);
if (deState.docsEnum == null) {
deState.docsEnum = docsEnum;
}
@ -942,7 +942,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(termBytes, false)) {
docsEnum = termsEnum.docs(MultiFields.getLiveDocs(reader), null);
docsEnum = termsEnum.docs(MultiFields.getLiveDocs(reader), null, false);
}
}

View File

@ -277,7 +277,7 @@ public class FileFloatSource extends ValueSource {
continue;
}
docsEnum = termsEnum.docs(null, docsEnum);
docsEnum = termsEnum.docs(null, docsEnum, false);
int doc;
while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
vals[doc] = fval;

View File

@ -17,6 +17,14 @@
package org.apache.solr.search;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -25,6 +33,7 @@ import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util._TestUtil;
import org.apache.noggit.ObjectBuilder;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
@ -32,14 +41,6 @@ import org.apache.solr.request.SolrQueryRequest;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import static org.apache.solr.core.SolrCore.verbose;
public class TestRealTimeGet extends SolrTestCaseJ4 {
@ -727,7 +728,7 @@ public class TestRealTimeGet extends SolrTestCaseJ4 {
if (!termsEnum.seekExact(termBytes, false)) {
return -1;
}
DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null);
DocsEnum docs = _TestUtil.docs(random, termsEnum, MultiFields.getLiveDocs(r), null, false);
int id = docs.nextDoc();
if (id != DocIdSetIterator.NO_MORE_DOCS) {
int next = docs.nextDoc();