mirror of https://github.com/apache/lucene.git
Merge in Mike's huge commit (r1210176)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3606@1210184 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
commit
1c8d23a340
|
@ -936,7 +936,7 @@ public class MemoryIndex {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) {
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) {
|
||||
if (reuse == null || !(reuse instanceof MemoryDocsEnum)) {
|
||||
reuse = new MemoryDocsEnum();
|
||||
}
|
||||
|
|
|
@ -188,7 +188,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
|
|||
MemoryIndex memory = new MemoryIndex();
|
||||
memory.addField("foo", "bar", analyzer);
|
||||
IndexReader reader = memory.createSearcher().getIndexReader();
|
||||
DocsEnum disi = reader.termDocsEnum(null, "foo", new BytesRef("bar"));
|
||||
DocsEnum disi = _TestUtil.docs(random, reader, "foo", new BytesRef("bar"), null, null, false);
|
||||
int docid = disi.docID();
|
||||
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
|
@ -196,7 +196,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
|
|||
// now reuse and check again
|
||||
TermsEnum te = reader.terms("foo").iterator(null);
|
||||
assertTrue(te.seekExact(new BytesRef("bar"), true));
|
||||
disi = te.docs(null, disi);
|
||||
disi = te.docs(null, disi, false);
|
||||
docid = disi.docID();
|
||||
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
|
|
|
@ -201,7 +201,7 @@ public class HighFreqTerms {
|
|||
return;
|
||||
}
|
||||
}
|
||||
DocsEnum de = r.termDocsEnum(liveDocs, field, termText);
|
||||
DocsEnum de = r.termDocsEnum(liveDocs, field, termText, true);
|
||||
if (de != null) {
|
||||
while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)
|
||||
totalTF[0] += de.freq();
|
||||
|
|
|
@ -29,11 +29,11 @@ import org.apache.lucene.index.Fields;
|
|||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.TieredMergePolicy;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.TermsEnum.SeekStatus;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.TieredMergePolicy;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
|
@ -139,7 +139,7 @@ public class TestAppendingCodec extends LuceneTestCase {
|
|||
assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("lazy")));
|
||||
assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("dog")));
|
||||
assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("the")));
|
||||
DocsEnum de = te.docs(null, null);
|
||||
DocsEnum de = te.docs(null, null, true);
|
||||
assertTrue(de.advance(0) != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(2, de.freq());
|
||||
assertTrue(de.advance(1) != DocsEnum.NO_MORE_DOCS);
|
||||
|
|
|
@ -93,7 +93,7 @@ public class DuplicateFilter extends Filter {
|
|||
if (currTerm == null) {
|
||||
break;
|
||||
} else {
|
||||
docs = termsEnum.docs(acceptDocs, docs);
|
||||
docs = termsEnum.docs(acceptDocs, docs, false);
|
||||
int doc = docs.nextDoc();
|
||||
if (doc != DocsEnum.NO_MORE_DOCS) {
|
||||
if (keepMode == KeepMode.KM_USE_FIRST_OCCURRENCE) {
|
||||
|
@ -133,7 +133,7 @@ public class DuplicateFilter extends Filter {
|
|||
} else {
|
||||
if (termsEnum.docFreq() > 1) {
|
||||
// unset potential duplicates
|
||||
docs = termsEnum.docs(acceptDocs, docs);
|
||||
docs = termsEnum.docs(acceptDocs, docs, false);
|
||||
int doc = docs.nextDoc();
|
||||
if (doc != DocsEnum.NO_MORE_DOCS) {
|
||||
if (keepMode == KeepMode.KM_USE_FIRST_OCCURRENCE) {
|
||||
|
|
|
@ -17,6 +17,9 @@ package org.apache.lucene.sandbox.queries;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.StringField;
|
||||
|
@ -28,9 +31,7 @@ import org.apache.lucene.search.TermQuery;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
public class DuplicateFilterTest extends LuceneTestCase {
|
||||
private static final String KEY_FIELD = "url";
|
||||
|
@ -134,10 +135,13 @@ public class DuplicateFilterTest extends LuceneTestCase {
|
|||
for (ScoreDoc hit : hits) {
|
||||
Document d = searcher.doc(hit.doc);
|
||||
String url = d.get(KEY_FIELD);
|
||||
DocsEnum td = MultiFields.getTermDocsEnum(reader,
|
||||
MultiFields.getLiveDocs(reader),
|
||||
KEY_FIELD,
|
||||
new BytesRef(url));
|
||||
DocsEnum td = _TestUtil.docs(random, reader,
|
||||
KEY_FIELD,
|
||||
new BytesRef(url),
|
||||
MultiFields.getLiveDocs(reader),
|
||||
null,
|
||||
false);
|
||||
|
||||
int lastDoc = 0;
|
||||
while (td.nextDoc() != DocsEnum.NO_MORE_DOCS) {
|
||||
lastDoc = td.docID();
|
||||
|
@ -155,10 +159,13 @@ public class DuplicateFilterTest extends LuceneTestCase {
|
|||
for (ScoreDoc hit : hits) {
|
||||
Document d = searcher.doc(hit.doc);
|
||||
String url = d.get(KEY_FIELD);
|
||||
DocsEnum td = MultiFields.getTermDocsEnum(reader,
|
||||
MultiFields.getLiveDocs(reader),
|
||||
KEY_FIELD,
|
||||
new BytesRef(url));
|
||||
DocsEnum td = _TestUtil.docs(random, reader,
|
||||
KEY_FIELD,
|
||||
new BytesRef(url),
|
||||
MultiFields.getLiveDocs(reader),
|
||||
null,
|
||||
false);
|
||||
|
||||
int lastDoc = 0;
|
||||
td.nextDoc();
|
||||
lastDoc = td.docID();
|
||||
|
|
|
@ -57,7 +57,7 @@ public class CartesianShapeFilter extends Filter {
|
|||
return new DocIdSet() {
|
||||
@Override
|
||||
public DocIdSetIterator iterator() throws IOException {
|
||||
return context.reader.termDocsEnum(acceptDocs, fieldName, bytesRef);
|
||||
return context.reader.termDocsEnum(acceptDocs, fieldName, bytesRef, false);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -70,7 +70,7 @@ public class CartesianShapeFilter extends Filter {
|
|||
for (int i =0; i< sz; i++) {
|
||||
double boxId = area.get(i).doubleValue();
|
||||
NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(boxId), 0, bytesRef);
|
||||
final DocsEnum docsEnum = context.reader.termDocsEnum(acceptDocs, fieldName, bytesRef);
|
||||
final DocsEnum docsEnum = context.reader.termDocsEnum(acceptDocs, fieldName, bytesRef, false);
|
||||
if (docsEnum == null) continue;
|
||||
// iterate through all documents
|
||||
// which have this boxId
|
||||
|
|
|
@ -387,7 +387,7 @@ class BufferedDeletesStream {
|
|||
// System.out.println(" term=" + term);
|
||||
|
||||
if (termsEnum.seekExact(term.bytes(), false)) {
|
||||
DocsEnum docsEnum = termsEnum.docs(reader.getLiveDocs(), docs);
|
||||
DocsEnum docsEnum = termsEnum.docs(reader.getLiveDocs(), docs, false);
|
||||
//System.out.println("BDS: got docsEnum=" + docsEnum);
|
||||
|
||||
if (docsEnum != null) {
|
||||
|
|
|
@ -683,6 +683,7 @@ public class CheckIndex {
|
|||
}
|
||||
|
||||
DocsEnum docs = null;
|
||||
DocsEnum docsAndFreqs = null;
|
||||
DocsAndPositionsEnum postings = null;
|
||||
|
||||
final FieldsEnum fieldsEnum = fields.iterator();
|
||||
|
@ -740,7 +741,8 @@ public class CheckIndex {
|
|||
status.totFreq += docFreq;
|
||||
sumDocFreq += docFreq;
|
||||
|
||||
docs = termsEnum.docs(liveDocs, docs);
|
||||
docs = termsEnum.docs(liveDocs, docs, false);
|
||||
docsAndFreqs = termsEnum.docs(liveDocs, docsAndFreqs, true);
|
||||
postings = termsEnum.docsAndPositions(liveDocs, postings);
|
||||
|
||||
if (hasOrd) {
|
||||
|
@ -762,13 +764,24 @@ public class CheckIndex {
|
|||
status.termCount++;
|
||||
|
||||
final DocsEnum docs2;
|
||||
final DocsEnum docsAndFreqs2;
|
||||
final boolean hasPositions;
|
||||
final boolean hasFreqs;
|
||||
if (postings != null) {
|
||||
docs2 = postings;
|
||||
docsAndFreqs2 = postings;
|
||||
hasPositions = true;
|
||||
hasFreqs = true;
|
||||
} else if (docsAndFreqs != null) {
|
||||
docs2 = docsAndFreqs;
|
||||
docsAndFreqs2 = docsAndFreqs;
|
||||
hasPositions = false;
|
||||
hasFreqs = true;
|
||||
} else {
|
||||
docs2 = docs;
|
||||
docsAndFreqs2 = null;
|
||||
hasPositions = false;
|
||||
hasFreqs = false;
|
||||
}
|
||||
|
||||
int lastDoc = -1;
|
||||
|
@ -780,9 +793,15 @@ public class CheckIndex {
|
|||
break;
|
||||
}
|
||||
visitedDocs.set(doc);
|
||||
final int freq = docs2.freq();
|
||||
status.totPos += freq;
|
||||
totalTermFreq += freq;
|
||||
int freq = -1;
|
||||
if (hasFreqs) {
|
||||
freq = docsAndFreqs2.freq();
|
||||
if (freq <= 0) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
|
||||
}
|
||||
status.totPos += freq;
|
||||
totalTermFreq += freq;
|
||||
}
|
||||
docCount++;
|
||||
|
||||
if (doc <= lastDoc) {
|
||||
|
@ -793,12 +812,9 @@ public class CheckIndex {
|
|||
}
|
||||
|
||||
lastDoc = doc;
|
||||
if (freq <= 0) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
|
||||
}
|
||||
|
||||
int lastPos = -1;
|
||||
if (postings != null) {
|
||||
if (hasPositions) {
|
||||
for(int j=0;j<freq;j++) {
|
||||
final int pos = postings.nextPosition();
|
||||
if (pos < -1) {
|
||||
|
@ -820,13 +836,23 @@ public class CheckIndex {
|
|||
|
||||
// Re-count if there are deleted docs:
|
||||
if (reader.hasDeletions()) {
|
||||
final DocsEnum docsNoDel = termsEnum.docs(null, docs);
|
||||
docCount = 0;
|
||||
totalTermFreq = 0;
|
||||
while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
visitedDocs.set(docsNoDel.docID());
|
||||
docCount++;
|
||||
totalTermFreq += docsNoDel.freq();
|
||||
if (hasFreqs) {
|
||||
final DocsEnum docsNoDel = termsEnum.docs(null, docsAndFreqs, true);
|
||||
docCount = 0;
|
||||
totalTermFreq = 0;
|
||||
while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
visitedDocs.set(docsNoDel.docID());
|
||||
docCount++;
|
||||
totalTermFreq += docsNoDel.freq();
|
||||
}
|
||||
} else {
|
||||
final DocsEnum docsNoDel = termsEnum.docs(null, docs, false);
|
||||
docCount = 0;
|
||||
totalTermFreq = -1;
|
||||
while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
visitedDocs.set(docsNoDel.docID());
|
||||
docCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -883,7 +909,7 @@ public class CheckIndex {
|
|||
} else {
|
||||
for(int idx=0;idx<7;idx++) {
|
||||
final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
|
||||
docs = termsEnum.docs(liveDocs, docs);
|
||||
docs = termsEnum.docs(liveDocs, docs, false);
|
||||
final int docID = docs.advance(skipDocID);
|
||||
if (docID == DocsEnum.NO_MORE_DOCS) {
|
||||
break;
|
||||
|
@ -986,7 +1012,7 @@ public class CheckIndex {
|
|||
throw new RuntimeException("seek to existing term " + seekTerms[i] + " failed");
|
||||
}
|
||||
|
||||
docs = termsEnum.docs(liveDocs, docs);
|
||||
docs = termsEnum.docs(liveDocs, docs, false);
|
||||
if (docs == null) {
|
||||
throw new RuntimeException("null DocsEnum from to existing term " + seekTerms[i]);
|
||||
}
|
||||
|
@ -1168,6 +1194,7 @@ public class CheckIndex {
|
|||
|
||||
// TODO: maybe we can factor out testTermIndex and reuse here?
|
||||
DocsEnum docs = null;
|
||||
DocsEnum docsAndFreqs = null;
|
||||
DocsAndPositionsEnum postings = null;
|
||||
final Bits liveDocs = reader.getLiveDocs();
|
||||
for (int j = 0; j < info.docCount; ++j) {
|
||||
|
@ -1210,50 +1237,53 @@ public class CheckIndex {
|
|||
if (totalTermFreq != -1 && totalTermFreq <= 0) {
|
||||
throw new RuntimeException("totalTermFreq: " + totalTermFreq + " is out of bounds");
|
||||
}
|
||||
|
||||
DocsEnum docsEnum;
|
||||
DocsAndPositionsEnum dp = termsEnum.docsAndPositions(null, postings);
|
||||
if (dp == null) {
|
||||
DocsEnum d = termsEnum.docs(null, docs);
|
||||
docsEnum = docs = d;
|
||||
|
||||
postings = termsEnum.docsAndPositions(null, postings);
|
||||
if (postings == null) {
|
||||
docsAndFreqs = termsEnum.docs(null, docsAndFreqs, true);
|
||||
if (docsAndFreqs == null) {
|
||||
docs = termsEnum.docs(null, docs, false);
|
||||
} else {
|
||||
docs = docsAndFreqs;
|
||||
}
|
||||
} else {
|
||||
docsEnum = postings = dp;
|
||||
docs = docsAndFreqs = postings;
|
||||
}
|
||||
|
||||
final int doc = docsEnum.nextDoc();
|
||||
|
||||
final int doc = docs.nextDoc();
|
||||
|
||||
if (doc != 0) {
|
||||
throw new RuntimeException("vector for doc " + j + " didn't return docID=0: got docID=" + doc);
|
||||
}
|
||||
|
||||
final int tf = docsEnum.freq();
|
||||
tfvComputedSumTotalTermFreq += tf;
|
||||
|
||||
if (docsAndFreqs != null) {
|
||||
final int tf = docsAndFreqs.freq();
|
||||
if (tf <= 0) {
|
||||
throw new RuntimeException("vector freq " + tf + " is out of bounds");
|
||||
}
|
||||
if (totalTermFreq != -1 && totalTermFreq != tf) {
|
||||
throw new RuntimeException("vector totalTermFreq " + totalTermFreq + " != tf " + tf);
|
||||
}
|
||||
tfvComputedSumTotalTermFreq += tf;
|
||||
|
||||
if (tf <= 0) {
|
||||
throw new RuntimeException("vector freq " + tf + " is out of bounds");
|
||||
}
|
||||
|
||||
if (totalTermFreq != -1 && totalTermFreq != tf) {
|
||||
throw new RuntimeException("vector totalTermFreq " + totalTermFreq + " != tf " + tf);
|
||||
}
|
||||
|
||||
if (dp != null) {
|
||||
int lastPosition = -1;
|
||||
for (int i = 0; i < tf; i++) {
|
||||
int pos = dp.nextPosition();
|
||||
if (pos != -1 && pos < 0) {
|
||||
throw new RuntimeException("vector position " + pos + " is out of bounds");
|
||||
}
|
||||
if (postings != null) {
|
||||
int lastPosition = -1;
|
||||
for (int i = 0; i < tf; i++) {
|
||||
int pos = postings.nextPosition();
|
||||
if (pos != -1 && pos < 0) {
|
||||
throw new RuntimeException("vector position " + pos + " is out of bounds");
|
||||
}
|
||||
|
||||
if (pos < lastPosition) {
|
||||
throw new RuntimeException("vector position " + pos + " < lastPos " + lastPosition);
|
||||
}
|
||||
if (pos < lastPosition) {
|
||||
throw new RuntimeException("vector position " + pos + " < lastPos " + lastPosition);
|
||||
}
|
||||
|
||||
lastPosition = pos;
|
||||
lastPosition = pos;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
throw new RuntimeException("vector for doc " + j + " references multiple documents!");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -314,7 +314,7 @@ public class DocTermOrds {
|
|||
final int df = te.docFreq();
|
||||
if (df <= maxTermDocFreq) {
|
||||
|
||||
docsEnum = te.docs(liveDocs, docsEnum);
|
||||
docsEnum = te.docs(liveDocs, docsEnum, false);
|
||||
|
||||
// dF, but takes deletions into account
|
||||
int actualDF = 0;
|
||||
|
@ -650,8 +650,8 @@ public class DocTermOrds {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
|
||||
return termsEnum.docs(liveDocs, reuse);
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
|
||||
return termsEnum.docs(liveDocs, reuse, needsFreqs);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.lucene.index;
|
|||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
|
||||
/** Iterates through the documents, term freq and positions.
|
||||
/** Iterates through the documents and term freqs.
|
||||
* NOTE: you must first call {@link #nextDoc} before using
|
||||
* any of the per-doc methods. */
|
||||
public abstract class DocsEnum extends DocIdSetIterator {
|
||||
|
|
|
@ -44,6 +44,7 @@ public final class FieldInfo {
|
|||
*/
|
||||
public static enum IndexOptions {
|
||||
/** only documents are indexed: term frequencies and positions are omitted */
|
||||
// TODO: maybe rename to just DOCS?
|
||||
DOCS_ONLY,
|
||||
/** only documents and term frequencies are indexed: positions are omitted */
|
||||
DOCS_AND_FREQS,
|
||||
|
|
|
@ -175,8 +175,8 @@ public class FilterIndexReader extends IndexReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
|
||||
return in.docs(liveDocs, reuse);
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
|
||||
return in.docs(liveDocs, reuse, needsFreqs);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -166,8 +166,8 @@ public abstract class FilteredTermsEnum extends TermsEnum {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits bits, DocsEnum reuse) throws IOException {
|
||||
return tenum.docs(bits, reuse);
|
||||
public DocsEnum docs(Bits bits, DocsEnum reuse, boolean needsFreqs) throws IOException {
|
||||
return tenum.docs(bits, reuse, needsFreqs);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -997,7 +997,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
|
|||
/** Returns {@link DocsEnum} for the specified field &
|
||||
* term. This may return null, if either the field or
|
||||
* term does not exist. */
|
||||
public DocsEnum termDocsEnum(Bits liveDocs, String field, BytesRef term) throws IOException {
|
||||
public DocsEnum termDocsEnum(Bits liveDocs, String field, BytesRef term, boolean needsFreqs) throws IOException {
|
||||
assert field != null;
|
||||
assert term != null;
|
||||
final Fields fields = fields();
|
||||
|
@ -1006,7 +1006,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
|
|||
if (terms != null) {
|
||||
final TermsEnum termsEnum = terms.iterator(null);
|
||||
if (termsEnum.seekExact(term, true)) {
|
||||
return termsEnum.docs(liveDocs, null);
|
||||
return termsEnum.docs(liveDocs, null, needsFreqs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1016,7 +1016,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
|
|||
/** Returns {@link DocsAndPositionsEnum} for the specified
|
||||
* field & term. This may return null, if either the
|
||||
* field or term does not exist, or, positions were not
|
||||
* stored for this term. */
|
||||
* indexed for this field. */
|
||||
public DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term) throws IOException {
|
||||
assert field != null;
|
||||
assert term != null;
|
||||
|
@ -1038,7 +1038,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
|
|||
* {@link TermState}. This may return null, if either the field or the term
|
||||
* does not exist or the {@link TermState} is invalid for the underlying
|
||||
* implementation.*/
|
||||
public DocsEnum termDocsEnum(Bits liveDocs, String field, BytesRef term, TermState state) throws IOException {
|
||||
public DocsEnum termDocsEnum(Bits liveDocs, String field, BytesRef term, TermState state, boolean needsFreqs) throws IOException {
|
||||
assert state != null;
|
||||
assert field != null;
|
||||
final Fields fields = fields();
|
||||
|
@ -1047,7 +1047,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
|
|||
if (terms != null) {
|
||||
final TermsEnum termsEnum = terms.iterator(null);
|
||||
termsEnum.seekExact(term, state);
|
||||
return termsEnum.docs(liveDocs, null);
|
||||
return termsEnum.docs(liveDocs, null, needsFreqs);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
|
@ -1057,7 +1057,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
|
|||
* Returns {@link DocsAndPositionsEnum} for the specified field and
|
||||
* {@link TermState}. This may return null, if either the field or the term
|
||||
* does not exist, the {@link TermState} is invalid for the underlying
|
||||
* implementation, or positions were not stored for this term.*/
|
||||
* implementation, or positions were not indexed for this field. */
|
||||
public DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term, TermState state) throws IOException {
|
||||
assert state != null;
|
||||
assert field != null;
|
||||
|
|
|
@ -151,14 +151,14 @@ public final class MultiFields extends Fields {
|
|||
/** Returns {@link DocsEnum} for the specified field &
|
||||
* term. This may return null if the term does not
|
||||
* exist. */
|
||||
public static DocsEnum getTermDocsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term) throws IOException {
|
||||
public static DocsEnum getTermDocsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, boolean needsFreqs) throws IOException {
|
||||
assert field != null;
|
||||
assert term != null;
|
||||
final Terms terms = getTerms(r, field);
|
||||
if (terms != null) {
|
||||
final TermsEnum termsEnum = terms.iterator(null);
|
||||
if (termsEnum.seekExact(term, true)) {
|
||||
return termsEnum.docs(liveDocs, null);
|
||||
return termsEnum.docs(liveDocs, null, needsFreqs);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
|
|
|
@ -347,7 +347,7 @@ public final class MultiTermsEnum extends TermsEnum {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
|
||||
MultiDocsEnum docsEnum;
|
||||
// Can only reuse if incoming enum is also a MultiDocsEnum
|
||||
if (reuse != null && reuse instanceof MultiDocsEnum) {
|
||||
|
@ -397,14 +397,16 @@ public final class MultiTermsEnum extends TermsEnum {
|
|||
}
|
||||
|
||||
assert entry.index < docsEnum.subDocsEnum.length: entry.index + " vs " + docsEnum.subDocsEnum.length + "; " + subs.length;
|
||||
final DocsEnum subDocsEnum = entry.terms.docs(b, docsEnum.subDocsEnum[entry.index]);
|
||||
|
||||
final DocsEnum subDocsEnum = entry.terms.docs(b, docsEnum.subDocsEnum[entry.index], needsFreqs);
|
||||
if (subDocsEnum != null) {
|
||||
docsEnum.subDocsEnum[entry.index] = subDocsEnum;
|
||||
subDocs[upto].docsEnum = subDocsEnum;
|
||||
subDocs[upto].slice = entry.subSlice;
|
||||
|
||||
upto++;
|
||||
} else {
|
||||
// One of our subs cannot provide freqs:
|
||||
assert needsFreqs;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -475,7 +477,7 @@ public final class MultiTermsEnum extends TermsEnum {
|
|||
subDocsAndPositions[upto].slice = entry.subSlice;
|
||||
upto++;
|
||||
} else {
|
||||
if (entry.terms.docs(b, null) != null) {
|
||||
if (entry.terms.docs(b, null, false) != null) {
|
||||
// At least one of our subs does not store
|
||||
// positions -- we can't correctly produce a
|
||||
// MultiDocsAndPositions enum
|
||||
|
|
|
@ -147,12 +147,16 @@ public abstract class TermsEnum {
|
|||
|
||||
/** Get {@link DocsEnum} for the current term. Do not
|
||||
* call this when the enum is unpositioned. This method
|
||||
* will not return null.
|
||||
* may return null (if needsFreqs is true but freqs were
|
||||
* not indexed for this field).
|
||||
*
|
||||
* @param liveDocs unset bits are documents that should not
|
||||
* be returned
|
||||
* @param reuse pass a prior DocsEnum for possible reuse */
|
||||
public abstract DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException;
|
||||
* @param reuse pass a prior DocsEnum for possible reuse
|
||||
* @param needsFreqs true if the caller intends to call
|
||||
* {@link DocsEnum#freq}. If you pass false you must not
|
||||
* call {@link DocsEnum#freq} in the returned DocsEnum. */
|
||||
public abstract DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException;
|
||||
|
||||
/** Get {@link DocsAndPositionsEnum} for the current term.
|
||||
* Do not call this when the enum is unpositioned.
|
||||
|
@ -229,7 +233,7 @@ public abstract class TermsEnum {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) {
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) {
|
||||
throw new IllegalStateException("this method should never be called");
|
||||
}
|
||||
|
||||
|
|
|
@ -25,8 +25,8 @@ import java.util.TreeMap;
|
|||
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.FieldsEnum;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
|
@ -689,14 +689,11 @@ public class BlockTermsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
|
||||
//System.out.println("BTR.docs this=" + this);
|
||||
decodeMetaData();
|
||||
//System.out.println("BTR.docs: state.docFreq=" + state.docFreq);
|
||||
final DocsEnum docsEnum = postingsReader.docs(fieldInfo, state, liveDocs, reuse);
|
||||
assert docsEnum != null;
|
||||
//System.out.println("BTR.docs: return docsEnum=" + docsEnum);
|
||||
return docsEnum;
|
||||
return postingsReader.docs(fieldInfo, state, liveDocs, reuse, needsFreqs);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -875,9 +875,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
|
||||
public DocsEnum docs(Bits skipDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
|
||||
currentFrame.decodeMetaData();
|
||||
return postingsReader.docs(fieldInfo, currentFrame.termState, skipDocs, reuse);
|
||||
return postingsReader.docs(fieldInfo, currentFrame.termState, skipDocs, reuse, needsFreqs);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -2082,7 +2082,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits skipDocs, DocsEnum reuse) throws IOException {
|
||||
public DocsEnum docs(Bits skipDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
|
||||
assert !eof;
|
||||
//if (DEBUG) {
|
||||
//System.out.println("BTTR.docs seg=" + segment);
|
||||
|
@ -2091,10 +2091,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
|||
//if (DEBUG) {
|
||||
//System.out.println(" state=" + currentFrame.state);
|
||||
//}
|
||||
final DocsEnum docsEnum = postingsReader.docs(fieldInfo, currentFrame.state, skipDocs, reuse);
|
||||
|
||||
assert docsEnum != null;
|
||||
return docsEnum;
|
||||
return postingsReader.docs(fieldInfo, currentFrame.state, skipDocs, reuse, needsFreqs);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -63,7 +63,19 @@ public abstract class PostingsConsumer {
|
|||
int df = 0;
|
||||
long totTF = 0;
|
||||
|
||||
if (mergeState.fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
if (mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
while(true) {
|
||||
final int doc = postings.nextDoc();
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
break;
|
||||
}
|
||||
visitedDocs.set(doc);
|
||||
this.startDoc(doc, 0);
|
||||
this.finishDoc();
|
||||
df++;
|
||||
}
|
||||
totTF = -1;
|
||||
} else if (mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS) {
|
||||
while(true) {
|
||||
final int doc = postings.nextDoc();
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
|
|
|
@ -51,7 +51,7 @@ public abstract class PostingsReaderBase implements Closeable {
|
|||
|
||||
/** Must fully consume state, since after this call that
|
||||
* TermState may be reused. */
|
||||
public abstract DocsEnum docs(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsEnum reuse) throws IOException;
|
||||
public abstract DocsEnum docs(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsEnum reuse, boolean needsFreqs) throws IOException;
|
||||
|
||||
/** Must fully consume state, since after this call that
|
||||
* TermState may be reused. */
|
||||
|
|
|
@ -51,8 +51,9 @@ public abstract class TermsConsumer {
|
|||
public abstract Comparator<BytesRef> getComparator() throws IOException;
|
||||
|
||||
/** Default merge impl */
|
||||
private MappingMultiDocsEnum docsEnum = null;
|
||||
private MappingMultiDocsAndPositionsEnum postingsEnum = null;
|
||||
private MappingMultiDocsEnum docsEnum;
|
||||
private MappingMultiDocsEnum docsAndFreqsEnum;
|
||||
private MappingMultiDocsAndPositionsEnum postingsEnum;
|
||||
|
||||
public void merge(MergeState mergeState, TermsEnum termsEnum) throws IOException {
|
||||
|
||||
|
@ -63,7 +64,7 @@ public abstract class TermsConsumer {
|
|||
long sumDFsinceLastAbortCheck = 0;
|
||||
FixedBitSet visitedDocs = new FixedBitSet(mergeState.mergedDocCount);
|
||||
|
||||
if (mergeState.fieldInfo.indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
|
||||
if (mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
if (docsEnum == null) {
|
||||
docsEnum = new MappingMultiDocsEnum();
|
||||
}
|
||||
|
@ -74,14 +75,14 @@ public abstract class TermsConsumer {
|
|||
while((term = termsEnum.next()) != null) {
|
||||
// We can pass null for liveDocs, because the
|
||||
// mapping enum will skip the non-live docs:
|
||||
docsEnumIn = (MultiDocsEnum) termsEnum.docs(null, docsEnumIn);
|
||||
docsEnumIn = (MultiDocsEnum) termsEnum.docs(null, docsEnumIn, false);
|
||||
if (docsEnumIn != null) {
|
||||
docsEnum.reset(docsEnumIn);
|
||||
final PostingsConsumer postingsConsumer = startTerm(term);
|
||||
final TermStats stats = postingsConsumer.merge(mergeState, docsEnum, visitedDocs);
|
||||
if (stats.docFreq > 0) {
|
||||
finishTerm(term, stats);
|
||||
sumTotalTermFreq += stats.totalTermFreq;
|
||||
sumTotalTermFreq += stats.docFreq;
|
||||
sumDFsinceLastAbortCheck += stats.docFreq;
|
||||
sumDocFreq += stats.docFreq;
|
||||
if (sumDFsinceLastAbortCheck > 60000) {
|
||||
|
@ -91,7 +92,35 @@ public abstract class TermsConsumer {
|
|||
}
|
||||
}
|
||||
}
|
||||
} else if (mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS) {
|
||||
if (docsAndFreqsEnum == null) {
|
||||
docsAndFreqsEnum = new MappingMultiDocsEnum();
|
||||
}
|
||||
docsAndFreqsEnum.setMergeState(mergeState);
|
||||
|
||||
MultiDocsEnum docsAndFreqsEnumIn = null;
|
||||
|
||||
while((term = termsEnum.next()) != null) {
|
||||
// We can pass null for liveDocs, because the
|
||||
// mapping enum will skip the non-live docs:
|
||||
docsAndFreqsEnumIn = (MultiDocsEnum) termsEnum.docs(null, docsAndFreqsEnumIn, true);
|
||||
assert docsAndFreqsEnumIn != null;
|
||||
docsAndFreqsEnum.reset(docsAndFreqsEnumIn);
|
||||
final PostingsConsumer postingsConsumer = startTerm(term);
|
||||
final TermStats stats = postingsConsumer.merge(mergeState, docsAndFreqsEnum, visitedDocs);
|
||||
if (stats.docFreq > 0) {
|
||||
finishTerm(term, stats);
|
||||
sumTotalTermFreq += stats.totalTermFreq;
|
||||
sumDFsinceLastAbortCheck += stats.docFreq;
|
||||
sumDocFreq += stats.docFreq;
|
||||
if (sumDFsinceLastAbortCheck > 60000) {
|
||||
mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0);
|
||||
sumDFsinceLastAbortCheck = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
assert mergeState.fieldInfo.indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
|
||||
if (postingsEnum == null) {
|
||||
postingsEnum = new MappingMultiDocsAndPositionsEnum();
|
||||
}
|
||||
|
@ -101,27 +130,26 @@ public abstract class TermsConsumer {
|
|||
// We can pass null for liveDocs, because the
|
||||
// mapping enum will skip the non-live docs:
|
||||
postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn);
|
||||
if (postingsEnumIn != null) {
|
||||
postingsEnum.reset(postingsEnumIn);
|
||||
// set PayloadProcessor
|
||||
if (mergeState.payloadProcessorProvider != null) {
|
||||
for (int i = 0; i < mergeState.readers.size(); i++) {
|
||||
if (mergeState.dirPayloadProcessor[i] != null) {
|
||||
mergeState.currentPayloadProcessor[i] = mergeState.dirPayloadProcessor[i].getProcessor(mergeState.fieldInfo.name, term);
|
||||
}
|
||||
assert postingsEnumIn != null;
|
||||
postingsEnum.reset(postingsEnumIn);
|
||||
// set PayloadProcessor
|
||||
if (mergeState.payloadProcessorProvider != null) {
|
||||
for (int i = 0; i < mergeState.readers.size(); i++) {
|
||||
if (mergeState.dirPayloadProcessor[i] != null) {
|
||||
mergeState.currentPayloadProcessor[i] = mergeState.dirPayloadProcessor[i].getProcessor(mergeState.fieldInfo.name, term);
|
||||
}
|
||||
}
|
||||
final PostingsConsumer postingsConsumer = startTerm(term);
|
||||
final TermStats stats = postingsConsumer.merge(mergeState, postingsEnum, visitedDocs);
|
||||
if (stats.docFreq > 0) {
|
||||
finishTerm(term, stats);
|
||||
sumTotalTermFreq += stats.totalTermFreq;
|
||||
sumDFsinceLastAbortCheck += stats.docFreq;
|
||||
sumDocFreq += stats.docFreq;
|
||||
if (sumDFsinceLastAbortCheck > 60000) {
|
||||
mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0);
|
||||
sumDFsinceLastAbortCheck = 0;
|
||||
}
|
||||
}
|
||||
final PostingsConsumer postingsConsumer = startTerm(term);
|
||||
final TermStats stats = postingsConsumer.merge(mergeState, postingsEnum, visitedDocs);
|
||||
if (stats.docFreq > 0) {
|
||||
finishTerm(term, stats);
|
||||
sumTotalTermFreq += stats.totalTermFreq;
|
||||
sumDFsinceLastAbortCheck += stats.docFreq;
|
||||
sumDocFreq += stats.docFreq;
|
||||
if (sumDFsinceLastAbortCheck > 60000) {
|
||||
mergeState.checkAbort.work(sumDFsinceLastAbortCheck/5.0);
|
||||
sumDFsinceLastAbortCheck = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,8 +27,8 @@ import java.util.TreeMap;
|
|||
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.FieldsEnum;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
|
@ -950,9 +950,11 @@ public class Lucene3xFields extends FieldsProducer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
|
||||
PreDocsEnum docsEnum;
|
||||
if (reuse == null || !(reuse instanceof PreDocsEnum)) {
|
||||
if (needsFreqs && fieldInfo.indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
return null;
|
||||
} else if (reuse == null || !(reuse instanceof PreDocsEnum)) {
|
||||
docsEnum = new PreDocsEnum();
|
||||
} else {
|
||||
docsEnum = (PreDocsEnum) reuse;
|
||||
|
|
|
@ -112,7 +112,10 @@ public class SegmentTermDocs {
|
|||
}
|
||||
|
||||
public final int doc() { return doc; }
|
||||
public final int freq() { return freq; }
|
||||
public final int freq() {
|
||||
assert indexOptions != IndexOptions.DOCS_ONLY;
|
||||
return freq;
|
||||
}
|
||||
|
||||
protected void skippingDoc() throws IOException {
|
||||
}
|
||||
|
@ -125,7 +128,6 @@ public class SegmentTermDocs {
|
|||
|
||||
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
doc += docCode;
|
||||
freq = 1;
|
||||
} else {
|
||||
doc += docCode >>> 1; // shift off low bit
|
||||
if ((docCode & 1) != 0) // if low bit is set
|
||||
|
|
|
@ -18,7 +18,6 @@ package org.apache.lucene.index.codecs.lucene40;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
|
@ -209,9 +208,11 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse) throws IOException {
|
||||
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
|
||||
SegmentDocsEnum docsEnum;
|
||||
if (reuse == null || !(reuse instanceof SegmentDocsEnum)) {
|
||||
if (needsFreqs && fieldInfo.indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
return null;
|
||||
} else if (reuse == null || !(reuse instanceof SegmentDocsEnum)) {
|
||||
docsEnum = new SegmentDocsEnum(freqIn);
|
||||
} else {
|
||||
docsEnum = (SegmentDocsEnum) reuse;
|
||||
|
@ -277,7 +278,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
|
|||
final IndexInput freqIn;
|
||||
final IndexInput startFreqIn;
|
||||
|
||||
boolean omitTF; // does current field omit term freq?
|
||||
boolean indexOmitsTF; // does current field omit term freq?
|
||||
boolean storePayloads; // does current field store payloads?
|
||||
|
||||
int limit; // number of docs in this posting
|
||||
|
@ -300,12 +301,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
public SegmentDocsEnum reset(FieldInfo fieldInfo, StandardTermState termState, Bits liveDocs) throws IOException {
|
||||
omitTF = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY;
|
||||
if (omitTF) {
|
||||
freq = 1;
|
||||
Arrays.fill(freqs, 1);
|
||||
}
|
||||
|
||||
indexOmitsTF = fieldInfo.indexOptions == IndexOptions.DOCS_ONLY;
|
||||
storePayloads = fieldInfo.storePayloads;
|
||||
this.liveDocs = liveDocs;
|
||||
freqOffset = termState.freqOffset;
|
||||
|
@ -331,6 +327,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
|
|||
|
||||
@Override
|
||||
public int freq() {
|
||||
assert !indexOmitsTF;
|
||||
return freq;
|
||||
}
|
||||
|
||||
|
@ -389,7 +386,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
|
|||
count = bufferSize;
|
||||
ord += bufferSize;
|
||||
|
||||
if (omitTF)
|
||||
if (indexOmitsTF)
|
||||
fillDocs(bufferSize);
|
||||
else
|
||||
fillDocsAndFreqs(bufferSize);
|
||||
|
@ -400,7 +397,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
|
|||
private int scanTo(int target) throws IOException {
|
||||
while (ord++ < limit) {
|
||||
int code = freqIn.readVInt();
|
||||
if (omitTF) {
|
||||
if (indexOmitsTF) {
|
||||
accum += code;
|
||||
} else {
|
||||
accum += code >>> 1; // shift off low bit
|
||||
|
|
|
@ -505,7 +505,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs /* ignored */) throws IOException {
|
||||
TVDocsEnum docsEnum;
|
||||
if (reuse != null && reuse instanceof TVDocsEnum) {
|
||||
docsEnum = (TVDocsEnum) reuse;
|
||||
|
|
|
@ -26,8 +26,8 @@ import java.util.TreeMap;
|
|||
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.FieldsEnum;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
|
@ -36,10 +36,10 @@ import org.apache.lucene.index.SegmentReadState;
|
|||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.codecs.PostingsFormat;
|
||||
import org.apache.lucene.index.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.index.codecs.FieldsProducer;
|
||||
import org.apache.lucene.index.codecs.PostingsConsumer;
|
||||
import org.apache.lucene.index.codecs.PostingsFormat;
|
||||
import org.apache.lucene.index.codecs.TermStats;
|
||||
import org.apache.lucene.index.codecs.TermsConsumer;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
|
@ -317,7 +317,6 @@ public class MemoryPostingsFormat extends PostingsFormat {
|
|||
docUpto++;
|
||||
if (indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
accum += in.readVInt();
|
||||
freq = 1;
|
||||
} else {
|
||||
final int code = in.readVInt();
|
||||
accum += code >>> 1;
|
||||
|
@ -371,6 +370,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
|
|||
|
||||
@Override
|
||||
public int freq() {
|
||||
assert indexOptions != IndexOptions.DOCS_ONLY;
|
||||
return freq;
|
||||
}
|
||||
}
|
||||
|
@ -600,10 +600,13 @@ public class MemoryPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
|
||||
decodeMetaData();
|
||||
FSTDocsEnum docsEnum;
|
||||
if (reuse == null || !(reuse instanceof FSTDocsEnum)) {
|
||||
|
||||
if (needsFreqs && field.indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
return null;
|
||||
} else if (reuse == null || !(reuse instanceof FSTDocsEnum)) {
|
||||
docsEnum = new FSTDocsEnum(field.indexOptions, field.storePayloads);
|
||||
} else {
|
||||
docsEnum = (FSTDocsEnum) reuse;
|
||||
|
|
|
@ -178,7 +178,10 @@ public class PulsingPostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsEnum reuse) throws IOException {
|
||||
public DocsEnum docs(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
|
||||
if (needsFreqs && field.indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
return null;
|
||||
}
|
||||
PulsingTermState termState = (PulsingTermState) _termState;
|
||||
if (termState.postingsSize != -1) {
|
||||
PulsingDocsEnum postings;
|
||||
|
@ -202,11 +205,11 @@ public class PulsingPostingsReader extends PostingsReaderBase {
|
|||
return postings.reset(liveDocs, termState);
|
||||
} else {
|
||||
if (reuse instanceof PulsingDocsEnum) {
|
||||
DocsEnum wrapped = wrappedPostingsReader.docs(field, termState.wrappedTermState, liveDocs, getOther(reuse));
|
||||
DocsEnum wrapped = wrappedPostingsReader.docs(field, termState.wrappedTermState, liveDocs, getOther(reuse), needsFreqs);
|
||||
setOther(wrapped, reuse); // wrapped.other = reuse
|
||||
return wrapped;
|
||||
} else {
|
||||
return wrappedPostingsReader.docs(field, termState.wrappedTermState, liveDocs, reuse);
|
||||
return wrappedPostingsReader.docs(field, termState.wrappedTermState, liveDocs, reuse, needsFreqs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -283,7 +286,6 @@ public class PulsingPostingsReader extends PostingsReaderBase {
|
|||
docID = -1;
|
||||
accum = 0;
|
||||
payloadLength = 0;
|
||||
freq = 1;
|
||||
this.liveDocs = liveDocs;
|
||||
return this;
|
||||
}
|
||||
|
@ -342,6 +344,7 @@ public class PulsingPostingsReader extends PostingsReaderBase {
|
|||
|
||||
@Override
|
||||
public int freq() {
|
||||
assert indexOptions != IndexOptions.DOCS_ONLY;
|
||||
return freq;
|
||||
}
|
||||
|
||||
|
|
|
@ -272,7 +272,10 @@ public class SepPostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsEnum reuse) throws IOException {
|
||||
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
|
||||
if (needsFreqs && fieldInfo.indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
return null;
|
||||
}
|
||||
final SepTermState termState = (SepTermState) _termState;
|
||||
SepDocsEnum docsEnum;
|
||||
if (reuse == null || !(reuse instanceof SepDocsEnum)) {
|
||||
|
@ -369,8 +372,6 @@ public class SepPostingsReader extends PostingsReaderBase {
|
|||
if (!omitTF) {
|
||||
freqIndex.set(termState.freqIndex);
|
||||
freqIndex.seek(freqReader);
|
||||
} else {
|
||||
freq = 1;
|
||||
}
|
||||
|
||||
docFreq = termState.docFreq;
|
||||
|
@ -412,6 +413,7 @@ public class SepPostingsReader extends PostingsReaderBase {
|
|||
|
||||
@Override
|
||||
public int freq() {
|
||||
assert !omitTF;
|
||||
return freq;
|
||||
}
|
||||
|
||||
|
|
|
@ -17,19 +17,24 @@ package org.apache.lucene.index.codecs.simpletext;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.index.codecs.FieldsProducer;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.FieldsEnum;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo.IndexOptions;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.FieldsEnum;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.codecs.FieldsProducer;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
@ -37,13 +42,8 @@ import org.apache.lucene.util.UnicodeUtil;
|
|||
import org.apache.lucene.util.fst.Builder;
|
||||
import org.apache.lucene.util.fst.BytesRefFSTEnum;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
import org.apache.lucene.util.fst.PairOutputs;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
|
||||
class SimpleTextFieldsReader extends FieldsProducer {
|
||||
|
||||
|
@ -190,14 +190,17 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
|
||||
if (needsFreqs && indexOptions == IndexOptions.DOCS_ONLY) {
|
||||
return null;
|
||||
}
|
||||
SimpleTextDocsEnum docsEnum;
|
||||
if (reuse != null && reuse instanceof SimpleTextDocsEnum && ((SimpleTextDocsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
|
||||
docsEnum = (SimpleTextDocsEnum) reuse;
|
||||
} else {
|
||||
docsEnum = new SimpleTextDocsEnum();
|
||||
}
|
||||
return docsEnum.reset(docsStart, liveDocs, indexOptions == IndexOptions.DOCS_ONLY);
|
||||
return docsEnum.reset(docsStart, liveDocs, !needsFreqs);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -245,9 +248,6 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
in.seek(fp);
|
||||
this.omitTF = omitTF;
|
||||
docID = -1;
|
||||
if (omitTF) {
|
||||
tf = 1;
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -258,6 +258,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
|
||||
@Override
|
||||
public int freq() {
|
||||
assert !omitTF;
|
||||
return tf;
|
||||
}
|
||||
|
||||
|
|
|
@ -365,10 +365,10 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
|
||||
// TODO: reuse
|
||||
SimpleTVDocsEnum e = new SimpleTVDocsEnum();
|
||||
e.reset(liveDocs, current.getValue().freq);
|
||||
e.reset(liveDocs, needsFreqs ? current.getValue().freq : -1);
|
||||
return e;
|
||||
}
|
||||
|
||||
|
@ -399,6 +399,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
|||
|
||||
@Override
|
||||
public int freq() {
|
||||
assert freq != -1;
|
||||
return freq;
|
||||
}
|
||||
|
||||
|
|
|
@ -17,21 +17,22 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.ConjunctionTermScorer.DocsAndFreqs;
|
||||
import org.apache.lucene.search.similarities.SimilarityProvider;
|
||||
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
|
||||
import org.apache.lucene.search.TermQuery.TermWeight;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.ConjunctionTermScorer.DocsAndFreqs;
|
||||
import org.apache.lucene.search.TermQuery.TermWeight;
|
||||
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
|
||||
import org.apache.lucene.search.similarities.SimilarityProvider;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
/** A Query that matches documents matching boolean combinations of other
|
||||
* queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other
|
||||
* BooleanQuerys.
|
||||
|
@ -349,6 +350,11 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
|
||||
private Scorer createConjunctionTermScorer(AtomicReaderContext context, Bits acceptDocs)
|
||||
throws IOException {
|
||||
|
||||
// TODO: fix scorer API to specify "needsScores" up
|
||||
// front, so we can do match-only if caller doesn't
|
||||
// need scores
|
||||
|
||||
final DocsAndFreqs[] docsAndFreqs = new DocsAndFreqs[weights.size()];
|
||||
for (int i = 0; i < docsAndFreqs.length; i++) {
|
||||
final TermWeight weight = (TermWeight) weights.get(i);
|
||||
|
@ -357,12 +363,46 @@ public class BooleanQuery extends Query implements Iterable<BooleanClause> {
|
|||
return null;
|
||||
}
|
||||
final ExactDocScorer docScorer = weight.createDocScorer(context);
|
||||
docsAndFreqs[i] = new DocsAndFreqs(termsEnum.docs(
|
||||
acceptDocs, null), termsEnum.docFreq(), docScorer);
|
||||
final DocsEnum docsAndFreqsEnum = termsEnum.docs(acceptDocs, null, true);
|
||||
if (docsAndFreqsEnum == null) {
|
||||
// TODO: we could carry over TermState from the
|
||||
// terms we already seek'd to, to save re-seeking
|
||||
// to make the match-only scorer, but it's likely
|
||||
// rare that BQ mixes terms from omitTf and
|
||||
// non-omitTF fields:
|
||||
|
||||
// At least one sub cannot provide freqs; abort
|
||||
// and fallback to full match-only scorer:
|
||||
return createMatchOnlyConjunctionTermScorer(context, acceptDocs);
|
||||
}
|
||||
|
||||
docsAndFreqs[i] = new DocsAndFreqs(docsAndFreqsEnum,
|
||||
docsAndFreqsEnum,
|
||||
termsEnum.docFreq(), docScorer);
|
||||
}
|
||||
return new ConjunctionTermScorer(this, disableCoord ? 1.0f : coord(
|
||||
docsAndFreqs.length, docsAndFreqs.length), docsAndFreqs);
|
||||
}
|
||||
|
||||
private Scorer createMatchOnlyConjunctionTermScorer(AtomicReaderContext context, Bits acceptDocs)
|
||||
throws IOException {
|
||||
|
||||
final DocsAndFreqs[] docsAndFreqs = new DocsAndFreqs[weights.size()];
|
||||
for (int i = 0; i < docsAndFreqs.length; i++) {
|
||||
final TermWeight weight = (TermWeight) weights.get(i);
|
||||
final TermsEnum termsEnum = weight.getTermsEnum(context);
|
||||
if (termsEnum == null) {
|
||||
return null;
|
||||
}
|
||||
final ExactDocScorer docScorer = weight.createDocScorer(context);
|
||||
docsAndFreqs[i] = new DocsAndFreqs(null,
|
||||
termsEnum.docs(acceptDocs, null, false),
|
||||
termsEnum.docFreq(), docScorer);
|
||||
}
|
||||
|
||||
return new MatchOnlyConjunctionTermScorer(this, disableCoord ? 1.0f : coord(
|
||||
docsAndFreqs.length, docsAndFreqs.length), docsAndFreqs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean scoresDocsOutOfOrder() {
|
||||
|
|
|
@ -17,17 +17,18 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
||||
/** Scorer for conjunctions, sets of terms, all of which are required. */
|
||||
final class ConjunctionTermScorer extends Scorer {
|
||||
private final float coord;
|
||||
private int lastDoc = -1;
|
||||
private final DocsAndFreqs[] docsAndFreqs;
|
||||
class ConjunctionTermScorer extends Scorer {
|
||||
protected final float coord;
|
||||
protected int lastDoc = -1;
|
||||
protected final DocsAndFreqs[] docsAndFreqs;
|
||||
private final DocsAndFreqs lead;
|
||||
|
||||
ConjunctionTermScorer(Weight weight, float coord,
|
||||
|
@ -39,7 +40,7 @@ final class ConjunctionTermScorer extends Scorer {
|
|||
// lead the matching.
|
||||
ArrayUtil.mergeSort(docsAndFreqs, new Comparator<DocsAndFreqs>() {
|
||||
public int compare(DocsAndFreqs o1, DocsAndFreqs o2) {
|
||||
return o1.freq - o2.freq;
|
||||
return o1.docFreq - o2.docFreq;
|
||||
}
|
||||
});
|
||||
|
||||
|
@ -96,14 +97,16 @@ final class ConjunctionTermScorer extends Scorer {
|
|||
}
|
||||
|
||||
static final class DocsAndFreqs {
|
||||
final DocsEnum docsAndFreqs;
|
||||
final DocsEnum docs;
|
||||
final int freq;
|
||||
final int docFreq;
|
||||
final ExactDocScorer docScorer;
|
||||
int doc = -1;
|
||||
|
||||
DocsAndFreqs(DocsEnum docs, int freq, ExactDocScorer docScorer) {
|
||||
DocsAndFreqs(DocsEnum docsAndFreqs, DocsEnum docs, int docFreq, ExactDocScorer docScorer) {
|
||||
this.docsAndFreqs = docsAndFreqs;
|
||||
this.docs = docs;
|
||||
this.freq = freq;
|
||||
this.docFreq = docFreq;
|
||||
this.docScorer = docScorer;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -342,7 +342,7 @@ class FieldCacheImpl implements FieldCache {
|
|||
break;
|
||||
}
|
||||
final byte termval = parser.parseByte(term);
|
||||
docs = termsEnum.docs(null, docs);
|
||||
docs = termsEnum.docs(null, docs, false);
|
||||
while (true) {
|
||||
final int docID = docs.nextDoc();
|
||||
if (docID == DocsEnum.NO_MORE_DOCS) {
|
||||
|
@ -415,7 +415,7 @@ class FieldCacheImpl implements FieldCache {
|
|||
break;
|
||||
}
|
||||
final short termval = parser.parseShort(term);
|
||||
docs = termsEnum.docs(null, docs);
|
||||
docs = termsEnum.docs(null, docs, false);
|
||||
while (true) {
|
||||
final int docID = docs.nextDoc();
|
||||
if (docID == DocsEnum.NO_MORE_DOCS) {
|
||||
|
@ -519,7 +519,7 @@ class FieldCacheImpl implements FieldCache {
|
|||
retArray = new int[maxDoc];
|
||||
}
|
||||
|
||||
docs = termsEnum.docs(null, docs);
|
||||
docs = termsEnum.docs(null, docs, false);
|
||||
while (true) {
|
||||
final int docID = docs.nextDoc();
|
||||
if (docID == DocsEnum.NO_MORE_DOCS) {
|
||||
|
@ -586,7 +586,7 @@ class FieldCacheImpl implements FieldCache {
|
|||
res = new FixedBitSet(maxDoc);
|
||||
}
|
||||
|
||||
docs = termsEnum.docs(null, docs);
|
||||
docs = termsEnum.docs(null, docs, false);
|
||||
// TODO: use bulk API
|
||||
while (true) {
|
||||
final int docID = docs.nextDoc();
|
||||
|
@ -669,7 +669,7 @@ class FieldCacheImpl implements FieldCache {
|
|||
retArray = new float[maxDoc];
|
||||
}
|
||||
|
||||
docs = termsEnum.docs(null, docs);
|
||||
docs = termsEnum.docs(null, docs, false);
|
||||
while (true) {
|
||||
final int docID = docs.nextDoc();
|
||||
if (docID == DocsEnum.NO_MORE_DOCS) {
|
||||
|
@ -757,7 +757,7 @@ class FieldCacheImpl implements FieldCache {
|
|||
retArray = new long[maxDoc];
|
||||
}
|
||||
|
||||
docs = termsEnum.docs(null, docs);
|
||||
docs = termsEnum.docs(null, docs, false);
|
||||
while (true) {
|
||||
final int docID = docs.nextDoc();
|
||||
if (docID == DocsEnum.NO_MORE_DOCS) {
|
||||
|
@ -846,7 +846,7 @@ class FieldCacheImpl implements FieldCache {
|
|||
retArray = new double[maxDoc];
|
||||
}
|
||||
|
||||
docs = termsEnum.docs(null, docs);
|
||||
docs = termsEnum.docs(null, docs, false);
|
||||
while (true) {
|
||||
final int docID = docs.nextDoc();
|
||||
if (docID == DocsEnum.NO_MORE_DOCS) {
|
||||
|
@ -1020,7 +1020,7 @@ class FieldCacheImpl implements FieldCache {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
|
@ -1147,7 +1147,7 @@ class FieldCacheImpl implements FieldCache {
|
|||
termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1+termOrd, 1));
|
||||
}
|
||||
termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term));
|
||||
docs = termsEnum.docs(null, docs);
|
||||
docs = termsEnum.docs(null, docs, false);
|
||||
while (true) {
|
||||
final int docID = docs.nextDoc();
|
||||
if (docID == DocsEnum.NO_MORE_DOCS) {
|
||||
|
@ -1268,7 +1268,7 @@ class FieldCacheImpl implements FieldCache {
|
|||
break;
|
||||
}
|
||||
final long pointer = bytes.copyUsingLengthPrefix(term);
|
||||
docs = termsEnum.docs(null, docs);
|
||||
docs = termsEnum.docs(null, docs, false);
|
||||
while (true) {
|
||||
final int docID = docs.nextDoc();
|
||||
if (docID == DocsEnum.NO_MORE_DOCS) {
|
||||
|
|
|
@ -259,8 +259,8 @@ public final class FuzzyTermsEnum extends TermsEnum {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
|
||||
return actualEnum.docs(liveDocs, reuse);
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
|
||||
return actualEnum.docs(liveDocs, reuse, needsFreqs);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/** Scorer for conjunctions, sets of terms, all of which are required; every term is scored with a fixed frequency of 1. */
|
||||
final class MatchOnlyConjunctionTermScorer extends ConjunctionTermScorer {
|
||||
MatchOnlyConjunctionTermScorer(Weight weight, float coord,
|
||||
DocsAndFreqs[] docsAndFreqs) throws IOException {
|
||||
super(weight, coord, docsAndFreqs); // all state is held by the superclass
|
||||
}
|
||||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
float sum = 0.0f;
|
||||
for (DocsAndFreqs docs : docsAndFreqs) {
|
||||
sum += docs.docScorer.score(lastDoc, 1); // frequency argument is the constant 1, not a per-document value
|
||||
}
|
||||
return sum * coord; // scale the summed per-term scores by coord
|
||||
}
|
||||
}
|
|
@ -0,0 +1,94 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
|
||||
/** Expert: A <code>Scorer</code> for documents matching a
|
||||
* <code>Term</code>. It treats all documents as having
|
||||
* one occurrence (tf=1) for the term.
|
||||
*/
|
||||
|
||||
final class MatchOnlyTermScorer extends Scorer {
|
||||
private final DocsEnum docsEnum;
|
||||
private final Similarity.ExactDocScorer docScorer;
|
||||
|
||||
/**
|
||||
* Construct a <code>MatchOnlyTermScorer</code>.
|
||||
*
|
||||
* @param weight
|
||||
* The weight of the <code>Term</code> in the query.
|
||||
* @param td
|
||||
* An iterator over the documents matching the <code>Term</code>.
|
||||
* @param docScorer
|
||||
* The <code>Similarity.ExactDocScorer</code> implementation
|
||||
* to be used for score computations.
|
||||
*/
|
||||
MatchOnlyTermScorer(Weight weight, DocsEnum td, Similarity.ExactDocScorer docScorer) throws IOException {
|
||||
super(weight);
|
||||
this.docScorer = docScorer;
|
||||
this.docsEnum = td;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return docsEnum.docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float freq() {
|
||||
return 1.0f; // constant: no frequency is read from docsEnum
|
||||
}
|
||||
|
||||
/**
|
||||
* Advances to the next document matching the query. <br>
|
||||
*
|
||||
* @return the document matching the query or NO_MORE_DOCS if there are no more documents.
|
||||
*/
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return docsEnum.nextDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float score() {
|
||||
assert docID() != NO_MORE_DOCS; // must not be called once the iterator is exhausted
|
||||
return docScorer.score(docsEnum.docID(), 1); // freq argument is always 1
|
||||
}
|
||||
|
||||
/**
|
||||
* Advances to the first match beyond the current whose document number is
|
||||
* greater than or equal to a given target. <br>
|
||||
* The implementation uses {@link DocsEnum#advance(int)}.
|
||||
*
|
||||
* @param target
|
||||
* The target document number.
|
||||
* @return the matching document or NO_MORE_DOCS if none exist.
|
||||
*/
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return docsEnum.advance(target);
|
||||
}
|
||||
|
||||
/** Returns a string representation of this <code>MatchOnlyTermScorer</code>. */
|
||||
@Override
|
||||
public String toString() { return "scorer(" + weight + ")"; }
|
||||
}
|
|
@ -229,7 +229,7 @@ public class MultiPhraseQuery extends Query {
|
|||
|
||||
if (postingsEnum == null) {
|
||||
// term does exist, but has no positions
|
||||
assert termsEnum.docs(liveDocs, null) != null: "termstate found but no term exists in reader";
|
||||
assert termsEnum.docs(liveDocs, null, false) != null: "termstate found but no term exists in reader";
|
||||
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
|
||||
}
|
||||
|
||||
|
|
|
@ -105,7 +105,7 @@ public class MultiTermQueryWrapperFilter<Q extends MultiTermQuery> extends Filte
|
|||
do {
|
||||
// System.out.println(" iter termCount=" + termCount + " term=" +
|
||||
// enumerator.term().toBytesString());
|
||||
docsEnum = termsEnum.docs(acceptDocs, docsEnum);
|
||||
docsEnum = termsEnum.docs(acceptDocs, docsEnum, false);
|
||||
int docid;
|
||||
while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
bitSet.set(docid);
|
||||
|
|
|
@ -244,7 +244,7 @@ public class PhraseQuery extends Query {
|
|||
// PhraseQuery on a field that did not index
|
||||
// positions.
|
||||
if (postingsEnum == null) {
|
||||
assert reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null: "termstate found but no term exists in reader";
|
||||
assert reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state, false) != null: "termstate found but no term exists in reader";
|
||||
// term does exist, but has no positions
|
||||
throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
|
||||
}
|
||||
|
|
|
@ -21,18 +21,18 @@ import java.io.IOException;
|
|||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReader.ReaderContext;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.similarities.Similarity.ExactDocScorer;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.TermContext;
|
||||
import org.apache.lucene.util.ReaderUtil;
|
||||
import org.apache.lucene.util.TermContext;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
/** A Query that matches documents containing a term.
|
||||
|
@ -83,10 +83,15 @@ public class TermQuery extends Query {
|
|||
if (termsEnum == null) {
|
||||
return null;
|
||||
}
|
||||
// TODO should we reuse the DocsEnum here?
|
||||
final DocsEnum docs = termsEnum.docs(acceptDocs, null);
|
||||
assert docs != null;
|
||||
return new TermScorer(this, docs, createDocScorer(context));
|
||||
DocsEnum docs = termsEnum.docs(acceptDocs, null, true);
|
||||
if (docs != null) {
|
||||
return new TermScorer(this, docs, createDocScorer(context));
|
||||
} else {
|
||||
// Index does not store freq info
|
||||
docs = termsEnum.docs(acceptDocs, null, false);
|
||||
assert docs != null;
|
||||
return new MatchOnlyTermScorer(this, docs, createDocScorer(context));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -120,12 +125,11 @@ public class TermQuery extends Query {
|
|||
|
||||
@Override
|
||||
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
|
||||
IndexReader reader = context.reader;
|
||||
DocsEnum docs = reader.termDocsEnum(context.reader.getLiveDocs(), term.field(), term.bytes());
|
||||
if (docs != null) {
|
||||
int newDoc = docs.advance(doc);
|
||||
Scorer scorer = scorer(context, true, false, context.reader.getLiveDocs());
|
||||
if (scorer != null) {
|
||||
int newDoc = scorer.advance(doc);
|
||||
if (newDoc == doc) {
|
||||
int freq = docs.freq();
|
||||
float freq = scorer.freq();
|
||||
ExactDocScorer docScorer = similarity.exactDocScorer(stats, term.field(), context);
|
||||
ComplexExplanation result = new ComplexExplanation();
|
||||
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
||||
|
@ -136,8 +140,7 @@ public class TermQuery extends Query {
|
|||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
return new ComplexExplanation(false, 0.0f, "no matching term");
|
||||
return new ComplexExplanation(false, 0.0f, "no matching term");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -383,7 +383,7 @@ public class RAMOnlyPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse) {
|
||||
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) {
|
||||
return new RAMDocsEnum(ramField.termToDocs.get(current), liveDocs);
|
||||
}
|
||||
|
||||
|
|
|
@ -37,12 +37,18 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.CheckIndex;
|
||||
import org.apache.lucene.index.ConcurrentMergeScheduler;
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.LogMergePolicy;
|
||||
import org.apache.lucene.index.MergePolicy;
|
||||
import org.apache.lucene.index.MergeScheduler;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.TieredMergePolicy;
|
||||
import org.apache.lucene.index.codecs.Codec;
|
||||
import org.apache.lucene.index.codecs.PostingsFormat;
|
||||
|
@ -520,4 +526,51 @@ public class _TestUtil {
|
|||
|
||||
return doc2;
|
||||
}
|
||||
|
||||
// Returns a DocsEnum, but randomly sometimes uses a
|
||||
// DocsAndFreqsEnum, DocsAndPositionsEnum. Returns null
|
||||
// if field/term doesn't exist:
|
||||
public static DocsEnum docs(Random random, IndexReader r, String field, BytesRef term, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
|
||||
final Terms terms = MultiFields.getTerms(r, field);
|
||||
if (terms == null) {
|
||||
return null;
|
||||
}
|
||||
final TermsEnum termsEnum = terms.iterator(null);
|
||||
if (!termsEnum.seekExact(term, random.nextBoolean())) {
|
||||
return null;
|
||||
}
|
||||
if (random.nextBoolean()) {
|
||||
if (random.nextBoolean()) {
|
||||
// TODO: cast re-use to D&PE if we can...?
|
||||
final DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null);
|
||||
if (docsAndPositions != null) {
|
||||
return docsAndPositions;
|
||||
}
|
||||
}
|
||||
final DocsEnum docsAndFreqs = termsEnum.docs(liveDocs, reuse, true);
|
||||
if (docsAndFreqs != null) {
|
||||
return docsAndFreqs;
|
||||
}
|
||||
}
|
||||
return termsEnum.docs(liveDocs, reuse, needsFreqs);
|
||||
}
|
||||
|
||||
// Returns a DocsEnum from a positioned TermsEnum, but
|
||||
// randomly sometimes uses a DocsAndFreqsEnum, DocsAndPositionsEnum.
|
||||
public static DocsEnum docs(Random random, TermsEnum termsEnum, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException {
|
||||
if (random.nextBoolean()) {
|
||||
if (random.nextBoolean()) {
|
||||
// TODO: cast re-use to D&PE if we can...?
|
||||
final DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null);
|
||||
if (docsAndPositions != null) {
|
||||
return docsAndPositions;
|
||||
}
|
||||
}
|
||||
final DocsEnum docsAndFreqs = termsEnum.docs(liveDocs, null, true);
|
||||
if (docsAndFreqs != null) {
|
||||
return docsAndFreqs;
|
||||
}
|
||||
}
|
||||
return termsEnum.docs(liveDocs, null, needsFreqs);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -532,7 +532,7 @@ public class TestAddIndexes extends LuceneTestCase {
|
|||
private void verifyTermDocs(Directory dir, Term term, int numDocs)
|
||||
throws IOException {
|
||||
IndexReader reader = IndexReader.open(dir, true);
|
||||
DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, null, term.field, term.bytes);
|
||||
DocsEnum docsEnum = _TestUtil.docs(random, reader, term.field, term.bytes, null, null, false);
|
||||
int count = 0;
|
||||
while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS)
|
||||
count++;
|
||||
|
|
|
@ -599,7 +599,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
// should be found exactly
|
||||
assertEquals(TermsEnum.SeekStatus.FOUND,
|
||||
terms.seekCeil(aaaTerm));
|
||||
assertEquals(35, countDocs(terms.docs(null, null)));
|
||||
assertEquals(35, countDocs(_TestUtil.docs(random, terms, null, null, false)));
|
||||
assertNull(terms.next());
|
||||
|
||||
// should hit end of field
|
||||
|
@ -611,12 +611,12 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
|||
assertEquals(TermsEnum.SeekStatus.NOT_FOUND,
|
||||
terms.seekCeil(new BytesRef("a")));
|
||||
assertTrue(terms.term().bytesEquals(aaaTerm));
|
||||
assertEquals(35, countDocs(terms.docs(null, null)));
|
||||
assertEquals(35, countDocs(_TestUtil.docs(random, terms, null, null, false)));
|
||||
assertNull(terms.next());
|
||||
|
||||
assertEquals(TermsEnum.SeekStatus.FOUND,
|
||||
terms.seekCeil(aaaTerm));
|
||||
assertEquals(35, countDocs(terms.docs(null, null)));
|
||||
assertEquals(35, countDocs(_TestUtil.docs(random, terms,null, null, false)));
|
||||
assertNull(terms.next());
|
||||
|
||||
r.close();
|
||||
|
|
|
@ -279,7 +279,7 @@ public class TestCodecs extends LuceneTestCase {
|
|||
// make sure it properly fully resets (rewinds) its
|
||||
// internal state:
|
||||
for(int iter=0;iter<2;iter++) {
|
||||
docsEnum = termsEnum.docs(null, docsEnum);
|
||||
docsEnum = _TestUtil.docs(random, termsEnum, null, docsEnum, false);
|
||||
assertEquals(terms[i].docs[0], docsEnum.nextDoc());
|
||||
assertEquals(DocsEnum.NO_MORE_DOCS, docsEnum.nextDoc());
|
||||
}
|
||||
|
@ -479,7 +479,7 @@ public class TestCodecs extends LuceneTestCase {
|
|||
assertEquals(status, TermsEnum.SeekStatus.FOUND);
|
||||
assertEquals(term.docs.length, termsEnum.docFreq());
|
||||
if (field.omitTF) {
|
||||
this.verifyDocs(term.docs, term.positions, termsEnum.docs(null, null), false);
|
||||
this.verifyDocs(term.docs, term.positions, _TestUtil.docs(random, termsEnum, null, null, false), false);
|
||||
} else {
|
||||
this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null), true);
|
||||
}
|
||||
|
@ -499,7 +499,7 @@ public class TestCodecs extends LuceneTestCase {
|
|||
assertTrue(termsEnum.term().bytesEquals(new BytesRef(term.text2)));
|
||||
assertEquals(term.docs.length, termsEnum.docFreq());
|
||||
if (field.omitTF) {
|
||||
this.verifyDocs(term.docs, term.positions, termsEnum.docs(null, null), false);
|
||||
this.verifyDocs(term.docs, term.positions, _TestUtil.docs(random, termsEnum, null, null, false), false);
|
||||
} else {
|
||||
this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null), true);
|
||||
}
|
||||
|
@ -549,15 +549,22 @@ public class TestCodecs extends LuceneTestCase {
|
|||
do {
|
||||
term = field.terms[upto];
|
||||
if (TestCodecs.random.nextInt(3) == 1) {
|
||||
final DocsEnum docs = termsEnum.docs(null, null);
|
||||
final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(null, null);
|
||||
|
||||
final DocsEnum docsEnum;
|
||||
if (postings != null) {
|
||||
docsEnum = postings;
|
||||
final DocsEnum docs;
|
||||
final DocsEnum docsAndFreqs;
|
||||
final DocsAndPositionsEnum postings;
|
||||
if (!field.omitTF) {
|
||||
postings = termsEnum.docsAndPositions(null, null);
|
||||
if (postings != null) {
|
||||
docs = docsAndFreqs = postings;
|
||||
} else {
|
||||
docs = docsAndFreqs = _TestUtil.docs(random, termsEnum, null, null, true);
|
||||
}
|
||||
} else {
|
||||
docsEnum = docs;
|
||||
postings = null;
|
||||
docsAndFreqs = null;
|
||||
docs = _TestUtil.docs(random, termsEnum, null, null, false);
|
||||
}
|
||||
assertNotNull(docs);
|
||||
int upto2 = -1;
|
||||
while(upto2 < term.docs.length-1) {
|
||||
// Maybe skip:
|
||||
|
@ -567,10 +574,10 @@ public class TestCodecs extends LuceneTestCase {
|
|||
final int inc = 1+TestCodecs.random.nextInt(left-1);
|
||||
upto2 += inc;
|
||||
if (TestCodecs.random.nextInt(2) == 1) {
|
||||
doc = docsEnum.advance(term.docs[upto2]);
|
||||
doc = docs.advance(term.docs[upto2]);
|
||||
assertEquals(term.docs[upto2], doc);
|
||||
} else {
|
||||
doc = docsEnum.advance(1+term.docs[upto2]);
|
||||
doc = docs.advance(1+term.docs[upto2]);
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
// skipped past last doc
|
||||
assert upto2 == term.docs.length-1;
|
||||
|
@ -584,20 +591,20 @@ public class TestCodecs extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
} else {
|
||||
doc = docsEnum.nextDoc();
|
||||
doc = docs.nextDoc();
|
||||
assertTrue(doc != -1);
|
||||
upto2++;
|
||||
}
|
||||
assertEquals(term.docs[upto2], doc);
|
||||
if (!field.omitTF) {
|
||||
assertEquals(term.positions[upto2].length, docsEnum.freq());
|
||||
assertEquals(term.positions[upto2].length, postings.freq());
|
||||
if (TestCodecs.random.nextInt(2) == 1) {
|
||||
this.verifyPositions(term.positions[upto2], postings);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc());
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docs.nextDoc());
|
||||
}
|
||||
upto++;
|
||||
|
||||
|
|
|
@ -17,7 +17,8 @@ package org.apache.lucene.index;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -25,9 +26,8 @@ import org.apache.lucene.document.TextField;
|
|||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
public class TestDirectoryReader extends LuceneTestCase {
|
||||
protected Directory dir;
|
||||
|
@ -129,15 +129,18 @@ public class TestDirectoryReader extends LuceneTestCase {
|
|||
// test mixing up TermDocs and TermEnums from different readers.
|
||||
TermsEnum te2 = MultiFields.getTerms(mr2, "body").iterator(null);
|
||||
te2.seekCeil(new BytesRef("wow"));
|
||||
DocsEnum td = MultiFields.getTermDocsEnum(mr2,
|
||||
MultiFields.getLiveDocs(mr2),
|
||||
"body",
|
||||
te2.term());
|
||||
DocsEnum td = _TestUtil.docs(random, mr2,
|
||||
"body",
|
||||
te2.term(),
|
||||
MultiFields.getLiveDocs(mr2),
|
||||
null,
|
||||
false);
|
||||
|
||||
TermsEnum te3 = MultiFields.getTerms(mr3, "body").iterator(null);
|
||||
te3.seekCeil(new BytesRef("wow"));
|
||||
td = te3.docs(MultiFields.getLiveDocs(mr3),
|
||||
td);
|
||||
td = _TestUtil.docs(random, te3, MultiFields.getLiveDocs(mr3),
|
||||
td,
|
||||
false);
|
||||
|
||||
int ret = 0;
|
||||
|
||||
|
|
|
@ -68,11 +68,14 @@ public class TestDocCount extends LuceneTestCase {
|
|||
String field;
|
||||
while ((field = e.next()) != null) {
|
||||
Terms terms = fields.terms(field);
|
||||
if (terms == null) {
|
||||
continue;
|
||||
}
|
||||
int docCount = terms.getDocCount();
|
||||
FixedBitSet visited = new FixedBitSet(ir.maxDoc());
|
||||
TermsEnum te = terms.iterator(null);
|
||||
while (te.next() != null) {
|
||||
DocsEnum de = te.docs(null, null);
|
||||
DocsEnum de = _TestUtil.docs(random, te, null, null, false);
|
||||
while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
visited.set(de.docID());
|
||||
}
|
||||
|
|
|
@ -22,7 +22,6 @@ import java.util.Arrays;
|
|||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
|
@ -34,6 +33,7 @@ import org.apache.lucene.util.Bits;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.ReaderUtil;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
public class TestDocsAndPositions extends LuceneTestCase {
|
||||
private String fieldName;
|
||||
|
@ -99,16 +99,6 @@ public class TestDocsAndPositions extends LuceneTestCase {
|
|||
return reader.termPositionsEnum(null, fieldName, bytes);
|
||||
}
|
||||
|
||||
public DocsEnum getDocsEnum(IndexReader reader, BytesRef bytes,
|
||||
boolean freqs, Bits liveDocs) throws IOException {
|
||||
int randInt = random.nextInt(10);
|
||||
if (randInt == 0) { // once in a while throw in a positions enum
|
||||
return getDocsAndPositions(reader, bytes, liveDocs);
|
||||
} else {
|
||||
return reader.termDocsEnum(liveDocs, fieldName, bytes);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* this test indexes random numbers within a range into a field and checks
|
||||
* their occurrences by searching for a number from that range selected at
|
||||
|
@ -232,31 +222,31 @@ public class TestDocsAndPositions extends LuceneTestCase {
|
|||
AtomicReaderContext[] leaves = ReaderUtil.leaves(topReaderContext);
|
||||
for (AtomicReaderContext context : leaves) {
|
||||
int maxDoc = context.reader.maxDoc();
|
||||
DocsEnum docsAndPosEnum = getDocsEnum(context.reader, bytes, true, null);
|
||||
DocsEnum docsEnum = _TestUtil.docs(random, context.reader, fieldName, bytes, null, null, true);
|
||||
if (findNext(freqInDoc, context.docBase, context.docBase + maxDoc) == Integer.MAX_VALUE) {
|
||||
assertNull(docsAndPosEnum);
|
||||
assertNull(docsEnum);
|
||||
continue;
|
||||
}
|
||||
assertNotNull(docsAndPosEnum);
|
||||
docsAndPosEnum.nextDoc();
|
||||
assertNotNull(docsEnum);
|
||||
docsEnum.nextDoc();
|
||||
for (int j = 0; j < maxDoc; j++) {
|
||||
if (freqInDoc[context.docBase + j] != 0) {
|
||||
assertEquals(j, docsAndPosEnum.docID());
|
||||
assertEquals(docsAndPosEnum.freq(), freqInDoc[context.docBase +j]);
|
||||
assertEquals(j, docsEnum.docID());
|
||||
assertEquals(docsEnum.freq(), freqInDoc[context.docBase +j]);
|
||||
if (i % 2 == 0 && random.nextInt(10) == 0) {
|
||||
int next = findNext(freqInDoc, context.docBase+j+1, context.docBase + maxDoc) - context.docBase;
|
||||
int advancedTo = docsAndPosEnum.advance(next);
|
||||
int advancedTo = docsEnum.advance(next);
|
||||
if (next >= maxDoc) {
|
||||
assertEquals(DocsEnum.NO_MORE_DOCS, advancedTo);
|
||||
} else {
|
||||
assertTrue("advanced to: " +advancedTo + " but should be <= " + next, next >= advancedTo);
|
||||
}
|
||||
} else {
|
||||
docsAndPosEnum.nextDoc();
|
||||
docsEnum.nextDoc();
|
||||
}
|
||||
}
|
||||
}
|
||||
assertEquals("docBase: " + context.docBase + " maxDoc: " + maxDoc + " " + docsAndPosEnum.getClass(), DocsEnum.NO_MORE_DOCS, docsAndPosEnum.docID());
|
||||
assertEquals("docBase: " + context.docBase + " maxDoc: " + maxDoc + " " + docsEnum.getClass(), DocsEnum.NO_MORE_DOCS, docsEnum.docID());
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -343,7 +333,7 @@ public class TestDocsAndPositions extends LuceneTestCase {
|
|||
writer.addDocument(doc);
|
||||
IndexReader reader = writer.getReader();
|
||||
IndexReader r = getOnlySegmentReader(reader);
|
||||
DocsEnum disi = r.termDocsEnum(null, "foo", new BytesRef("bar"));
|
||||
DocsEnum disi = _TestUtil.docs(random, r, "foo", new BytesRef("bar"), null, null, false);
|
||||
int docid = disi.docID();
|
||||
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
|
@ -351,7 +341,7 @@ public class TestDocsAndPositions extends LuceneTestCase {
|
|||
// now reuse and check again
|
||||
TermsEnum te = r.terms("foo").iterator(null);
|
||||
assertTrue(te.seekExact(new BytesRef("bar"), true));
|
||||
disi = te.docs(null, disi);
|
||||
disi = _TestUtil.docs(random, te, null, disi, false);
|
||||
docid = disi.docID();
|
||||
assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
|
|
|
@ -270,18 +270,42 @@ public class TestDuelingCodecs extends LuceneTestCase {
|
|||
assertPositionsSkipping(leftTermsEnum.docFreq(),
|
||||
leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions),
|
||||
rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions));
|
||||
|
||||
// with freqs:
|
||||
assertDocsEnum(leftDocs = leftTermsEnum.docs(null, leftDocs, true),
|
||||
rightDocs = rightTermsEnum.docs(null, rightDocs, true),
|
||||
true);
|
||||
assertDocsEnum(leftDocs = leftTermsEnum.docs(randomBits, leftDocs, true),
|
||||
rightDocs = rightTermsEnum.docs(randomBits, rightDocs, true),
|
||||
true);
|
||||
|
||||
// w/o freqs:
|
||||
assertDocsEnum(leftDocs = leftTermsEnum.docs(null, leftDocs, false),
|
||||
rightDocs = rightTermsEnum.docs(null, rightDocs, false),
|
||||
false);
|
||||
assertDocsEnum(leftDocs = leftTermsEnum.docs(randomBits, leftDocs, false),
|
||||
rightDocs = rightTermsEnum.docs(randomBits, rightDocs, false),
|
||||
false);
|
||||
|
||||
assertDocsEnum(leftDocs = leftTermsEnum.docs(null, leftDocs),
|
||||
rightDocs = rightTermsEnum.docs(null, rightDocs));
|
||||
assertDocsEnum(leftDocs = leftTermsEnum.docs(randomBits, leftDocs),
|
||||
rightDocs = rightTermsEnum.docs(randomBits, rightDocs));
|
||||
|
||||
// with freqs:
|
||||
assertDocsSkipping(leftTermsEnum.docFreq(),
|
||||
leftDocs = leftTermsEnum.docs(null, leftDocs),
|
||||
rightDocs = rightTermsEnum.docs(null, rightDocs));
|
||||
leftDocs = leftTermsEnum.docs(null, leftDocs, true),
|
||||
rightDocs = rightTermsEnum.docs(null, rightDocs, true),
|
||||
true);
|
||||
assertDocsSkipping(leftTermsEnum.docFreq(),
|
||||
leftDocs = leftTermsEnum.docs(randomBits, leftDocs),
|
||||
rightDocs = rightTermsEnum.docs(randomBits, rightDocs));
|
||||
leftDocs = leftTermsEnum.docs(randomBits, leftDocs, true),
|
||||
rightDocs = rightTermsEnum.docs(randomBits, rightDocs, true),
|
||||
true);
|
||||
|
||||
// w/o freqs:
|
||||
assertDocsSkipping(leftTermsEnum.docFreq(),
|
||||
leftDocs = leftTermsEnum.docs(null, leftDocs, false),
|
||||
rightDocs = rightTermsEnum.docs(null, rightDocs, false),
|
||||
false);
|
||||
assertDocsSkipping(leftTermsEnum.docFreq(),
|
||||
leftDocs = leftTermsEnum.docs(randomBits, leftDocs, false),
|
||||
rightDocs = rightTermsEnum.docs(randomBits, rightDocs, false),
|
||||
false);
|
||||
}
|
||||
}
|
||||
assertNull(info, rightTermsEnum.next());
|
||||
|
@ -327,13 +351,19 @@ public class TestDuelingCodecs extends LuceneTestCase {
|
|||
/**
|
||||
* checks docs + freqs, sequentially
|
||||
*/
|
||||
public void assertDocsEnum(DocsEnum leftDocs, DocsEnum rightDocs) throws Exception {
|
||||
public void assertDocsEnum(DocsEnum leftDocs, DocsEnum rightDocs, boolean hasFreqs) throws Exception {
|
||||
if (leftDocs == null) {
|
||||
assertNull(rightDocs);
|
||||
return;
|
||||
}
|
||||
assertTrue(info, leftDocs.docID() == -1 || leftDocs.docID() == DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(info, rightDocs.docID() == -1 || rightDocs.docID() == DocIdSetIterator.NO_MORE_DOCS);
|
||||
int docid;
|
||||
while ((docid = leftDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
assertEquals(info, docid, rightDocs.nextDoc());
|
||||
assertEquals(info, leftDocs.freq(), rightDocs.freq());
|
||||
if (hasFreqs) {
|
||||
assertEquals(info, leftDocs.freq(), rightDocs.freq());
|
||||
}
|
||||
}
|
||||
assertEquals(info, DocIdSetIterator.NO_MORE_DOCS, rightDocs.nextDoc());
|
||||
}
|
||||
|
@ -341,7 +371,11 @@ public class TestDuelingCodecs extends LuceneTestCase {
|
|||
/**
|
||||
* checks advancing docs
|
||||
*/
|
||||
public void assertDocsSkipping(int docFreq, DocsEnum leftDocs, DocsEnum rightDocs) throws Exception {
|
||||
public void assertDocsSkipping(int docFreq, DocsEnum leftDocs, DocsEnum rightDocs, boolean hasFreqs) throws Exception {
|
||||
if (leftDocs == null) {
|
||||
assertNull(rightDocs);
|
||||
return;
|
||||
}
|
||||
int docid = -1;
|
||||
int averageGap = leftReader.maxDoc() / (1+docFreq);
|
||||
int skipInterval = 16;
|
||||
|
@ -361,7 +395,9 @@ public class TestDuelingCodecs extends LuceneTestCase {
|
|||
if (docid == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return;
|
||||
}
|
||||
assertEquals(info, leftDocs.freq(), rightDocs.freq());
|
||||
if (hasFreqs) {
|
||||
assertEquals(info, leftDocs.freq(), rightDocs.freq());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -264,18 +264,20 @@ public class TestIndexReader extends LuceneTestCase {
|
|||
Term term,
|
||||
int expected)
|
||||
throws IOException {
|
||||
DocsEnum tdocs = MultiFields.getTermDocsEnum(reader,
|
||||
MultiFields.getLiveDocs(reader),
|
||||
term.field(),
|
||||
new BytesRef(term.text()));
|
||||
int count = 0;
|
||||
if (tdocs != null) {
|
||||
while(tdocs.nextDoc()!= DocIdSetIterator.NO_MORE_DOCS) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
assertEquals(msg + ", count mismatch", expected, count);
|
||||
DocsEnum tdocs = _TestUtil.docs(random, reader,
|
||||
term.field(),
|
||||
new BytesRef(term.text()),
|
||||
MultiFields.getLiveDocs(reader),
|
||||
null,
|
||||
false);
|
||||
int count = 0;
|
||||
if (tdocs != null) {
|
||||
while(tdocs.nextDoc()!= DocIdSetIterator.NO_MORE_DOCS) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
assertEquals(msg + ", count mismatch", expected, count);
|
||||
}
|
||||
|
||||
|
||||
public void testBinaryFields() throws IOException {
|
||||
|
|
|
@ -546,10 +546,12 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
assertEquals(1, reader.numDocs());
|
||||
Term t = new Term("field", "a");
|
||||
assertEquals(1, reader.docFreq(t));
|
||||
DocsEnum td = MultiFields.getTermDocsEnum(reader,
|
||||
MultiFields.getLiveDocs(reader),
|
||||
"field",
|
||||
new BytesRef("a"));
|
||||
DocsEnum td = _TestUtil.docs(random, reader,
|
||||
"field",
|
||||
new BytesRef("a"),
|
||||
MultiFields.getLiveDocs(reader),
|
||||
null,
|
||||
true);
|
||||
td.nextDoc();
|
||||
assertEquals(128*1024, td.freq());
|
||||
reader.close();
|
||||
|
@ -1334,12 +1336,12 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
|
||||
|
||||
// test that the terms were indexed.
|
||||
assertTrue(MultiFields.getTermDocsEnum(ir, null, "binary", new BytesRef("doc1field1")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(MultiFields.getTermDocsEnum(ir, null, "binary", new BytesRef("doc2field1")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(MultiFields.getTermDocsEnum(ir, null, "binary", new BytesRef("doc3field1")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(MultiFields.getTermDocsEnum(ir, null, "string", new BytesRef("doc1field2")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(MultiFields.getTermDocsEnum(ir, null, "string", new BytesRef("doc2field2")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(MultiFields.getTermDocsEnum(ir, null, "string", new BytesRef("doc3field2")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(_TestUtil.docs(random, ir, "binary", new BytesRef("doc1field1"), null, null, false).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(_TestUtil.docs(random, ir, "binary", new BytesRef("doc2field1"), null, null, false).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(_TestUtil.docs(random, ir, "binary", new BytesRef("doc3field1"), null, null, false).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(_TestUtil.docs(random, ir, "string", new BytesRef("doc1field2"), null, null, false).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(_TestUtil.docs(random, ir, "string", new BytesRef("doc2field2"), null, null, false).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertTrue(_TestUtil.docs(random, ir, "string", new BytesRef("doc3field2"), null, null, false).nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
|
||||
ir.close();
|
||||
dir.close();
|
||||
|
@ -1411,7 +1413,7 @@ public class TestIndexWriter extends LuceneTestCase {
|
|||
TermsEnum t = r.fields().terms("field").iterator(null);
|
||||
int count = 0;
|
||||
while(t.next() != null) {
|
||||
final DocsEnum docs = t.docs(null, null);
|
||||
final DocsEnum docs = _TestUtil.docs(random, t, null, null, false);
|
||||
assertEquals(0, docs.nextDoc());
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docs.nextDoc());
|
||||
count++;
|
||||
|
|
|
@ -500,10 +500,12 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
|
|||
|
||||
// Make sure the doc that hit the exception was marked
|
||||
// as deleted:
|
||||
DocsEnum tdocs = MultiFields.getTermDocsEnum(reader,
|
||||
MultiFields.getLiveDocs(reader),
|
||||
t.field(),
|
||||
new BytesRef(t.text()));
|
||||
DocsEnum tdocs = _TestUtil.docs(random, reader,
|
||||
t.field(),
|
||||
new BytesRef(t.text()),
|
||||
MultiFields.getLiveDocs(reader),
|
||||
null,
|
||||
false);
|
||||
|
||||
int count = 0;
|
||||
while(tdocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
|
|
|
@ -49,9 +49,11 @@ public class TestIndexWriterReader extends LuceneTestCase {
|
|||
|
||||
public static int count(Term t, IndexReader r) throws IOException {
|
||||
int count = 0;
|
||||
DocsEnum td = MultiFields.getTermDocsEnum(r,
|
||||
MultiFields.getLiveDocs(r),
|
||||
t.field(), new BytesRef(t.text()));
|
||||
DocsEnum td = _TestUtil.docs(random, r,
|
||||
t.field(), new BytesRef(t.text()),
|
||||
MultiFields.getLiveDocs(r),
|
||||
null,
|
||||
false);
|
||||
|
||||
if (td != null) {
|
||||
while (td.nextDoc() != DocsEnum.NO_MORE_DOCS) {
|
||||
|
@ -990,7 +992,7 @@ public class TestIndexWriterReader extends LuceneTestCase {
|
|||
w.addDocument(doc);
|
||||
IndexReader r = IndexReader.open(w, true).getSequentialSubReaders()[0];
|
||||
try {
|
||||
r.termDocsEnum(null, "f", new BytesRef("val"));
|
||||
_TestUtil.docs(random, r, "f", new BytesRef("val"), null, null, false);
|
||||
fail("should have failed to seek since terms index was not loaded.");
|
||||
} catch (IllegalStateException e) {
|
||||
// expected - we didn't load the term index
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.util.Bits;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.ThreadInterruptedException;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
/**
|
||||
* MultiThreaded IndexWriter tests
|
||||
|
@ -209,10 +210,12 @@ public class TestIndexWriterWithThreads extends LuceneTestCase {
|
|||
|
||||
// Quick test to make sure index is not corrupt:
|
||||
IndexReader reader = IndexReader.open(dir, true);
|
||||
DocsEnum tdocs = MultiFields.getTermDocsEnum(reader,
|
||||
MultiFields.getLiveDocs(reader),
|
||||
"field",
|
||||
new BytesRef("aaa"));
|
||||
DocsEnum tdocs = _TestUtil.docs(random, reader,
|
||||
"field",
|
||||
new BytesRef("aaa"),
|
||||
MultiFields.getLiveDocs(reader),
|
||||
null,
|
||||
false);
|
||||
int count = 0;
|
||||
while(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
|
||||
count++;
|
||||
|
|
|
@ -367,7 +367,17 @@ public class TestLongPostings extends LuceneTestCase {
|
|||
System.out.println("\nTEST: iter=" + iter + " doS1=" + doS1 + " term=" + term);
|
||||
}
|
||||
|
||||
final DocsEnum postings = MultiFields.getTermDocsEnum(r, null, "field", new BytesRef(term));
|
||||
final DocsEnum docs;
|
||||
final DocsEnum postings;
|
||||
|
||||
if (options == IndexOptions.DOCS_ONLY) {
|
||||
docs = _TestUtil.docs(random, r, "field", new BytesRef(term), null, null, false);
|
||||
postings = null;
|
||||
} else {
|
||||
docs = postings = _TestUtil.docs(random, r, "field", new BytesRef(term), null, null, true);
|
||||
assert postings != null;
|
||||
}
|
||||
assert docs != null;
|
||||
|
||||
int docID = -1;
|
||||
while(docID < DocsEnum.NO_MORE_DOCS) {
|
||||
|
@ -388,7 +398,7 @@ public class TestLongPostings extends LuceneTestCase {
|
|||
expected++;
|
||||
}
|
||||
}
|
||||
docID = postings.nextDoc();
|
||||
docID = docs.nextDoc();
|
||||
if (VERBOSE) {
|
||||
System.out.println(" got docID=" + docID);
|
||||
}
|
||||
|
@ -397,7 +407,7 @@ public class TestLongPostings extends LuceneTestCase {
|
|||
break;
|
||||
}
|
||||
|
||||
if (random.nextInt(6) == 3) {
|
||||
if (random.nextInt(6) == 3 && postings != null) {
|
||||
final int freq = postings.freq();
|
||||
assertTrue(freq >=1 && freq <= 4);
|
||||
}
|
||||
|
@ -424,7 +434,7 @@ public class TestLongPostings extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
docID = postings.advance(targetDocID);
|
||||
docID = docs.advance(targetDocID);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" got docID=" + docID);
|
||||
}
|
||||
|
@ -433,7 +443,7 @@ public class TestLongPostings extends LuceneTestCase {
|
|||
break;
|
||||
}
|
||||
|
||||
if (random.nextInt(6) == 3) {
|
||||
if (random.nextInt(6) == 3 && postings != null) {
|
||||
final int freq = postings.freq();
|
||||
assertTrue("got invalid freq=" + freq, freq >=1 && freq <= 4);
|
||||
}
|
||||
|
|
|
@ -120,7 +120,7 @@ public class TestMultiFields extends LuceneTestCase {
|
|||
System.out.println("TEST: seek term="+ UnicodeUtil.toHexString(term.utf8ToString()) + " " + term);
|
||||
}
|
||||
|
||||
DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, liveDocs, "field", term);
|
||||
DocsEnum docsEnum = _TestUtil.docs(random, reader, "field", term, liveDocs, null, false);
|
||||
assertNotNull(docsEnum);
|
||||
|
||||
for(int docID : docs.get(term)) {
|
||||
|
@ -138,11 +138,12 @@ public class TestMultiFields extends LuceneTestCase {
|
|||
|
||||
/*
|
||||
private void verify(IndexReader r, String term, List<Integer> expected) throws Exception {
|
||||
DocsEnum docs = MultiFields.getTermDocsEnum(r,
|
||||
MultiFields.getLiveDocs(r),
|
||||
"field",
|
||||
new BytesRef(term));
|
||||
|
||||
DocsEnum docs = _TestUtil.docs(random, r,
|
||||
"field",
|
||||
new BytesRef(term),
|
||||
MultiFields.getLiveDocs(r),
|
||||
null,
|
||||
false);
|
||||
for(int docID : expected) {
|
||||
assertEquals(docID, docs.nextDoc());
|
||||
}
|
||||
|
@ -160,8 +161,8 @@ public class TestMultiFields extends LuceneTestCase {
|
|||
w.addDocument(d);
|
||||
IndexReader r = w.getReader();
|
||||
w.close();
|
||||
DocsEnum d1 = MultiFields.getTermDocsEnum(r, null, "f", new BytesRef("j"));
|
||||
DocsEnum d2 = MultiFields.getTermDocsEnum(r, null, "f", new BytesRef("j"));
|
||||
DocsEnum d1 = _TestUtil.docs(random, r, "f", new BytesRef("j"), null, null, false);
|
||||
DocsEnum d2 = _TestUtil.docs(random, r, "f", new BytesRef("j"), null, null, false);
|
||||
assertEquals(0, d1.nextDoc());
|
||||
assertEquals(0, d2.nextDoc());
|
||||
r.close();
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.search.DocIdSetIterator;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -52,7 +53,7 @@ public class TestOmitPositions extends LuceneTestCase {
|
|||
|
||||
assertNull(MultiFields.getTermPositionsEnum(reader, null, "foo", new BytesRef("test")));
|
||||
|
||||
DocsEnum de = MultiFields.getTermDocsEnum(reader, null, "foo", new BytesRef("test"));
|
||||
DocsEnum de = _TestUtil.docs(random, reader, "foo", new BytesRef("test"), null, null, true);
|
||||
while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
assertEquals(2, de.freq());
|
||||
}
|
||||
|
|
|
@ -19,12 +19,13 @@ package org.apache.lucene.index;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
public class TestParallelTermEnum extends LuceneTestCase {
|
||||
private IndexReader ir1;
|
||||
|
@ -88,31 +89,31 @@ public class TestParallelTermEnum extends LuceneTestCase {
|
|||
TermsEnum te = terms.iterator(null);
|
||||
|
||||
assertEquals("brown", te.next().utf8ToString());
|
||||
DocsEnum td = te.docs(liveDocs, null);
|
||||
DocsEnum td = _TestUtil.docs(random, te, liveDocs, null, false);
|
||||
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(0, td.docID());
|
||||
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
assertEquals("fox", te.next().utf8ToString());
|
||||
td = te.docs(liveDocs, td);
|
||||
td = _TestUtil.docs(random, te, liveDocs, td, false);
|
||||
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(0, td.docID());
|
||||
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
assertEquals("jumps", te.next().utf8ToString());
|
||||
td = te.docs(liveDocs, td);
|
||||
td = _TestUtil.docs(random, te, liveDocs, td, false);
|
||||
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(0, td.docID());
|
||||
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
assertEquals("quick", te.next().utf8ToString());
|
||||
td = te.docs(liveDocs, td);
|
||||
td = _TestUtil.docs(random, te, liveDocs, td, false);
|
||||
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(0, td.docID());
|
||||
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
assertEquals("the", te.next().utf8ToString());
|
||||
td = te.docs(liveDocs, td);
|
||||
td = _TestUtil.docs(random, te, liveDocs, td, false);
|
||||
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(0, td.docID());
|
||||
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
|
||||
|
@ -125,31 +126,31 @@ public class TestParallelTermEnum extends LuceneTestCase {
|
|||
te = terms.iterator(null);
|
||||
|
||||
assertEquals("brown", te.next().utf8ToString());
|
||||
td = te.docs(liveDocs, td);
|
||||
td = _TestUtil.docs(random, te, liveDocs, td, false);
|
||||
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(0, td.docID());
|
||||
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
assertEquals("fox", te.next().utf8ToString());
|
||||
td = te.docs(liveDocs, td);
|
||||
td = _TestUtil.docs(random, te, liveDocs, td, false);
|
||||
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(0, td.docID());
|
||||
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
assertEquals("jumps", te.next().utf8ToString());
|
||||
td = te.docs(liveDocs, td);
|
||||
td = _TestUtil.docs(random, te, liveDocs, td, false);
|
||||
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(0, td.docID());
|
||||
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
assertEquals("quick", te.next().utf8ToString());
|
||||
td = te.docs(liveDocs, td);
|
||||
td = _TestUtil.docs(random, te, liveDocs, td, false);
|
||||
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(0, td.docID());
|
||||
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
assertEquals("the", te.next().utf8ToString());
|
||||
td = te.docs(liveDocs, td);
|
||||
td = _TestUtil.docs(random, te, liveDocs, td, false);
|
||||
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(0, td.docID());
|
||||
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
|
||||
|
@ -162,37 +163,37 @@ public class TestParallelTermEnum extends LuceneTestCase {
|
|||
te = terms.iterator(null);
|
||||
|
||||
assertEquals("dog", te.next().utf8ToString());
|
||||
td = te.docs(liveDocs, td);
|
||||
td = _TestUtil.docs(random, te, liveDocs, td, false);
|
||||
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(0, td.docID());
|
||||
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
assertEquals("fox", te.next().utf8ToString());
|
||||
td = te.docs(liveDocs, td);
|
||||
td = _TestUtil.docs(random, te, liveDocs, td, false);
|
||||
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(0, td.docID());
|
||||
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
assertEquals("jumps", te.next().utf8ToString());
|
||||
td = te.docs(liveDocs, td);
|
||||
td = _TestUtil.docs(random, te, liveDocs, td, false);
|
||||
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(0, td.docID());
|
||||
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
assertEquals("lazy", te.next().utf8ToString());
|
||||
td = te.docs(liveDocs, td);
|
||||
td = _TestUtil.docs(random, te, liveDocs, td, false);
|
||||
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(0, td.docID());
|
||||
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
assertEquals("over", te.next().utf8ToString());
|
||||
td = te.docs(liveDocs, td);
|
||||
td = _TestUtil.docs(random, te, liveDocs, td, false);
|
||||
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(0, td.docID());
|
||||
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
assertEquals("the", te.next().utf8ToString());
|
||||
td = te.docs(liveDocs, td);
|
||||
td = _TestUtil.docs(random, te, liveDocs, td, false);
|
||||
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(0, td.docID());
|
||||
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS);
|
||||
|
|
|
@ -20,8 +20,8 @@ package org.apache.lucene.index;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -31,6 +31,7 @@ import org.apache.lucene.util.ArrayUtil;
|
|||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
public class TestPerSegmentDeletes extends LuceneTestCase {
|
||||
public void testDeletes1() throws Exception {
|
||||
|
@ -224,7 +225,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase {
|
|||
Terms cterms = fields.terms(term.field);
|
||||
TermsEnum ctermsEnum = cterms.iterator(null);
|
||||
if (ctermsEnum.seekExact(new BytesRef(term.text()), false)) {
|
||||
DocsEnum docsEnum = ctermsEnum.docs(bits, null);
|
||||
DocsEnum docsEnum = _TestUtil.docs(random, ctermsEnum, bits, null, false);
|
||||
return toArray(docsEnum);
|
||||
}
|
||||
return null;
|
||||
|
|
|
@ -17,18 +17,19 @@ package org.apache.lucene.index;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.InfoStream;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.index.codecs.Codec;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import org.apache.lucene.util.InfoStream;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
public class TestSegmentMerger extends LuceneTestCase {
|
||||
//The variables for the new merged segment
|
||||
|
@ -98,10 +99,12 @@ public class TestSegmentMerger extends LuceneTestCase {
|
|||
assertTrue(newDoc2 != null);
|
||||
assertTrue(DocHelper.numFields(newDoc2) == DocHelper.numFields(doc2) - DocHelper.unstored.size());
|
||||
|
||||
DocsEnum termDocs = MultiFields.getTermDocsEnum(mergedReader,
|
||||
MultiFields.getLiveDocs(mergedReader),
|
||||
DocHelper.TEXT_FIELD_2_KEY,
|
||||
new BytesRef("field"));
|
||||
DocsEnum termDocs = _TestUtil.docs(random, mergedReader,
|
||||
DocHelper.TEXT_FIELD_2_KEY,
|
||||
new BytesRef("field"),
|
||||
MultiFields.getLiveDocs(mergedReader),
|
||||
null,
|
||||
false);
|
||||
assertTrue(termDocs != null);
|
||||
assertTrue(termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
|
|
|
@ -22,12 +22,12 @@ import java.util.Collection;
|
|||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
public class TestSegmentReader extends LuceneTestCase {
|
||||
private Directory dir;
|
||||
|
@ -132,16 +132,20 @@ public class TestSegmentReader extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
DocsEnum termDocs = MultiFields.getTermDocsEnum(reader,
|
||||
MultiFields.getLiveDocs(reader),
|
||||
DocHelper.TEXT_FIELD_1_KEY,
|
||||
new BytesRef("field"));
|
||||
DocsEnum termDocs = _TestUtil.docs(random, reader,
|
||||
DocHelper.TEXT_FIELD_1_KEY,
|
||||
new BytesRef("field"),
|
||||
MultiFields.getLiveDocs(reader),
|
||||
null,
|
||||
false);
|
||||
assertTrue(termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
termDocs = MultiFields.getTermDocsEnum(reader,
|
||||
MultiFields.getLiveDocs(reader),
|
||||
DocHelper.NO_NORMS_KEY,
|
||||
new BytesRef(DocHelper.NO_NORMS_TEXT));
|
||||
termDocs = _TestUtil.docs(random, reader,
|
||||
DocHelper.NO_NORMS_KEY,
|
||||
new BytesRef(DocHelper.NO_NORMS_TEXT),
|
||||
MultiFields.getLiveDocs(reader),
|
||||
null,
|
||||
false);
|
||||
|
||||
assertTrue(termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
|
|
|
@ -17,14 +17,15 @@ package org.apache.lucene.index;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
public class TestSegmentTermDocs extends LuceneTestCase {
|
||||
private Document testDoc = new Document();
|
||||
|
@ -61,7 +62,7 @@ public class TestSegmentTermDocs extends LuceneTestCase {
|
|||
|
||||
TermsEnum terms = reader.fields().terms(DocHelper.TEXT_FIELD_2_KEY).iterator(null);
|
||||
terms.seekCeil(new BytesRef("field"));
|
||||
DocsEnum termDocs = terms.docs(reader.getLiveDocs(), null);
|
||||
DocsEnum termDocs = _TestUtil.docs(random, terms, reader.getLiveDocs(), null, true);
|
||||
if (termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
|
||||
int docId = termDocs.docID();
|
||||
assertTrue(docId == 0);
|
||||
|
@ -80,9 +81,12 @@ public class TestSegmentTermDocs extends LuceneTestCase {
|
|||
//After adding the document, we should be able to read it back in
|
||||
SegmentReader reader = SegmentReader.get(true, info, indexDivisor, newIOContext(random));
|
||||
assertTrue(reader != null);
|
||||
DocsEnum termDocs = reader.termDocsEnum(reader.getLiveDocs(),
|
||||
"textField2",
|
||||
new BytesRef("bad"));
|
||||
DocsEnum termDocs = _TestUtil.docs(random, reader,
|
||||
"textField2",
|
||||
new BytesRef("bad"),
|
||||
reader.getLiveDocs(),
|
||||
null,
|
||||
false);
|
||||
|
||||
assertNull(termDocs);
|
||||
reader.close();
|
||||
|
@ -91,9 +95,12 @@ public class TestSegmentTermDocs extends LuceneTestCase {
|
|||
//After adding the document, we should be able to read it back in
|
||||
SegmentReader reader = SegmentReader.get(true, info, indexDivisor, newIOContext(random));
|
||||
assertTrue(reader != null);
|
||||
DocsEnum termDocs = reader.termDocsEnum(reader.getLiveDocs(),
|
||||
"junk",
|
||||
new BytesRef("bad"));
|
||||
DocsEnum termDocs = _TestUtil.docs(random, reader,
|
||||
"junk",
|
||||
new BytesRef("bad"),
|
||||
reader.getLiveDocs(),
|
||||
null,
|
||||
false);
|
||||
assertNull(termDocs);
|
||||
reader.close();
|
||||
}
|
||||
|
@ -125,10 +132,12 @@ public class TestSegmentTermDocs extends LuceneTestCase {
|
|||
|
||||
IndexReader reader = IndexReader.open(dir, null, true, indexDivisor);
|
||||
|
||||
DocsEnum tdocs = MultiFields.getTermDocsEnum(reader,
|
||||
MultiFields.getLiveDocs(reader),
|
||||
ta.field(),
|
||||
new BytesRef(ta.text()));
|
||||
DocsEnum tdocs = _TestUtil.docs(random, reader,
|
||||
ta.field(),
|
||||
new BytesRef(ta.text()),
|
||||
MultiFields.getLiveDocs(reader),
|
||||
null,
|
||||
true);
|
||||
|
||||
// without optimization (assumption skipInterval == 16)
|
||||
|
||||
|
@ -148,10 +157,12 @@ public class TestSegmentTermDocs extends LuceneTestCase {
|
|||
assertFalse(tdocs.advance(10) != DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
// without next
|
||||
tdocs = MultiFields.getTermDocsEnum(reader,
|
||||
MultiFields.getLiveDocs(reader),
|
||||
ta.field(),
|
||||
new BytesRef(ta.text()));
|
||||
tdocs = _TestUtil.docs(random, reader,
|
||||
ta.field(),
|
||||
new BytesRef(ta.text()),
|
||||
MultiFields.getLiveDocs(reader),
|
||||
null,
|
||||
false);
|
||||
|
||||
assertTrue(tdocs.advance(0) != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(0, tdocs.docID());
|
||||
|
@ -164,10 +175,12 @@ public class TestSegmentTermDocs extends LuceneTestCase {
|
|||
// exactly skipInterval documents and therefore with optimization
|
||||
|
||||
// with next
|
||||
tdocs = MultiFields.getTermDocsEnum(reader,
|
||||
MultiFields.getLiveDocs(reader),
|
||||
tb.field(),
|
||||
new BytesRef(tb.text()));
|
||||
tdocs = _TestUtil.docs(random, reader,
|
||||
tb.field(),
|
||||
new BytesRef(tb.text()),
|
||||
MultiFields.getLiveDocs(reader),
|
||||
null,
|
||||
true);
|
||||
|
||||
assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(10, tdocs.docID());
|
||||
|
@ -186,10 +199,12 @@ public class TestSegmentTermDocs extends LuceneTestCase {
|
|||
assertFalse(tdocs.advance(26) != DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
// without next
|
||||
tdocs = MultiFields.getTermDocsEnum(reader,
|
||||
MultiFields.getLiveDocs(reader),
|
||||
tb.field(),
|
||||
new BytesRef(tb.text()));
|
||||
tdocs = _TestUtil.docs(random, reader,
|
||||
tb.field(),
|
||||
new BytesRef(tb.text()),
|
||||
MultiFields.getLiveDocs(reader),
|
||||
null,
|
||||
true);
|
||||
|
||||
assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(10, tdocs.docID());
|
||||
|
@ -204,10 +219,12 @@ public class TestSegmentTermDocs extends LuceneTestCase {
|
|||
// much more than skipInterval documents and therefore with optimization
|
||||
|
||||
// with next
|
||||
tdocs = MultiFields.getTermDocsEnum(reader,
|
||||
MultiFields.getLiveDocs(reader),
|
||||
tc.field(),
|
||||
new BytesRef(tc.text()));
|
||||
tdocs = _TestUtil.docs(random, reader,
|
||||
tc.field(),
|
||||
new BytesRef(tc.text()),
|
||||
MultiFields.getLiveDocs(reader),
|
||||
null,
|
||||
true);
|
||||
|
||||
assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(26, tdocs.docID());
|
||||
|
@ -228,10 +245,12 @@ public class TestSegmentTermDocs extends LuceneTestCase {
|
|||
assertFalse(tdocs.advance(76) != DocsEnum.NO_MORE_DOCS);
|
||||
|
||||
//without next
|
||||
tdocs = MultiFields.getTermDocsEnum(reader,
|
||||
MultiFields.getLiveDocs(reader),
|
||||
tc.field(),
|
||||
new BytesRef(tc.text()));
|
||||
tdocs = _TestUtil.docs(random, reader,
|
||||
tc.field(),
|
||||
new BytesRef(tc.text()),
|
||||
MultiFields.getLiveDocs(reader),
|
||||
null,
|
||||
false);
|
||||
assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(26, tdocs.docID());
|
||||
assertTrue(tdocs.advance(40) != DocsEnum.NO_MORE_DOCS);
|
||||
|
|
|
@ -75,11 +75,11 @@ public class TestStressAdvance extends LuceneTestCase {
|
|||
System.out.println("\nTEST: iter=" + iter + " iter2=" + iter2);
|
||||
}
|
||||
assertEquals(TermsEnum.SeekStatus.FOUND, te.seekCeil(new BytesRef("a")));
|
||||
de = te.docs(null, de);
|
||||
de = _TestUtil.docs(random, te, null, de, false);
|
||||
testOne(de, aDocIDs);
|
||||
|
||||
assertEquals(TermsEnum.SeekStatus.FOUND, te.seekCeil(new BytesRef("b")));
|
||||
de = te.docs(null, de);
|
||||
de = _TestUtil.docs(random, te, null, de, false);
|
||||
testOne(de, bDocIDs);
|
||||
}
|
||||
|
||||
|
|
|
@ -334,7 +334,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
|
|||
// deleted docs):
|
||||
DocsEnum docs = null;
|
||||
while(termsEnum.next() != null) {
|
||||
docs = termsEnum.docs(liveDocs1, docs);
|
||||
docs = _TestUtil.docs(random, termsEnum, null, docs, false);
|
||||
while(docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
|
||||
fail("r1 is not empty but r2 is");
|
||||
}
|
||||
|
@ -354,9 +354,9 @@ public class TestStressIndexing2 extends LuceneTestCase {
|
|||
break;
|
||||
}
|
||||
|
||||
termDocs1 = termsEnum.docs(liveDocs1, termDocs1);
|
||||
termDocs1 = _TestUtil.docs(random, termsEnum, liveDocs1, termDocs1, false);
|
||||
if (termsEnum2.seekExact(term, false)) {
|
||||
termDocs2 = termsEnum2.docs(liveDocs2, termDocs2);
|
||||
termDocs2 = _TestUtil.docs(random, termsEnum2, liveDocs2, termDocs2, false);
|
||||
} else {
|
||||
termDocs2 = null;
|
||||
}
|
||||
|
@ -415,7 +415,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
|
|||
System.out.println(" pos=" + dpEnum.nextPosition());
|
||||
}
|
||||
} else {
|
||||
dEnum = termsEnum3.docs(null, dEnum);
|
||||
dEnum = _TestUtil.docs(random, termsEnum3, null, dEnum, true);
|
||||
assertNotNull(dEnum);
|
||||
assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
final int freq = dEnum.freq();
|
||||
|
@ -449,7 +449,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
|
|||
System.out.println(" pos=" + dpEnum.nextPosition());
|
||||
}
|
||||
} else {
|
||||
dEnum = termsEnum3.docs(null, dEnum);
|
||||
dEnum = _TestUtil.docs(random, termsEnum3, null, dEnum, true);
|
||||
assertNotNull(dEnum);
|
||||
assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
final int freq = dEnum.freq();
|
||||
|
@ -506,7 +506,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
|
|||
}
|
||||
|
||||
//System.out.println("TEST: term1=" + term1);
|
||||
docs1 = termsEnum1.docs(liveDocs1, docs1);
|
||||
docs1 = _TestUtil.docs(random, termsEnum1, liveDocs1, docs1, true);
|
||||
while (docs1.nextDoc() != DocsEnum.NO_MORE_DOCS) {
|
||||
int d = docs1.docID();
|
||||
int f = docs1.freq();
|
||||
|
@ -540,7 +540,7 @@ public class TestStressIndexing2 extends LuceneTestCase {
|
|||
}
|
||||
|
||||
//System.out.println("TEST: term1=" + term1);
|
||||
docs2 = termsEnum2.docs(liveDocs2, docs2);
|
||||
docs2 = _TestUtil.docs(random, termsEnum2, liveDocs2, docs2, true);
|
||||
while (docs2.nextDoc() != DocsEnum.NO_MORE_DOCS) {
|
||||
int d = r2r1[docs2.docID()];
|
||||
int f = docs2.freq();
|
||||
|
@ -667,8 +667,8 @@ public class TestStressIndexing2 extends LuceneTestCase {
|
|||
assertEquals(DocsEnum.NO_MORE_DOCS, dpEnum1.nextDoc());
|
||||
assertEquals(DocsEnum.NO_MORE_DOCS, dpEnum2.nextDoc());
|
||||
} else {
|
||||
dEnum1 = termsEnum1.docs(null, dEnum1);
|
||||
dEnum2 = termsEnum2.docs(null, dEnum2);
|
||||
dEnum1 = _TestUtil.docs(random, termsEnum1, null, dEnum1, true);
|
||||
dEnum2 = _TestUtil.docs(random, termsEnum2, null, dEnum2, true);
|
||||
assertNotNull(dEnum1);
|
||||
assertNotNull(dEnum2);
|
||||
int docID1 = dEnum1.nextDoc();
|
||||
|
|
|
@ -24,9 +24,9 @@ import java.util.HashSet;
|
|||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
|
@ -37,6 +37,7 @@ import org.apache.lucene.search.DocIdSetIterator;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
public class TestTermVectorsReader extends LuceneTestCase {
|
||||
//Must be lexicographically sorted, will do in setup, versus trying to maintain here
|
||||
|
@ -231,7 +232,7 @@ public class TestTermVectorsReader extends LuceneTestCase {
|
|||
//System.out.println("Term: " + term);
|
||||
assertEquals(testTerms[i], term);
|
||||
|
||||
docsEnum = termsEnum.docs(null, docsEnum);
|
||||
docsEnum = _TestUtil.docs(random, termsEnum, null, docsEnum, false);
|
||||
assertNotNull(docsEnum);
|
||||
int doc = docsEnum.docID();
|
||||
assertTrue(doc == -1 || doc == DocIdSetIterator.NO_MORE_DOCS);
|
||||
|
|
|
@ -28,8 +28,9 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
class RepeatingTokenStream extends Tokenizer {
|
||||
|
||||
|
@ -121,7 +122,7 @@ public class TestTermdocPerf extends LuceneTestCase {
|
|||
DocsEnum tdocs = null;
|
||||
for (int i=0; i<iter; i++) {
|
||||
tenum.seekCeil(new BytesRef("val"));
|
||||
tdocs = tenum.docs(MultiFields.getLiveDocs(reader), tdocs);
|
||||
tdocs = _TestUtil.docs(random, tenum, MultiFields.getLiveDocs(reader), tdocs, false);
|
||||
while (tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
|
||||
ret += tdocs.docID();
|
||||
}
|
||||
|
|
|
@ -331,7 +331,7 @@ public class TestTermsEnum extends LuceneTestCase {
|
|||
}
|
||||
assertEquals(expected, actual);
|
||||
assertEquals(1, te.docFreq());
|
||||
docsEnum = te.docs(null, docsEnum);
|
||||
docsEnum = _TestUtil.docs(random, te, null, docsEnum, false);
|
||||
final int docID = docsEnum.nextDoc();
|
||||
assertTrue(docID != DocsEnum.NO_MORE_DOCS);
|
||||
assertEquals(docIDToID[docID], termToID.get(expected).intValue());
|
||||
|
|
|
@ -85,7 +85,7 @@ public class Test10KPulsings extends LuceneTestCase {
|
|||
for (int i = 0; i < 10050; i++) {
|
||||
String expected = df.format(i);
|
||||
assertEquals(expected, te.next().utf8ToString());
|
||||
de = te.docs(null, de);
|
||||
de = _TestUtil.docs(random, te, null, de, false);
|
||||
assertTrue(de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
|
||||
}
|
||||
|
@ -143,7 +143,7 @@ public class Test10KPulsings extends LuceneTestCase {
|
|||
for (int i = 0; i < 10050; i++) {
|
||||
String expected = df.format(i);
|
||||
assertEquals(expected, te.next().utf8ToString());
|
||||
de = te.docs(null, de);
|
||||
de = _TestUtil.docs(random, te, null, de, false);
|
||||
assertTrue(de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
assertEquals(DocIdSetIterator.NO_MORE_DOCS, de.nextDoc());
|
||||
}
|
||||
|
|
|
@ -59,7 +59,7 @@ public class TestPulsingReuse extends LuceneTestCase {
|
|||
Map<DocsEnum,Boolean> allEnums = new IdentityHashMap<DocsEnum,Boolean>();
|
||||
TermsEnum te = segment.terms("foo").iterator(null);
|
||||
while (te.next() != null) {
|
||||
reuse = te.docs(null, reuse);
|
||||
reuse = te.docs(null, reuse, false);
|
||||
allEnums.put(reuse, true);
|
||||
}
|
||||
|
||||
|
@ -101,7 +101,7 @@ public class TestPulsingReuse extends LuceneTestCase {
|
|||
Map<DocsEnum,Boolean> allEnums = new IdentityHashMap<DocsEnum,Boolean>();
|
||||
TermsEnum te = segment.terms("foo").iterator(null);
|
||||
while (te.next() != null) {
|
||||
reuse = te.docs(null, reuse);
|
||||
reuse = te.docs(null, reuse, false);
|
||||
allEnums.put(reuse, true);
|
||||
}
|
||||
|
||||
|
|
|
@ -35,6 +35,7 @@ import org.apache.lucene.store.Directory;
|
|||
import org.apache.lucene.util.English;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
public class TestTermVectors extends LuceneTestCase {
|
||||
private IndexSearcher searcher;
|
||||
|
@ -269,7 +270,7 @@ public class TestTermVectors extends LuceneTestCase {
|
|||
|
||||
while (termsEnum.next() != null) {
|
||||
String text = termsEnum.term().utf8ToString();
|
||||
docs = termsEnum.docs(MultiFields.getLiveDocs(knownSearcher.reader), docs);
|
||||
docs = _TestUtil.docs(random, termsEnum, MultiFields.getLiveDocs(knownSearcher.reader), docs, true);
|
||||
|
||||
while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
|
||||
int docId = docs.docID();
|
||||
|
|
|
@ -26,15 +26,16 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
|
||||
|
||||
|
@ -95,15 +96,21 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
|
|||
writer.close();
|
||||
|
||||
IndexReader reader = IndexReader.open(dir, true);
|
||||
DocsEnum td = MultiFields.getTermDocsEnum(reader,
|
||||
MultiFields.getLiveDocs(reader),
|
||||
"partnum",
|
||||
new BytesRef("Q36"));
|
||||
DocsEnum td = _TestUtil.docs(random,
|
||||
reader,
|
||||
"partnum",
|
||||
new BytesRef("Q36"),
|
||||
MultiFields.getLiveDocs(reader),
|
||||
null,
|
||||
false);
|
||||
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
td = MultiFields.getTermDocsEnum(reader,
|
||||
MultiFields.getLiveDocs(reader),
|
||||
"partnum",
|
||||
new BytesRef("Q37"));
|
||||
td = _TestUtil.docs(random,
|
||||
reader,
|
||||
"partnum",
|
||||
new BytesRef("Q37"),
|
||||
MultiFields.getLiveDocs(reader),
|
||||
null,
|
||||
false);
|
||||
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS);
|
||||
}
|
||||
|
||||
|
|
|
@ -57,6 +57,7 @@ import org.apache.lucene.search.FieldCache.DocTermsIndex;
|
|||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
/**
|
||||
* Test very simply that perf tasks - simple algorithms - are doing what they should.
|
||||
|
@ -493,7 +494,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
|
|||
TermsEnum termsEnum = terms.iterator(null);
|
||||
DocsEnum docs = null;
|
||||
while(termsEnum.next() != null) {
|
||||
docs = termsEnum.docs(MultiFields.getLiveDocs(reader), docs);
|
||||
docs = _TestUtil.docs(random, termsEnum, MultiFields.getLiveDocs(reader), docs, true);
|
||||
while(docs.nextDoc() != docs.NO_MORE_DOCS) {
|
||||
totalTokenCount2 += docs.freq();
|
||||
}
|
||||
|
|
|
@ -106,8 +106,10 @@ class TakmiSampleFixer implements SampleFixer {
|
|||
Term drillDownTerm = DrillDown.term(searchParams, catPath);
|
||||
// TODO (Facet): avoid Multi*?
|
||||
Bits liveDocs = MultiFields.getLiveDocs(indexReader);
|
||||
int updatedCount = countIntersection(MultiFields.getTermDocsEnum(indexReader, liveDocs, drillDownTerm.field(), drillDownTerm.bytes()),
|
||||
docIds.iterator());
|
||||
int updatedCount = countIntersection(MultiFields.getTermDocsEnum(indexReader, liveDocs,
|
||||
drillDownTerm.field(), drillDownTerm.bytes(),
|
||||
false),
|
||||
docIds.iterator());
|
||||
|
||||
fresNode.setValue(updatedCount);
|
||||
}
|
||||
|
|
|
@ -191,7 +191,7 @@ public class DirectoryTaxonomyReader implements TaxonomyReader {
|
|||
indexReaderLock.readLock().lock();
|
||||
// TODO (Facet): avoid Multi*?
|
||||
Bits liveDocs = MultiFields.getLiveDocs(indexReader);
|
||||
DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, liveDocs, Consts.FULL, new BytesRef(path));
|
||||
DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, liveDocs, Consts.FULL, new BytesRef(path), false);
|
||||
if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
ret = docs.docID();
|
||||
}
|
||||
|
|
|
@ -405,7 +405,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
// TODO (Facet): avoid Multi*?
|
||||
Bits liveDocs = MultiFields.getLiveDocs(reader);
|
||||
DocsEnum docs = MultiFields.getTermDocsEnum(reader, liveDocs, Consts.FULL,
|
||||
new BytesRef(categoryPath.toString(delimiter)));
|
||||
new BytesRef(categoryPath.toString(delimiter)),
|
||||
false);
|
||||
if (docs == null || docs.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return -1; // category does not exist in taxonomy
|
||||
}
|
||||
|
@ -441,7 +442,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
}
|
||||
Bits liveDocs = MultiFields.getLiveDocs(reader);
|
||||
DocsEnum docs = MultiFields.getTermDocsEnum(reader, liveDocs, Consts.FULL,
|
||||
new BytesRef(categoryPath.toString(delimiter, prefixLen)));
|
||||
new BytesRef(categoryPath.toString(delimiter, prefixLen)),
|
||||
false);
|
||||
if (docs == null || docs.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return -1; // category does not exist in taxonomy
|
||||
}
|
||||
|
@ -788,7 +790,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
// hence documents), there are no deletions in the index. Therefore, it
|
||||
// is sufficient to call next(), and then doc(), exactly once with no
|
||||
// 'validation' checks.
|
||||
docsEnum = termsEnum.docs(liveDocs, docsEnum);
|
||||
docsEnum = termsEnum.docs(liveDocs, docsEnum, false);
|
||||
docsEnum.nextDoc();
|
||||
cp.clear();
|
||||
// TODO (Facet): avoid String creation/use bytes?
|
||||
|
@ -925,7 +927,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
// like Lucene's merge works, we hope there are few seeks.
|
||||
// TODO (Facet): is there a quicker way? E.g., not specifying the
|
||||
// next term by name every time?
|
||||
otherdocsEnum[i] = othertes[i].docs(MultiFields.getLiveDocs(otherreaders[i]), otherdocsEnum[i]);
|
||||
otherdocsEnum[i] = othertes[i].docs(MultiFields.getLiveDocs(otherreaders[i]), otherdocsEnum[i], false);
|
||||
otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
|
||||
int origordinal = otherdocsEnum[i].docID();
|
||||
ordinalMaps[i].addMapping(origordinal, newordinal);
|
||||
|
@ -942,7 +944,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
// to be added because it already existed in the main taxonomy.
|
||||
|
||||
// TODO (Facet): Again, is there a quicker way?
|
||||
mainde = mainte.docs(MultiFields.getLiveDocs(mainreader), mainde);
|
||||
mainde = mainte.docs(MultiFields.getLiveDocs(mainreader), mainde, false);
|
||||
mainde.nextDoc(); // TODO (Facet): check?
|
||||
int newordinal = mainde.docID();
|
||||
|
||||
|
@ -950,7 +952,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
for (int i=0; i<taxonomies.length; i++) {
|
||||
if (first.equals(currentOthers[i])) {
|
||||
// TODO (Facet): again, is there a quicker way?
|
||||
otherdocsEnum[i] = othertes[i].docs(MultiFields.getLiveDocs(otherreaders[i]), otherdocsEnum[i]);
|
||||
otherdocsEnum[i] = othertes[i].docs(MultiFields.getLiveDocs(otherreaders[i]), otherdocsEnum[i], false);
|
||||
otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
|
||||
int origordinal = otherdocsEnum[i].docID();
|
||||
ordinalMaps[i].addMapping(origordinal, newordinal);
|
||||
|
|
|
@ -285,7 +285,7 @@ public abstract class FacetTestBase extends LuceneTestCase {
|
|||
TermsEnum te = terms.iterator(null);
|
||||
DocsEnum de = null;
|
||||
while (te.next() != null) {
|
||||
de = te.docs(liveDocs, de);
|
||||
de = _TestUtil.docs(random, te, liveDocs, de, false);
|
||||
int cnt = 0;
|
||||
while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
cnt++;
|
||||
|
|
|
@ -6,31 +6,10 @@ import java.util.List;
|
|||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TopScoreDocCollector;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.junit.Test;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.search.MultiCollector;
|
||||
import org.apache.lucene.facet.FacetTestUtils;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.index.params.PerDimensionIndexingParams;
|
||||
import org.apache.lucene.facet.search.FacetsCollector;
|
||||
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
|
@ -40,6 +19,25 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
|||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.MultiCollector;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TopScoreDocCollector;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -91,7 +89,7 @@ public class TestMultipleCategoryLists extends LuceneTestCase {
|
|||
// Obtain facets results and hand-test them
|
||||
assertCorrectResults(facetsCollector);
|
||||
|
||||
DocsEnum td = MultiFields.getTermDocsEnum(ir, MultiFields.getLiveDocs(ir), "$facets", new BytesRef("$fulltree$"));
|
||||
DocsEnum td = _TestUtil.docs(random, ir, "$facets", new BytesRef("$fulltree$"), MultiFields.getLiveDocs(ir), null, false);
|
||||
assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
|
||||
tr.close();
|
||||
|
@ -188,7 +186,7 @@ public class TestMultipleCategoryLists extends LuceneTestCase {
|
|||
}
|
||||
|
||||
private void assertPostingListExists(String field, String text, IndexReader ir) throws IOException {
|
||||
DocsEnum de = MultiFields.getTermDocsEnum(ir, null, field, new BytesRef(text));
|
||||
DocsEnum de = _TestUtil.docs(random, ir, field, new BytesRef(text), null, null, false);
|
||||
assertTrue(de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
|
||||
}
|
||||
|
||||
|
|
|
@ -81,7 +81,7 @@ public class TermsFilter extends Filter {
|
|||
if (terms != null) { // TODO this check doesn't make sense, decide which variable its supposed to be for
|
||||
br.copyBytes(term.bytes());
|
||||
if (termsEnum.seekCeil(br) == TermsEnum.SeekStatus.FOUND) {
|
||||
docs = termsEnum.docs(acceptDocs, docs);
|
||||
docs = termsEnum.docs(acceptDocs, docs, false);
|
||||
while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) {
|
||||
result.set(docs.docID());
|
||||
}
|
||||
|
|
|
@ -62,7 +62,7 @@ public class TFValueSource extends TermFreqValueSource {
|
|||
if (terms != null) {
|
||||
final TermsEnum termsEnum = terms.iterator(null);
|
||||
if (termsEnum.seekExact(indexedBytes, false)) {
|
||||
docs = termsEnum.docs(null, null);
|
||||
docs = termsEnum.docs(null, null, true);
|
||||
} else {
|
||||
docs = null;
|
||||
}
|
||||
|
|
|
@ -55,7 +55,7 @@ public class TermFreqValueSource extends DocFreqValueSource {
|
|||
if (terms != null) {
|
||||
final TermsEnum termsEnum = terms.iterator(null);
|
||||
if (termsEnum.seekExact(indexedBytes, false)) {
|
||||
docs = termsEnum.docs(null, null);
|
||||
docs = termsEnum.docs(null, null, true);
|
||||
} else {
|
||||
docs = null;
|
||||
}
|
||||
|
|
|
@ -40,6 +40,14 @@ import org.apache.lucene.util.fst.NoOutputs;
|
|||
* @see FSTCompletion
|
||||
*/
|
||||
public class FSTCompletionLookup extends Lookup {
|
||||
/**
|
||||
* An invalid bucket count if we're creating an object
|
||||
* of this class from an existing FST.
|
||||
*
|
||||
* @see #FSTCompletionLookup(FSTCompletion, boolean)
|
||||
*/
|
||||
private static int INVALID_BUCKETS_COUNT = -1;
|
||||
|
||||
/**
|
||||
* Shared tail length for conflating in the created automaton. Setting this
|
||||
* to larger values ({@link Integer#MAX_VALUE}) will create smaller (or minimal)
|
||||
|
@ -70,34 +78,54 @@ public class FSTCompletionLookup extends Lookup {
|
|||
*/
|
||||
private FSTCompletion normalCompletion;
|
||||
|
||||
/*
|
||||
*
|
||||
/**
|
||||
* This constructor prepares for creating a suggested FST using the
|
||||
* {@link #build(TermFreqIterator)} method. The number of weight
|
||||
* discretization buckets is set to {@link FSTCompletion#DEFAULT_BUCKETS} and
|
||||
* exact matches are promoted to the top of the suggestions list.
|
||||
*/
|
||||
public FSTCompletionLookup() {
|
||||
this(FSTCompletion.DEFAULT_BUCKETS, true);
|
||||
}
|
||||
|
||||
/*
|
||||
/**
|
||||
* This constructor prepares for creating a suggested FST using the
|
||||
* {@link #build(TermFreqIterator)} method.
|
||||
*
|
||||
*/
|
||||
public FSTCompletionLookup(FSTCompletion completion, int buckets, boolean exactMatchFirst) {
|
||||
this(buckets, exactMatchFirst);
|
||||
this.normalCompletion = new FSTCompletion(
|
||||
completion.getFST(), false, exactMatchFirst);
|
||||
this.higherWeightsCompletion = new FSTCompletion(
|
||||
completion.getFST(), true, exactMatchFirst);
|
||||
}
|
||||
|
||||
/*
|
||||
* @param buckets
|
||||
* The number of weight discretization buckets (see
|
||||
* {@link FSTCompletion} for details).
|
||||
*
|
||||
* @param exactMatchFirst
|
||||
* If <code>true</code> exact matches are promoted to the top of the
|
||||
* suggestions list. Otherwise they appear in the order of
|
||||
* discretized weight and alphabetical within the bucket.
|
||||
*/
|
||||
public FSTCompletionLookup(int buckets, boolean exactMatchFirst) {
|
||||
this.buckets = buckets;
|
||||
this.exactMatchFirst = exactMatchFirst;
|
||||
}
|
||||
|
||||
/*
|
||||
/**
|
||||
* This constructor takes a pre-built automaton.
|
||||
*
|
||||
* @param completion
|
||||
* An instance of {@link FSTCompletion}.
|
||||
* @param exactMatchFirst
|
||||
* If <code>true</code> exact matches are promoted to the top of the
|
||||
* suggestions list. Otherwise they appear in the order of
|
||||
* discretized weight and alphabetical within the bucket.
|
||||
*/
|
||||
public FSTCompletionLookup(FSTCompletion completion, boolean exactMatchFirst) {
|
||||
this(INVALID_BUCKETS_COUNT, exactMatchFirst);
|
||||
this.normalCompletion = new FSTCompletion(
|
||||
completion.getFST(), false, exactMatchFirst);
|
||||
this.higherWeightsCompletion = new FSTCompletion(
|
||||
completion.getFST(), true, exactMatchFirst);
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public void build(TermFreqIterator tfit) throws IOException {
|
||||
|
|
|
@ -687,7 +687,7 @@ public class SimpleFacets {
|
|||
// TODO: specialize when base docset is a bitset or hash set (skipDocs)? or does it matter for this?
|
||||
// TODO: do this per-segment for better efficiency (MultiDocsEnum just uses base class impl)
|
||||
// TODO: would passing deleted docs lead to better efficiency over checking the fastForRandomSet?
|
||||
docsEnum = termsEnum.docs(null, docsEnum);
|
||||
docsEnum = termsEnum.docs(null, docsEnum, false);
|
||||
c=0;
|
||||
|
||||
if (docsEnum instanceof MultiDocsEnum) {
|
||||
|
|
|
@ -314,7 +314,7 @@ class JoinQuery extends Query {
|
|||
if (freq < minDocFreqFrom) {
|
||||
fromTermDirectCount++;
|
||||
// OK to skip liveDocs, since we check for intersection with docs matching query
|
||||
fromDeState.docsEnum = fromDeState.termsEnum.docs(null, fromDeState.docsEnum);
|
||||
fromDeState.docsEnum = fromDeState.termsEnum.docs(null, fromDeState.docsEnum, false);
|
||||
DocsEnum docsEnum = fromDeState.docsEnum;
|
||||
|
||||
if (docsEnum instanceof MultiDocsEnum) {
|
||||
|
@ -379,7 +379,7 @@ class JoinQuery extends Query {
|
|||
toTermDirectCount++;
|
||||
|
||||
// need to use liveDocs here so we don't map to any deleted ones
|
||||
toDeState.docsEnum = toDeState.termsEnum.docs(toDeState.liveDocs, toDeState.docsEnum);
|
||||
toDeState.docsEnum = toDeState.termsEnum.docs(toDeState.liveDocs, toDeState.docsEnum, false);
|
||||
DocsEnum docsEnum = toDeState.docsEnum;
|
||||
|
||||
if (docsEnum instanceof MultiDocsEnum) {
|
||||
|
|
|
@ -559,7 +559,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
|||
if (!termsEnum.seekExact(termBytes, false)) {
|
||||
return -1;
|
||||
}
|
||||
DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(reader), null);
|
||||
DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(reader), null, false);
|
||||
if (docs == null) return -1;
|
||||
int id = docs.nextDoc();
|
||||
return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
|
||||
|
@ -861,7 +861,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
|||
int bitsSet = 0;
|
||||
OpenBitSet obs = null;
|
||||
|
||||
DocsEnum docsEnum = deState.termsEnum.docs(deState.liveDocs, deState.docsEnum);
|
||||
DocsEnum docsEnum = deState.termsEnum.docs(deState.liveDocs, deState.docsEnum, false);
|
||||
if (deState.docsEnum == null) {
|
||||
deState.docsEnum = docsEnum;
|
||||
}
|
||||
|
@ -942,7 +942,7 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean {
|
|||
if (terms != null) {
|
||||
final TermsEnum termsEnum = terms.iterator(null);
|
||||
if (termsEnum.seekExact(termBytes, false)) {
|
||||
docsEnum = termsEnum.docs(MultiFields.getLiveDocs(reader), null);
|
||||
docsEnum = termsEnum.docs(MultiFields.getLiveDocs(reader), null, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -277,7 +277,7 @@ public class FileFloatSource extends ValueSource {
|
|||
continue;
|
||||
}
|
||||
|
||||
docsEnum = termsEnum.docs(null, docsEnum);
|
||||
docsEnum = termsEnum.docs(null, docsEnum, false);
|
||||
int doc;
|
||||
while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
|
||||
vals[doc] = fval;
|
||||
|
|
|
@ -17,6 +17,14 @@
|
|||
package org.apache.solr.search;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
|
@ -25,6 +33,7 @@ import org.apache.lucene.index.*;
|
|||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.apache.noggit.ObjectBuilder;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -32,14 +41,6 @@ import org.apache.solr.request.SolrQueryRequest;
|
|||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
import static org.apache.solr.core.SolrCore.verbose;
|
||||
|
||||
public class TestRealTimeGet extends SolrTestCaseJ4 {
|
||||
|
@ -727,7 +728,7 @@ public class TestRealTimeGet extends SolrTestCaseJ4 {
|
|||
if (!termsEnum.seekExact(termBytes, false)) {
|
||||
return -1;
|
||||
}
|
||||
DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null);
|
||||
DocsEnum docs = _TestUtil.docs(random, termsEnum, MultiFields.getLiveDocs(r), null, false);
|
||||
int id = docs.nextDoc();
|
||||
if (id != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
int next = docs.nextDoc();
|
||||
|
|
Loading…
Reference in New Issue