diff --git a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 48fe4a11728..4dfc276d271 100644 --- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -936,7 +936,7 @@ public class MemoryIndex { } @Override - public DocsEnum docs(Bits liveDocs, DocsEnum reuse) { + public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) { if (reuse == null || !(reuse instanceof MemoryDocsEnum)) { reuse = new MemoryDocsEnum(); } diff --git a/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java b/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java index 9921763ee91..10d08de9939 100644 --- a/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java +++ b/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java @@ -188,7 +188,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase { MemoryIndex memory = new MemoryIndex(); memory.addField("foo", "bar", analyzer); IndexReader reader = memory.createSearcher().getIndexReader(); - DocsEnum disi = reader.termDocsEnum(null, "foo", new BytesRef("bar")); + DocsEnum disi = _TestUtil.docs(random, reader, "foo", new BytesRef("bar"), null, null, false); int docid = disi.docID(); assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS); assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); @@ -196,7 +196,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase { // now reuse and check again TermsEnum te = reader.terms("foo").iterator(null); assertTrue(te.seekExact(new BytesRef("bar"), true)); - disi = te.docs(null, disi); + disi = te.docs(null, disi, false); docid = disi.docID(); assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS); 
assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java index ea46caf0824..813970bd089 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java @@ -129,12 +129,19 @@ public class FieldNormModifier { if (terms != null) { TermsEnum termsEnum = terms.iterator(null); DocsEnum docs = null; + DocsEnum docsAndFreqs = null; while(termsEnum.next() != null) { - docs = termsEnum.docs(liveDocs, docs); + docsAndFreqs = termsEnum.docs(liveDocs, docsAndFreqs, true); + final DocsEnum docs2; + if (docsAndFreqs != null) { + docs2 = docsAndFreqs; + } else { + docs2 = docs = termsEnum.docs(liveDocs, docs, false); + } while(true) { - int docID = docs.nextDoc(); + int docID = docs2.nextDoc(); if (docID != docs.NO_MORE_DOCS) { - termCounts[docID] += docs.freq(); + termCounts[docID] += docsAndFreqs == null ? 
1 : docsAndFreqs.freq(); } else { break; } diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java b/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java index 79c46993a88..f4987bb43db 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/misc/HighFreqTerms.java @@ -201,7 +201,7 @@ public class HighFreqTerms { return; } } - DocsEnum de = r.termDocsEnum(liveDocs, field, termText); + DocsEnum de = r.termDocsEnum(liveDocs, field, termText, true); if (de != null) { while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) totalTF[0] += de.freq(); diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java index c999eeef65b..0a2e1df3d0c 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java @@ -29,11 +29,11 @@ import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexOutput; @@ -139,7 +139,7 @@ public class TestAppendingCodec extends LuceneTestCase { assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("lazy"))); assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("dog"))); 
assertEquals(SeekStatus.FOUND, te.seekCeil(new BytesRef("the"))); - DocsEnum de = te.docs(null, null); + DocsEnum de = te.docs(null, null, true); assertTrue(de.advance(0) != DocsEnum.NO_MORE_DOCS); assertEquals(2, de.freq()); assertTrue(de.advance(1) != DocsEnum.NO_MORE_DOCS); diff --git a/lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/DuplicateFilter.java b/lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/DuplicateFilter.java index af85bd951d2..7cb7703fea5 100644 --- a/lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/DuplicateFilter.java +++ b/lucene/contrib/sandbox/src/java/org/apache/lucene/sandbox/queries/DuplicateFilter.java @@ -93,7 +93,7 @@ public class DuplicateFilter extends Filter { if (currTerm == null) { break; } else { - docs = termsEnum.docs(acceptDocs, docs); + docs = termsEnum.docs(acceptDocs, docs, false); int doc = docs.nextDoc(); if (doc != DocsEnum.NO_MORE_DOCS) { if (keepMode == KeepMode.KM_USE_FIRST_OCCURRENCE) { @@ -133,7 +133,7 @@ public class DuplicateFilter extends Filter { } else { if (termsEnum.docFreq() > 1) { // unset potential duplicates - docs = termsEnum.docs(acceptDocs, docs); + docs = termsEnum.docs(acceptDocs, docs, false); int doc = docs.nextDoc(); if (doc != DocsEnum.NO_MORE_DOCS) { if (keepMode == KeepMode.KM_USE_FIRST_OCCURRENCE) { diff --git a/lucene/contrib/sandbox/src/test/org/apache/lucene/sandbox/queries/DuplicateFilterTest.java b/lucene/contrib/sandbox/src/test/org/apache/lucene/sandbox/queries/DuplicateFilterTest.java index d6bc5b3ff72..bb27cccfe90 100644 --- a/lucene/contrib/sandbox/src/test/org/apache/lucene/sandbox/queries/DuplicateFilterTest.java +++ b/lucene/contrib/sandbox/src/test/org/apache/lucene/sandbox/queries/DuplicateFilterTest.java @@ -17,6 +17,9 @@ package org.apache.lucene.sandbox.queries; * limitations under the License. 
*/ +import java.io.IOException; +import java.util.HashSet; + import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.StringField; @@ -28,9 +31,7 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; - -import java.io.IOException; -import java.util.HashSet; +import org.apache.lucene.util._TestUtil; public class DuplicateFilterTest extends LuceneTestCase { private static final String KEY_FIELD = "url"; @@ -134,10 +135,13 @@ public class DuplicateFilterTest extends LuceneTestCase { for (ScoreDoc hit : hits) { Document d = searcher.doc(hit.doc); String url = d.get(KEY_FIELD); - DocsEnum td = MultiFields.getTermDocsEnum(reader, - MultiFields.getLiveDocs(reader), - KEY_FIELD, - new BytesRef(url)); + DocsEnum td = _TestUtil.docs(random, reader, + KEY_FIELD, + new BytesRef(url), + MultiFields.getLiveDocs(reader), + null, + false); + int lastDoc = 0; while (td.nextDoc() != DocsEnum.NO_MORE_DOCS) { lastDoc = td.docID(); @@ -155,10 +159,13 @@ public class DuplicateFilterTest extends LuceneTestCase { for (ScoreDoc hit : hits) { Document d = searcher.doc(hit.doc); String url = d.get(KEY_FIELD); - DocsEnum td = MultiFields.getTermDocsEnum(reader, - MultiFields.getLiveDocs(reader), - KEY_FIELD, - new BytesRef(url)); + DocsEnum td = _TestUtil.docs(random, reader, + KEY_FIELD, + new BytesRef(url), + MultiFields.getLiveDocs(reader), + null, + false); + int lastDoc = 0; td.nextDoc(); lastDoc = td.docID(); diff --git a/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java b/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java index 35c25f3c510..db00c9cf006 100644 --- a/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java +++ 
b/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java @@ -57,7 +57,7 @@ public class CartesianShapeFilter extends Filter { return new DocIdSet() { @Override public DocIdSetIterator iterator() throws IOException { - return context.reader.termDocsEnum(acceptDocs, fieldName, bytesRef); + return context.reader.termDocsEnum(acceptDocs, fieldName, bytesRef, false); } @Override @@ -70,7 +70,7 @@ public class CartesianShapeFilter extends Filter { for (int i =0; i< sz; i++) { double boxId = area.get(i).doubleValue(); NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(boxId), 0, bytesRef); - final DocsEnum docsEnum = context.reader.termDocsEnum(acceptDocs, fieldName, bytesRef); + final DocsEnum docsEnum = context.reader.termDocsEnum(acceptDocs, fieldName, bytesRef, false); if (docsEnum == null) continue; // iterate through all documents // which have this boxId diff --git a/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java b/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java index d310b10451d..38dcc593d0b 100644 --- a/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java +++ b/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java @@ -387,7 +387,7 @@ class BufferedDeletesStream { // System.out.println(" term=" + term); if (termsEnum.seekExact(term.bytes(), false)) { - DocsEnum docsEnum = termsEnum.docs(reader.getLiveDocs(), docs); + DocsEnum docsEnum = termsEnum.docs(reader.getLiveDocs(), docs, false); //System.out.println("BDS: got docsEnum=" + docsEnum); if (docsEnum != null) { diff --git a/lucene/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/src/java/org/apache/lucene/index/CheckIndex.java index 18cdb933ffe..c106311b0c9 100644 --- a/lucene/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/src/java/org/apache/lucene/index/CheckIndex.java @@ -683,6 +683,7 @@ public class CheckIndex { } DocsEnum docs = null; + DocsEnum docsAndFreqs = null; 
DocsAndPositionsEnum postings = null; final FieldsEnum fieldsEnum = fields.iterator(); @@ -740,7 +741,8 @@ public class CheckIndex { status.totFreq += docFreq; sumDocFreq += docFreq; - docs = termsEnum.docs(liveDocs, docs); + docs = termsEnum.docs(liveDocs, docs, false); + docsAndFreqs = termsEnum.docs(liveDocs, docsAndFreqs, true); postings = termsEnum.docsAndPositions(liveDocs, postings); if (hasOrd) { @@ -762,13 +764,24 @@ public class CheckIndex { status.termCount++; final DocsEnum docs2; + final DocsEnum docsAndFreqs2; final boolean hasPositions; + final boolean hasFreqs; if (postings != null) { docs2 = postings; + docsAndFreqs2 = postings; hasPositions = true; + hasFreqs = true; + } else if (docsAndFreqs != null) { + docs2 = docsAndFreqs; + docsAndFreqs2 = docsAndFreqs; + hasPositions = false; + hasFreqs = true; } else { docs2 = docs; + docsAndFreqs2 = null; hasPositions = false; + hasFreqs = false; } int lastDoc = -1; @@ -780,9 +793,15 @@ public class CheckIndex { break; } visitedDocs.set(doc); - final int freq = docs2.freq(); - status.totPos += freq; - totalTermFreq += freq; + int freq = -1; + if (hasFreqs) { + freq = docsAndFreqs2.freq(); + if (freq <= 0) { + throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); + } + status.totPos += freq; + totalTermFreq += freq; + } docCount++; if (doc <= lastDoc) { @@ -793,12 +812,9 @@ public class CheckIndex { } lastDoc = doc; - if (freq <= 0) { - throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); - } int lastPos = -1; - if (postings != null) { + if (hasPositions) { for(int j=0;j { return null; } final ExactDocScorer docScorer = weight.createDocScorer(context); - docsAndFreqs[i] = new DocsAndFreqs(termsEnum.docs( - acceptDocs, null), termsEnum.docFreq(), docScorer); + final DocsEnum docsAndFreqsEnum = termsEnum.docs(acceptDocs, null, true); + if (docsAndFreqsEnum == null) { + // TODO: we could carry over TermState from 
the + // terms we already seek'd to, to save re-seeking + // to make the match-only scorer, but it's likely + // rare that BQ mixes terms from omitTf and + // non-omitTF fields: + + // At least one sub cannot provide freqs; abort + // and fallback to full match-only scorer: + return createMatchOnlyConjunctionTermScorer(context, acceptDocs); + } + + docsAndFreqs[i] = new DocsAndFreqs(docsAndFreqsEnum, + docsAndFreqsEnum, + termsEnum.docFreq(), docScorer); } return new ConjunctionTermScorer(this, disableCoord ? 1.0f : coord( docsAndFreqs.length, docsAndFreqs.length), docsAndFreqs); } + + private Scorer createMatchOnlyConjunctionTermScorer(AtomicReaderContext context, Bits acceptDocs) + throws IOException { + + final DocsAndFreqs[] docsAndFreqs = new DocsAndFreqs[weights.size()]; + for (int i = 0; i < docsAndFreqs.length; i++) { + final TermWeight weight = (TermWeight) weights.get(i); + final TermsEnum termsEnum = weight.getTermsEnum(context); + if (termsEnum == null) { + return null; + } + final ExactDocScorer docScorer = weight.createDocScorer(context); + docsAndFreqs[i] = new DocsAndFreqs(null, + termsEnum.docs(acceptDocs, null, false), + termsEnum.docFreq(), docScorer); + } + + return new MatchOnlyConjunctionTermScorer(this, disableCoord ? 1.0f : coord( + docsAndFreqs.length, docsAndFreqs.length), docsAndFreqs); + } @Override public boolean scoresDocsOutOfOrder() { diff --git a/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java b/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java index b0a464ef302..e7dad944b76 100644 --- a/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java +++ b/lucene/src/java/org/apache/lucene/search/ConjunctionTermScorer.java @@ -17,17 +17,18 @@ package org.apache.lucene.search; * limitations under the License. 
*/ -import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.search.similarities.Similarity.ExactDocScorer; -import org.apache.lucene.util.ArrayUtil; import java.io.IOException; import java.util.Comparator; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.search.similarities.Similarity.ExactDocScorer; +import org.apache.lucene.util.ArrayUtil; + /** Scorer for conjunctions, sets of terms, all of which are required. */ -final class ConjunctionTermScorer extends Scorer { - private final float coord; - private int lastDoc = -1; - private final DocsAndFreqs[] docsAndFreqs; +class ConjunctionTermScorer extends Scorer { + protected final float coord; + protected int lastDoc = -1; + protected final DocsAndFreqs[] docsAndFreqs; private final DocsAndFreqs lead; ConjunctionTermScorer(Weight weight, float coord, @@ -39,7 +40,7 @@ final class ConjunctionTermScorer extends Scorer { // lead the matching. ArrayUtil.mergeSort(docsAndFreqs, new Comparator() { public int compare(DocsAndFreqs o1, DocsAndFreqs o2) { - return o1.freq - o2.freq; + return o1.docFreq - o2.docFreq; } }); @@ -96,14 +97,16 @@ final class ConjunctionTermScorer extends Scorer { } static final class DocsAndFreqs { + final DocsEnum docsAndFreqs; final DocsEnum docs; - final int freq; + final int docFreq; final ExactDocScorer docScorer; int doc = -1; - DocsAndFreqs(DocsEnum docs, int freq, ExactDocScorer docScorer) { + DocsAndFreqs(DocsEnum docsAndFreqs, DocsEnum docs, int docFreq, ExactDocScorer docScorer) { + this.docsAndFreqs = docsAndFreqs; this.docs = docs; - this.freq = freq; + this.docFreq = docFreq; this.docScorer = docScorer; } } diff --git a/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java b/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java index e93c6389aba..9f899715725 100644 --- a/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java +++ b/lucene/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -342,7 +342,7 @@ class FieldCacheImpl 
implements FieldCache { break; } final byte termval = parser.parseByte(term); - docs = termsEnum.docs(null, docs); + docs = termsEnum.docs(null, docs, false); while (true) { final int docID = docs.nextDoc(); if (docID == DocsEnum.NO_MORE_DOCS) { @@ -415,7 +415,7 @@ class FieldCacheImpl implements FieldCache { break; } final short termval = parser.parseShort(term); - docs = termsEnum.docs(null, docs); + docs = termsEnum.docs(null, docs, false); while (true) { final int docID = docs.nextDoc(); if (docID == DocsEnum.NO_MORE_DOCS) { @@ -519,7 +519,7 @@ class FieldCacheImpl implements FieldCache { retArray = new int[maxDoc]; } - docs = termsEnum.docs(null, docs); + docs = termsEnum.docs(null, docs, false); while (true) { final int docID = docs.nextDoc(); if (docID == DocsEnum.NO_MORE_DOCS) { @@ -586,7 +586,7 @@ class FieldCacheImpl implements FieldCache { res = new FixedBitSet(maxDoc); } - docs = termsEnum.docs(null, docs); + docs = termsEnum.docs(null, docs, false); // TODO: use bulk API while (true) { final int docID = docs.nextDoc(); @@ -669,7 +669,7 @@ class FieldCacheImpl implements FieldCache { retArray = new float[maxDoc]; } - docs = termsEnum.docs(null, docs); + docs = termsEnum.docs(null, docs, false); while (true) { final int docID = docs.nextDoc(); if (docID == DocsEnum.NO_MORE_DOCS) { @@ -757,7 +757,7 @@ class FieldCacheImpl implements FieldCache { retArray = new long[maxDoc]; } - docs = termsEnum.docs(null, docs); + docs = termsEnum.docs(null, docs, false); while (true) { final int docID = docs.nextDoc(); if (docID == DocsEnum.NO_MORE_DOCS) { @@ -846,7 +846,7 @@ class FieldCacheImpl implements FieldCache { retArray = new double[maxDoc]; } - docs = termsEnum.docs(null, docs); + docs = termsEnum.docs(null, docs, false); while (true) { final int docID = docs.nextDoc(); if (docID == DocsEnum.NO_MORE_DOCS) { @@ -1020,7 +1020,7 @@ class FieldCacheImpl implements FieldCache { } @Override - public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException { + 
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException { throw new UnsupportedOperationException(); } @@ -1147,7 +1147,7 @@ class FieldCacheImpl implements FieldCache { termOrdToBytesOffset = termOrdToBytesOffset.resize(ArrayUtil.oversize(1+termOrd, 1)); } termOrdToBytesOffset.set(termOrd, bytes.copyUsingLengthPrefix(term)); - docs = termsEnum.docs(null, docs); + docs = termsEnum.docs(null, docs, false); while (true) { final int docID = docs.nextDoc(); if (docID == DocsEnum.NO_MORE_DOCS) { @@ -1268,7 +1268,7 @@ class FieldCacheImpl implements FieldCache { break; } final long pointer = bytes.copyUsingLengthPrefix(term); - docs = termsEnum.docs(null, docs); + docs = termsEnum.docs(null, docs, false); while (true) { final int docID = docs.nextDoc(); if (docID == DocsEnum.NO_MORE_DOCS) { diff --git a/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java b/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java index c02bd9cb972..84403d758fb 100644 --- a/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java +++ b/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java @@ -259,8 +259,8 @@ public final class FuzzyTermsEnum extends TermsEnum { } @Override - public DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException { - return actualEnum.docs(liveDocs, reuse); + public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException { + return actualEnum.docs(liveDocs, reuse, needsFreqs); } @Override diff --git a/lucene/src/java/org/apache/lucene/search/MatchOnlyConjunctionTermsScorer.java b/lucene/src/java/org/apache/lucene/search/MatchOnlyConjunctionTermsScorer.java new file mode 100644 index 00000000000..9f39d91e0d1 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/search/MatchOnlyConjunctionTermsScorer.java @@ -0,0 +1,37 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +/** Scorer for conjunctions, sets of terms, all of which are required. */ +final class MatchOnlyConjunctionTermScorer extends ConjunctionTermScorer { + MatchOnlyConjunctionTermScorer(Weight weight, float coord, + DocsAndFreqs[] docsAndFreqs) throws IOException { + super(weight, coord, docsAndFreqs); + } + + @Override + public float score() throws IOException { + float sum = 0.0f; + for (DocsAndFreqs docs : docsAndFreqs) { + sum += docs.docScorer.score(lastDoc, 1); + } + return sum * coord; + } +} diff --git a/lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java b/lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java new file mode 100644 index 00000000000..2a4080da74c --- /dev/null +++ b/lucene/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java @@ -0,0 +1,94 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.search.similarities.Similarity; + +/** Expert: A Scorer for documents matching a + * Term. It treats all documents as having + * one occurrence (tf=1) for the term. + */ + +final class MatchOnlyTermScorer extends Scorer { + private final DocsEnum docsEnum; + private final Similarity.ExactDocScorer docScorer; + + /** + * Construct a TermScorer. + * + * @param weight + * The weight of the Term in the query. + * @param td + * An iterator over the documents matching the Term. + * @param docScorer + * The Similarity.ExactDocScorer implementation + * to be used for score computations. + */ + MatchOnlyTermScorer(Weight weight, DocsEnum td, Similarity.ExactDocScorer docScorer) throws IOException { + super(weight); + this.docScorer = docScorer; + this.docsEnum = td; + } + + @Override + public int docID() { + return docsEnum.docID(); + } + + @Override + public float freq() { + return 1.0f; + } + + /** + * Advances to the next document matching the query.
+ * + * @return the document matching the query or NO_MORE_DOCS if there are no more documents. + */ + @Override + public int nextDoc() throws IOException { + return docsEnum.nextDoc(); + } + + @Override + public float score() { + assert docID() != NO_MORE_DOCS; + return docScorer.score(docsEnum.docID(), 1); + } + + /** + * Advances to the first match beyond the current whose document number is + * greater than or equal to a given target.
+ * The implementation uses {@link DocsEnum#advance(int)}. + * + * @param target + * The target document number. + * @return the matching document or NO_MORE_DOCS if none exist. + */ + @Override + public int advance(int target) throws IOException { + return docsEnum.advance(target); + } + + /** Returns a string representation of this TermScorer. */ + @Override + public String toString() { return "scorer(" + weight + ")"; } +} diff --git a/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 7dd436b5a35..c199538cd8b 100644 --- a/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -229,7 +229,7 @@ public class MultiPhraseQuery extends Query { if (postingsEnum == null) { // term does exist, but has no positions - assert termsEnum.docs(liveDocs, null) != null: "termstate found but no term exists in reader"; + assert termsEnum.docs(liveDocs, null, false) != null: "termstate found but no term exists in reader"; throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")"); } diff --git a/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java b/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java index 49c798b04e8..b1bb1441e16 100644 --- a/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java +++ b/lucene/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java @@ -105,7 +105,7 @@ public class MultiTermQueryWrapperFilter extends Filte do { // System.out.println(" iter termCount=" + termCount + " term=" + // enumerator.term().toBytesString()); - docsEnum = termsEnum.docs(acceptDocs, docsEnum); + docsEnum = termsEnum.docs(acceptDocs, docsEnum, false); int docid; while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { bitSet.set(docid); diff 
--git a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java index 030dbd8ccce..1ff1219321e 100644 --- a/lucene/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/src/java/org/apache/lucene/search/PhraseQuery.java @@ -244,7 +244,7 @@ public class PhraseQuery extends Query { // PhraseQuery on a field that did not index // positions. if (postingsEnum == null) { - assert reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state) != null: "termstate found but no term exists in reader"; + assert reader.termDocsEnum(liveDocs, t.field(), t.bytes(), state, false) != null: "termstate found but no term exists in reader"; // term does exist, but has no positions throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")"); } diff --git a/lucene/src/java/org/apache/lucene/search/TermQuery.java b/lucene/src/java/org/apache/lucene/search/TermQuery.java index 0ba8e00e146..7a9064198d2 100644 --- a/lucene/src/java/org/apache/lucene/search/TermQuery.java +++ b/lucene/src/java/org/apache/lucene/search/TermQuery.java @@ -21,18 +21,18 @@ import java.io.IOException; import java.util.Set; import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.TermState; -import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.ReaderContext; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.similarities.Similarity.ExactDocScorer; +import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; -import 
org.apache.lucene.util.TermContext; import org.apache.lucene.util.ReaderUtil; +import org.apache.lucene.util.TermContext; import org.apache.lucene.util.ToStringUtils; /** A Query that matches documents containing a term. @@ -83,10 +83,15 @@ public class TermQuery extends Query { if (termsEnum == null) { return null; } - // TODO should we reuse the DocsEnum here? - final DocsEnum docs = termsEnum.docs(acceptDocs, null); - assert docs != null; - return new TermScorer(this, docs, createDocScorer(context)); + DocsEnum docs = termsEnum.docs(acceptDocs, null, true); + if (docs != null) { + return new TermScorer(this, docs, createDocScorer(context)); + } else { + // Index does not store freq info + docs = termsEnum.docs(acceptDocs, null, false); + assert docs != null; + return new MatchOnlyTermScorer(this, docs, createDocScorer(context)); + } } /** @@ -120,12 +125,11 @@ public class TermQuery extends Query { @Override public Explanation explain(AtomicReaderContext context, int doc) throws IOException { - IndexReader reader = context.reader; - DocsEnum docs = reader.termDocsEnum(context.reader.getLiveDocs(), term.field(), term.bytes()); - if (docs != null) { - int newDoc = docs.advance(doc); + Scorer scorer = scorer(context, true, false, context.reader.getLiveDocs()); + if (scorer != null) { + int newDoc = scorer.advance(doc); if (newDoc == doc) { - int freq = docs.freq(); + float freq = scorer.freq(); ExactDocScorer docScorer = similarity.exactDocScorer(stats, term.field(), context); ComplexExplanation result = new ComplexExplanation(); result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); @@ -136,8 +140,7 @@ public class TermQuery extends Query { return result; } } - - return new ComplexExplanation(false, 0.0f, "no matching term"); + return new ComplexExplanation(false, 0.0f, "no matching term"); } } diff --git 
a/lucene/src/test-framework/java/org/apache/lucene/index/codecs/ramonly/RAMOnlyPostingsFormat.java b/lucene/src/test-framework/java/org/apache/lucene/index/codecs/ramonly/RAMOnlyPostingsFormat.java index 1891d2448e8..43e3b417b97 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/index/codecs/ramonly/RAMOnlyPostingsFormat.java +++ b/lucene/src/test-framework/java/org/apache/lucene/index/codecs/ramonly/RAMOnlyPostingsFormat.java @@ -383,7 +383,7 @@ public class RAMOnlyPostingsFormat extends PostingsFormat { } @Override - public DocsEnum docs(Bits liveDocs, DocsEnum reuse) { + public DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) { return new RAMDocsEnum(ramField.termToDocs.get(current), liveDocs); } diff --git a/lucene/src/test-framework/java/org/apache/lucene/util/_TestUtil.java b/lucene/src/test-framework/java/org/apache/lucene/util/_TestUtil.java index d37c4ccd5dc..0cfde9ab597 100644 --- a/lucene/src/test-framework/java/org/apache/lucene/util/_TestUtil.java +++ b/lucene/src/test-framework/java/org/apache/lucene/util/_TestUtil.java @@ -37,12 +37,18 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.ConcurrentMergeScheduler; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LogMergePolicy; import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergeScheduler; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TieredMergePolicy; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.PostingsFormat; @@ -520,4 +526,51 @@ public 
class _TestUtil { return doc2; } + + // Returns a DocsEnum, but randomly sometimes uses a + // DocsAndFreqsEnum, DocsAndPositionsEnum. Returns null + // if field/term doesn't exist: + public static DocsEnum docs(Random random, IndexReader r, String field, BytesRef term, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException { + final Terms terms = MultiFields.getTerms(r, field); + if (terms == null) { + return null; + } + final TermsEnum termsEnum = terms.iterator(null); + if (!termsEnum.seekExact(term, random.nextBoolean())) { + return null; + } + if (random.nextBoolean()) { + if (random.nextBoolean()) { + // TODO: cast re-use to D&PE if we can...? + final DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null); + if (docsAndPositions != null) { + return docsAndPositions; + } + } + final DocsEnum docsAndFreqs = termsEnum.docs(liveDocs, reuse, true); + if (docsAndFreqs != null) { + return docsAndFreqs; + } + } + return termsEnum.docs(liveDocs, reuse, needsFreqs); + } + + // Returns a DocsEnum from a positioned TermsEnum, but + // randomly sometimes uses a DocsAndFreqsEnum, DocsAndPositionsEnum. + public static DocsEnum docs(Random random, TermsEnum termsEnum, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException { + if (random.nextBoolean()) { + if (random.nextBoolean()) { + // TODO: cast re-use to D&PE if we can...? 
+ final DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null); + if (docsAndPositions != null) { + return docsAndPositions; + } + } + final DocsEnum docsAndFreqs = termsEnum.docs(liveDocs, null, true); + if (docsAndFreqs != null) { + return docsAndFreqs; + } + } + return termsEnum.docs(liveDocs, null, needsFreqs); + } } diff --git a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java index b8457060373..b92da6d38e6 100755 --- a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java +++ b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java @@ -530,7 +530,7 @@ public class TestAddIndexes extends LuceneTestCase { private void verifyTermDocs(Directory dir, Term term, int numDocs) throws IOException { IndexReader reader = IndexReader.open(dir, true); - DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, null, term.field, term.bytes); + DocsEnum docsEnum = _TestUtil.docs(random, reader, term.field, term.bytes, null, null, false); int count = 0; while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) count++; diff --git a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index a4feb22dfcb..dbf96eb9399 100644 --- a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -669,7 +669,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase { // should be found exactly assertEquals(TermsEnum.SeekStatus.FOUND, terms.seekCeil(aaaTerm)); - assertEquals(35, countDocs(terms.docs(null, null))); + assertEquals(35, countDocs(_TestUtil.docs(random, terms, null, null, false))); assertNull(terms.next()); // should hit end of field @@ -681,12 +681,12 @@ public class TestBackwardsCompatibility extends LuceneTestCase { assertEquals(TermsEnum.SeekStatus.NOT_FOUND, 
terms.seekCeil(new BytesRef("a"))); assertTrue(terms.term().bytesEquals(aaaTerm)); - assertEquals(35, countDocs(terms.docs(null, null))); + assertEquals(35, countDocs(_TestUtil.docs(random, terms, null, null, false))); assertNull(terms.next()); assertEquals(TermsEnum.SeekStatus.FOUND, terms.seekCeil(aaaTerm)); - assertEquals(35, countDocs(terms.docs(null, null))); + assertEquals(35, countDocs(_TestUtil.docs(random, terms,null, null, false))); assertNull(terms.next()); r.close(); diff --git a/lucene/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/src/test/org/apache/lucene/index/TestCodecs.java index ee4ba8aad6f..6a30123cf71 100644 --- a/lucene/src/test/org/apache/lucene/index/TestCodecs.java +++ b/lucene/src/test/org/apache/lucene/index/TestCodecs.java @@ -279,7 +279,7 @@ public class TestCodecs extends LuceneTestCase { // make sure it properly fully resets (rewinds) its // internal state: for(int iter=0;iter<2;iter++) { - docsEnum = termsEnum.docs(null, docsEnum); + docsEnum = _TestUtil.docs(random, termsEnum, null, docsEnum, false); assertEquals(terms[i].docs[0], docsEnum.nextDoc()); assertEquals(DocsEnum.NO_MORE_DOCS, docsEnum.nextDoc()); } @@ -479,7 +479,7 @@ public class TestCodecs extends LuceneTestCase { assertEquals(status, TermsEnum.SeekStatus.FOUND); assertEquals(term.docs.length, termsEnum.docFreq()); if (field.omitTF) { - this.verifyDocs(term.docs, term.positions, termsEnum.docs(null, null), false); + this.verifyDocs(term.docs, term.positions, _TestUtil.docs(random, termsEnum, null, null, false), false); } else { this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null), true); } @@ -499,7 +499,7 @@ public class TestCodecs extends LuceneTestCase { assertTrue(termsEnum.term().bytesEquals(new BytesRef(term.text2))); assertEquals(term.docs.length, termsEnum.docFreq()); if (field.omitTF) { - this.verifyDocs(term.docs, term.positions, termsEnum.docs(null, null), false); + this.verifyDocs(term.docs, term.positions, 
_TestUtil.docs(random, termsEnum, null, null, false), false); } else { this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null), true); } @@ -549,15 +549,22 @@ public class TestCodecs extends LuceneTestCase { do { term = field.terms[upto]; if (TestCodecs.random.nextInt(3) == 1) { - final DocsEnum docs = termsEnum.docs(null, null); - final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(null, null); - - final DocsEnum docsEnum; - if (postings != null) { - docsEnum = postings; + final DocsEnum docs; + final DocsEnum docsAndFreqs; + final DocsAndPositionsEnum postings; + if (!field.omitTF) { + postings = termsEnum.docsAndPositions(null, null); + if (postings != null) { + docs = docsAndFreqs = postings; + } else { + docs = docsAndFreqs = _TestUtil.docs(random, termsEnum, null, null, true); + } } else { - docsEnum = docs; + postings = null; + docsAndFreqs = null; + docs = _TestUtil.docs(random, termsEnum, null, null, false); } + assertNotNull(docs); int upto2 = -1; while(upto2 < term.docs.length-1) { // Maybe skip: @@ -567,10 +574,10 @@ public class TestCodecs extends LuceneTestCase { final int inc = 1+TestCodecs.random.nextInt(left-1); upto2 += inc; if (TestCodecs.random.nextInt(2) == 1) { - doc = docsEnum.advance(term.docs[upto2]); + doc = docs.advance(term.docs[upto2]); assertEquals(term.docs[upto2], doc); } else { - doc = docsEnum.advance(1+term.docs[upto2]); + doc = docs.advance(1+term.docs[upto2]); if (doc == DocIdSetIterator.NO_MORE_DOCS) { // skipped past last doc assert upto2 == term.docs.length-1; @@ -584,20 +591,20 @@ public class TestCodecs extends LuceneTestCase { } } } else { - doc = docsEnum.nextDoc(); + doc = docs.nextDoc(); assertTrue(doc != -1); upto2++; } assertEquals(term.docs[upto2], doc); if (!field.omitTF) { - assertEquals(term.positions[upto2].length, docsEnum.freq()); + assertEquals(term.positions[upto2].length, postings.freq()); if (TestCodecs.random.nextInt(2) == 1) { 
this.verifyPositions(term.positions[upto2], postings); } } } - assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsEnum.nextDoc()); + assertEquals(DocIdSetIterator.NO_MORE_DOCS, docs.nextDoc()); } upto++; diff --git a/lucene/src/test/org/apache/lucene/index/TestDirectoryReader.java b/lucene/src/test/org/apache/lucene/index/TestDirectoryReader.java index a1ab01e3d68..a6fc769e7c2 100644 --- a/lucene/src/test/org/apache/lucene/index/TestDirectoryReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestDirectoryReader.java @@ -17,7 +17,8 @@ package org.apache.lucene.index; * limitations under the License. */ -import org.apache.lucene.util.LuceneTestCase; +import java.io.IOException; +import java.util.Random; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; @@ -25,9 +26,8 @@ import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; - -import java.io.IOException; -import java.util.Random; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; public class TestDirectoryReader extends LuceneTestCase { protected Directory dir; @@ -171,15 +171,18 @@ public class TestDirectoryReader extends LuceneTestCase { // test mixing up TermDocs and TermEnums from different readers. 
TermsEnum te2 = MultiFields.getTerms(mr2, "body").iterator(null); te2.seekCeil(new BytesRef("wow")); - DocsEnum td = MultiFields.getTermDocsEnum(mr2, - MultiFields.getLiveDocs(mr2), - "body", - te2.term()); + DocsEnum td = _TestUtil.docs(random, mr2, + "body", + te2.term(), + MultiFields.getLiveDocs(mr2), + null, + false); TermsEnum te3 = MultiFields.getTerms(mr3, "body").iterator(null); te3.seekCeil(new BytesRef("wow")); - td = te3.docs(MultiFields.getLiveDocs(mr3), - td); + td = _TestUtil.docs(random, te3, MultiFields.getLiveDocs(mr3), + td, + false); int ret = 0; diff --git a/lucene/src/test/org/apache/lucene/index/TestDocCount.java b/lucene/src/test/org/apache/lucene/index/TestDocCount.java index cd526311d51..223bb0d2517 100644 --- a/lucene/src/test/org/apache/lucene/index/TestDocCount.java +++ b/lucene/src/test/org/apache/lucene/index/TestDocCount.java @@ -68,11 +68,14 @@ public class TestDocCount extends LuceneTestCase { String field; while ((field = e.next()) != null) { Terms terms = fields.terms(field); + if (terms == null) { + continue; + } int docCount = terms.getDocCount(); FixedBitSet visited = new FixedBitSet(ir.maxDoc()); TermsEnum te = terms.iterator(null); while (te.next() != null) { - DocsEnum de = te.docs(null, null); + DocsEnum de = _TestUtil.docs(random, te, null, null, false); while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { visited.set(de.docID()); } diff --git a/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java b/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java index 68ebb457758..e9f5bb02540 100644 --- a/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java +++ b/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java @@ -22,7 +22,6 @@ import java.util.Arrays; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.StringField; 
import org.apache.lucene.document.TextField; @@ -34,6 +33,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.ReaderUtil; +import org.apache.lucene.util._TestUtil; public class TestDocsAndPositions extends LuceneTestCase { private String fieldName; @@ -99,16 +99,6 @@ public class TestDocsAndPositions extends LuceneTestCase { return reader.termPositionsEnum(null, fieldName, bytes); } - public DocsEnum getDocsEnum(IndexReader reader, BytesRef bytes, - boolean freqs, Bits liveDocs) throws IOException { - int randInt = random.nextInt(10); - if (randInt == 0) { // once in a while throw in a positions enum - return getDocsAndPositions(reader, bytes, liveDocs); - } else { - return reader.termDocsEnum(liveDocs, fieldName, bytes); - } - } - /** * this test indexes random numbers within a range into a field and checks * their occurrences by searching for a number from that range selected at @@ -232,31 +222,31 @@ public class TestDocsAndPositions extends LuceneTestCase { AtomicReaderContext[] leaves = ReaderUtil.leaves(topReaderContext); for (AtomicReaderContext context : leaves) { int maxDoc = context.reader.maxDoc(); - DocsEnum docsAndPosEnum = getDocsEnum(context.reader, bytes, true, null); + DocsEnum docsEnum = _TestUtil.docs(random, context.reader, fieldName, bytes, null, null, true); if (findNext(freqInDoc, context.docBase, context.docBase + maxDoc) == Integer.MAX_VALUE) { - assertNull(docsAndPosEnum); + assertNull(docsEnum); continue; } - assertNotNull(docsAndPosEnum); - docsAndPosEnum.nextDoc(); + assertNotNull(docsEnum); + docsEnum.nextDoc(); for (int j = 0; j < maxDoc; j++) { if (freqInDoc[context.docBase + j] != 0) { - assertEquals(j, docsAndPosEnum.docID()); - assertEquals(docsAndPosEnum.freq(), freqInDoc[context.docBase +j]); + assertEquals(j, docsEnum.docID()); + assertEquals(docsEnum.freq(), freqInDoc[context.docBase +j]); if (i % 2 == 0 && 
random.nextInt(10) == 0) { int next = findNext(freqInDoc, context.docBase+j+1, context.docBase + maxDoc) - context.docBase; - int advancedTo = docsAndPosEnum.advance(next); + int advancedTo = docsEnum.advance(next); if (next >= maxDoc) { assertEquals(DocsEnum.NO_MORE_DOCS, advancedTo); } else { assertTrue("advanced to: " +advancedTo + " but should be <= " + next, next >= advancedTo); } } else { - docsAndPosEnum.nextDoc(); + docsEnum.nextDoc(); } } } - assertEquals("docBase: " + context.docBase + " maxDoc: " + maxDoc + " " + docsAndPosEnum.getClass(), DocsEnum.NO_MORE_DOCS, docsAndPosEnum.docID()); + assertEquals("docBase: " + context.docBase + " maxDoc: " + maxDoc + " " + docsEnum.getClass(), DocsEnum.NO_MORE_DOCS, docsEnum.docID()); } } @@ -343,7 +333,7 @@ public class TestDocsAndPositions extends LuceneTestCase { writer.addDocument(doc); IndexReader reader = writer.getReader(); IndexReader r = getOnlySegmentReader(reader); - DocsEnum disi = r.termDocsEnum(null, "foo", new BytesRef("bar")); + DocsEnum disi = _TestUtil.docs(random, r, "foo", new BytesRef("bar"), null, null, false); int docid = disi.docID(); assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS); assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); @@ -351,7 +341,7 @@ public class TestDocsAndPositions extends LuceneTestCase { // now reuse and check again TermsEnum te = r.terms("foo").iterator(null); assertTrue(te.seekExact(new BytesRef("bar"), true)); - disi = te.docs(null, disi); + disi = _TestUtil.docs(random, te, null, disi, false); docid = disi.docID(); assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS); assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); diff --git a/lucene/src/test/org/apache/lucene/index/TestDuelingCodecs.java b/lucene/src/test/org/apache/lucene/index/TestDuelingCodecs.java index 14ad9d0017f..13172e2171c 100644 --- a/lucene/src/test/org/apache/lucene/index/TestDuelingCodecs.java +++ 
b/lucene/src/test/org/apache/lucene/index/TestDuelingCodecs.java @@ -270,18 +270,42 @@ public class TestDuelingCodecs extends LuceneTestCase { assertPositionsSkipping(leftTermsEnum.docFreq(), leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions)); + + // with freqs: + assertDocsEnum(leftDocs = leftTermsEnum.docs(null, leftDocs, true), + rightDocs = rightTermsEnum.docs(null, rightDocs, true), + true); + assertDocsEnum(leftDocs = leftTermsEnum.docs(randomBits, leftDocs, true), + rightDocs = rightTermsEnum.docs(randomBits, rightDocs, true), + true); + + // w/o freqs: + assertDocsEnum(leftDocs = leftTermsEnum.docs(null, leftDocs, false), + rightDocs = rightTermsEnum.docs(null, rightDocs, false), + false); + assertDocsEnum(leftDocs = leftTermsEnum.docs(randomBits, leftDocs, false), + rightDocs = rightTermsEnum.docs(randomBits, rightDocs, false), + false); - assertDocsEnum(leftDocs = leftTermsEnum.docs(null, leftDocs), - rightDocs = rightTermsEnum.docs(null, rightDocs)); - assertDocsEnum(leftDocs = leftTermsEnum.docs(randomBits, leftDocs), - rightDocs = rightTermsEnum.docs(randomBits, rightDocs)); - + // with freqs: assertDocsSkipping(leftTermsEnum.docFreq(), - leftDocs = leftTermsEnum.docs(null, leftDocs), - rightDocs = rightTermsEnum.docs(null, rightDocs)); + leftDocs = leftTermsEnum.docs(null, leftDocs, true), + rightDocs = rightTermsEnum.docs(null, rightDocs, true), + true); assertDocsSkipping(leftTermsEnum.docFreq(), - leftDocs = leftTermsEnum.docs(randomBits, leftDocs), - rightDocs = rightTermsEnum.docs(randomBits, rightDocs)); + leftDocs = leftTermsEnum.docs(randomBits, leftDocs, true), + rightDocs = rightTermsEnum.docs(randomBits, rightDocs, true), + true); + + // w/o freqs: + assertDocsSkipping(leftTermsEnum.docFreq(), + leftDocs = leftTermsEnum.docs(null, leftDocs, false), + rightDocs = rightTermsEnum.docs(null, rightDocs, false), + false); + 
assertDocsSkipping(leftTermsEnum.docFreq(), + leftDocs = leftTermsEnum.docs(randomBits, leftDocs, false), + rightDocs = rightTermsEnum.docs(randomBits, rightDocs, false), + false); } } assertNull(info, rightTermsEnum.next()); @@ -327,13 +351,19 @@ public class TestDuelingCodecs extends LuceneTestCase { /** * checks docs + freqs, sequentially */ - public void assertDocsEnum(DocsEnum leftDocs, DocsEnum rightDocs) throws Exception { + public void assertDocsEnum(DocsEnum leftDocs, DocsEnum rightDocs, boolean hasFreqs) throws Exception { + if (leftDocs == null) { + assertNull(rightDocs); + return; + } assertTrue(info, leftDocs.docID() == -1 || leftDocs.docID() == DocIdSetIterator.NO_MORE_DOCS); assertTrue(info, rightDocs.docID() == -1 || rightDocs.docID() == DocIdSetIterator.NO_MORE_DOCS); int docid; while ((docid = leftDocs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { assertEquals(info, docid, rightDocs.nextDoc()); - assertEquals(info, leftDocs.freq(), rightDocs.freq()); + if (hasFreqs) { + assertEquals(info, leftDocs.freq(), rightDocs.freq()); + } } assertEquals(info, DocIdSetIterator.NO_MORE_DOCS, rightDocs.nextDoc()); } @@ -341,7 +371,11 @@ public class TestDuelingCodecs extends LuceneTestCase { /** * checks advancing docs */ - public void assertDocsSkipping(int docFreq, DocsEnum leftDocs, DocsEnum rightDocs) throws Exception { + public void assertDocsSkipping(int docFreq, DocsEnum leftDocs, DocsEnum rightDocs, boolean hasFreqs) throws Exception { + if (leftDocs == null) { + assertNull(rightDocs); + return; + } int docid = -1; int averageGap = leftReader.maxDoc() / (1+docFreq); int skipInterval = 16; @@ -361,7 +395,9 @@ public class TestDuelingCodecs extends LuceneTestCase { if (docid == DocIdSetIterator.NO_MORE_DOCS) { return; } - assertEquals(info, leftDocs.freq(), rightDocs.freq()); + if (hasFreqs) { + assertEquals(info, leftDocs.freq(), rightDocs.freq()); + } } } diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java 
b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java index 68793750708..01a786ca132 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java @@ -320,18 +320,20 @@ public class TestIndexReader extends LuceneTestCase Term term, int expected) throws IOException { - DocsEnum tdocs = MultiFields.getTermDocsEnum(reader, - MultiFields.getLiveDocs(reader), - term.field(), - new BytesRef(term.text())); - int count = 0; - if (tdocs != null) { - while(tdocs.nextDoc()!= DocIdSetIterator.NO_MORE_DOCS) { - count++; - } - } - assertEquals(msg + ", count mismatch", expected, count); + DocsEnum tdocs = _TestUtil.docs(random, reader, + term.field(), + new BytesRef(term.text()), + MultiFields.getLiveDocs(reader), + null, + false); + int count = 0; + if (tdocs != null) { + while(tdocs.nextDoc()!= DocIdSetIterator.NO_MORE_DOCS) { + count++; + } } + assertEquals(msg + ", count mismatch", expected, count); + } public void testBinaryFields() throws IOException { diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java index fbbc1283fa7..a8c1c696dbe 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -546,10 +546,12 @@ public class TestIndexWriter extends LuceneTestCase { assertEquals(1, reader.numDocs()); Term t = new Term("field", "a"); assertEquals(1, reader.docFreq(t)); - DocsEnum td = MultiFields.getTermDocsEnum(reader, - MultiFields.getLiveDocs(reader), - "field", - new BytesRef("a")); + DocsEnum td = _TestUtil.docs(random, reader, + "field", + new BytesRef("a"), + MultiFields.getLiveDocs(reader), + null, + true); td.nextDoc(); assertEquals(128*1024, td.freq()); reader.close(); @@ -1338,12 +1340,12 @@ public class TestIndexWriter extends LuceneTestCase { // test that the terms were indexed. 
- assertTrue(MultiFields.getTermDocsEnum(ir, null, "binary", new BytesRef("doc1field1")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); - assertTrue(MultiFields.getTermDocsEnum(ir, null, "binary", new BytesRef("doc2field1")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); - assertTrue(MultiFields.getTermDocsEnum(ir, null, "binary", new BytesRef("doc3field1")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); - assertTrue(MultiFields.getTermDocsEnum(ir, null, "string", new BytesRef("doc1field2")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); - assertTrue(MultiFields.getTermDocsEnum(ir, null, "string", new BytesRef("doc2field2")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); - assertTrue(MultiFields.getTermDocsEnum(ir, null, "string", new BytesRef("doc3field2")).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + assertTrue(_TestUtil.docs(random, ir, "binary", new BytesRef("doc1field1"), null, null, false).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + assertTrue(_TestUtil.docs(random, ir, "binary", new BytesRef("doc2field1"), null, null, false).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + assertTrue(_TestUtil.docs(random, ir, "binary", new BytesRef("doc3field1"), null, null, false).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + assertTrue(_TestUtil.docs(random, ir, "string", new BytesRef("doc1field2"), null, null, false).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + assertTrue(_TestUtil.docs(random, ir, "string", new BytesRef("doc2field2"), null, null, false).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + assertTrue(_TestUtil.docs(random, ir, "string", new BytesRef("doc3field2"), null, null, false).nextDoc() != DocIdSetIterator.NO_MORE_DOCS); ir.close(); dir.close(); @@ -1415,7 +1417,7 @@ public class TestIndexWriter extends LuceneTestCase { TermsEnum t = r.fields().terms("field").iterator(null); int count = 0; while(t.next() != null) { - final DocsEnum docs = t.docs(null, null); + final DocsEnum docs = _TestUtil.docs(random, t, null, null, false); assertEquals(0, 
docs.nextDoc()); assertEquals(DocIdSetIterator.NO_MORE_DOCS, docs.nextDoc()); count++; diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java index b402ceae92b..d0613ace541 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java @@ -500,10 +500,12 @@ public class TestIndexWriterExceptions extends LuceneTestCase { // Make sure the doc that hit the exception was marked // as deleted: - DocsEnum tdocs = MultiFields.getTermDocsEnum(reader, - MultiFields.getLiveDocs(reader), - t.field(), - new BytesRef(t.text())); + DocsEnum tdocs = _TestUtil.docs(random, reader, + t.field(), + new BytesRef(t.text()), + MultiFields.getLiveDocs(reader), + null, + false); int count = 0; while(tdocs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java index 1920d37c8ff..1225c1e0962 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterReader.java @@ -49,9 +49,11 @@ public class TestIndexWriterReader extends LuceneTestCase { public static int count(Term t, IndexReader r) throws IOException { int count = 0; - DocsEnum td = MultiFields.getTermDocsEnum(r, - MultiFields.getLiveDocs(r), - t.field(), new BytesRef(t.text())); + DocsEnum td = _TestUtil.docs(random, r, + t.field(), new BytesRef(t.text()), + MultiFields.getLiveDocs(r), + null, + false); if (td != null) { while (td.nextDoc() != DocsEnum.NO_MORE_DOCS) { @@ -990,7 +992,7 @@ public class TestIndexWriterReader extends LuceneTestCase { w.addDocument(doc); IndexReader r = IndexReader.open(w, true).getSequentialSubReaders()[0]; try { - r.termDocsEnum(null, "f", new BytesRef("val")); + _TestUtil.docs(random, r, 
"f", new BytesRef("val"), null, null, false); fail("should have failed to seek since terms index was not loaded."); } catch (IllegalStateException e) { // expected - we didn't load the term index diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java index 40f5ae5fb18..6ea8f2e9108 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java @@ -32,6 +32,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.ThreadInterruptedException; +import org.apache.lucene.util._TestUtil; /** * MultiThreaded IndexWriter tests @@ -209,10 +210,12 @@ public class TestIndexWriterWithThreads extends LuceneTestCase { // Quick test to make sure index is not corrupt: IndexReader reader = IndexReader.open(dir, true); - DocsEnum tdocs = MultiFields.getTermDocsEnum(reader, - MultiFields.getLiveDocs(reader), - "field", - new BytesRef("aaa")); + DocsEnum tdocs = _TestUtil.docs(random, reader, + "field", + new BytesRef("aaa"), + MultiFields.getLiveDocs(reader), + null, + false); int count = 0; while(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS) { count++; diff --git a/lucene/src/test/org/apache/lucene/index/TestLongPostings.java b/lucene/src/test/org/apache/lucene/index/TestLongPostings.java index 84f5f52a9d7..ec69b904288 100644 --- a/lucene/src/test/org/apache/lucene/index/TestLongPostings.java +++ b/lucene/src/test/org/apache/lucene/index/TestLongPostings.java @@ -367,7 +367,17 @@ public class TestLongPostings extends LuceneTestCase { System.out.println("\nTEST: iter=" + iter + " doS1=" + doS1 + " term=" + term); } - final DocsEnum postings = MultiFields.getTermDocsEnum(r, null, "field", new BytesRef(term)); + final DocsEnum docs; + final DocsEnum postings; + + if (options == 
IndexOptions.DOCS_ONLY) { + docs = _TestUtil.docs(random, r, "field", new BytesRef(term), null, null, false); + postings = null; + } else { + docs = postings = _TestUtil.docs(random, r, "field", new BytesRef(term), null, null, true); + assert postings != null; + } + assert docs != null; int docID = -1; while(docID < DocsEnum.NO_MORE_DOCS) { @@ -388,7 +398,7 @@ public class TestLongPostings extends LuceneTestCase { expected++; } } - docID = postings.nextDoc(); + docID = docs.nextDoc(); if (VERBOSE) { System.out.println(" got docID=" + docID); } @@ -397,7 +407,7 @@ public class TestLongPostings extends LuceneTestCase { break; } - if (random.nextInt(6) == 3) { + if (random.nextInt(6) == 3 && postings != null) { final int freq = postings.freq(); assertTrue(freq >=1 && freq <= 4); } @@ -424,7 +434,7 @@ public class TestLongPostings extends LuceneTestCase { } } - docID = postings.advance(targetDocID); + docID = docs.advance(targetDocID); if (VERBOSE) { System.out.println(" got docID=" + docID); } @@ -433,7 +443,7 @@ public class TestLongPostings extends LuceneTestCase { break; } - if (random.nextInt(6) == 3) { + if (random.nextInt(6) == 3 && postings != null) { final int freq = postings.freq(); assertTrue("got invalid freq=" + freq, freq >=1 && freq <= 4); } diff --git a/lucene/src/test/org/apache/lucene/index/TestMultiFields.java b/lucene/src/test/org/apache/lucene/index/TestMultiFields.java index 916837faa9e..e86738e97d6 100644 --- a/lucene/src/test/org/apache/lucene/index/TestMultiFields.java +++ b/lucene/src/test/org/apache/lucene/index/TestMultiFields.java @@ -120,7 +120,7 @@ public class TestMultiFields extends LuceneTestCase { System.out.println("TEST: seek term="+ UnicodeUtil.toHexString(term.utf8ToString()) + " " + term); } - DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, liveDocs, "field", term); + DocsEnum docsEnum = _TestUtil.docs(random, reader, "field", term, liveDocs, null, false); assertNotNull(docsEnum); for(int docID : docs.get(term)) { @@ 
-138,11 +138,12 @@ public class TestMultiFields extends LuceneTestCase { /* private void verify(IndexReader r, String term, List expected) throws Exception { - DocsEnum docs = MultiFields.getTermDocsEnum(r, - MultiFields.getLiveDocs(r), - "field", - new BytesRef(term)); - + DocsEnum docs = _TestUtil.docs(random, r, + "field", + new BytesRef(term), + MultiFields.getLiveDocs(r), + null, + false); for(int docID : expected) { assertEquals(docID, docs.nextDoc()); } @@ -160,8 +161,8 @@ public class TestMultiFields extends LuceneTestCase { w.addDocument(d); IndexReader r = w.getReader(); w.close(); - DocsEnum d1 = MultiFields.getTermDocsEnum(r, null, "f", new BytesRef("j")); - DocsEnum d2 = MultiFields.getTermDocsEnum(r, null, "f", new BytesRef("j")); + DocsEnum d1 = _TestUtil.docs(random, r, "f", new BytesRef("j"), null, null, false); + DocsEnum d2 = _TestUtil.docs(random, r, "f", new BytesRef("j"), null, null, false); assertEquals(0, d1.nextDoc()); assertEquals(0, d2.nextDoc()); r.close(); diff --git a/lucene/src/test/org/apache/lucene/index/TestOmitPositions.java b/lucene/src/test/org/apache/lucene/index/TestOmitPositions.java index e2c6badf8b5..7f768693f9f 100644 --- a/lucene/src/test/org/apache/lucene/index/TestOmitPositions.java +++ b/lucene/src/test/org/apache/lucene/index/TestOmitPositions.java @@ -28,6 +28,7 @@ import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; /** * @@ -52,7 +53,7 @@ public class TestOmitPositions extends LuceneTestCase { assertNull(MultiFields.getTermPositionsEnum(reader, null, "foo", new BytesRef("test"))); - DocsEnum de = MultiFields.getTermDocsEnum(reader, null, "foo", new BytesRef("test")); + DocsEnum de = _TestUtil.docs(random, reader, "foo", new BytesRef("test"), null, null, true); while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { assertEquals(2, de.freq()); } diff 
--git a/lucene/src/test/org/apache/lucene/index/TestParallelTermEnum.java b/lucene/src/test/org/apache/lucene/index/TestParallelTermEnum.java index 97fb5573904..ae0432b1ffc 100755 --- a/lucene/src/test/org/apache/lucene/index/TestParallelTermEnum.java +++ b/lucene/src/test/org/apache/lucene/index/TestParallelTermEnum.java @@ -19,12 +19,13 @@ package org.apache.lucene.index; import java.io.IOException; -import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.TextField; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; public class TestParallelTermEnum extends LuceneTestCase { private IndexReader ir1; @@ -88,31 +89,31 @@ public class TestParallelTermEnum extends LuceneTestCase { TermsEnum te = terms.iterator(null); assertEquals("brown", te.next().utf8ToString()); - DocsEnum td = te.docs(liveDocs, null); + DocsEnum td = _TestUtil.docs(random, te, liveDocs, null, false); assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(0, td.docID()); assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); assertEquals("fox", te.next().utf8ToString()); - td = te.docs(liveDocs, td); + td = _TestUtil.docs(random, te, liveDocs, td, false); assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(0, td.docID()); assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); assertEquals("jumps", te.next().utf8ToString()); - td = te.docs(liveDocs, td); + td = _TestUtil.docs(random, te, liveDocs, td, false); assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(0, td.docID()); assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); assertEquals("quick", te.next().utf8ToString()); - td = te.docs(liveDocs, td); + td = _TestUtil.docs(random, te, liveDocs, td, false); assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(0, td.docID()); 
assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); assertEquals("the", te.next().utf8ToString()); - td = te.docs(liveDocs, td); + td = _TestUtil.docs(random, te, liveDocs, td, false); assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(0, td.docID()); assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); @@ -125,31 +126,31 @@ public class TestParallelTermEnum extends LuceneTestCase { te = terms.iterator(null); assertEquals("brown", te.next().utf8ToString()); - td = te.docs(liveDocs, td); + td = _TestUtil.docs(random, te, liveDocs, td, false); assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(0, td.docID()); assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); assertEquals("fox", te.next().utf8ToString()); - td = te.docs(liveDocs, td); + td = _TestUtil.docs(random, te, liveDocs, td, false); assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(0, td.docID()); assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); assertEquals("jumps", te.next().utf8ToString()); - td = te.docs(liveDocs, td); + td = _TestUtil.docs(random, te, liveDocs, td, false); assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(0, td.docID()); assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); assertEquals("quick", te.next().utf8ToString()); - td = te.docs(liveDocs, td); + td = _TestUtil.docs(random, te, liveDocs, td, false); assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(0, td.docID()); assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); assertEquals("the", te.next().utf8ToString()); - td = te.docs(liveDocs, td); + td = _TestUtil.docs(random, te, liveDocs, td, false); assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(0, td.docID()); assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); @@ -162,37 +163,37 @@ public class TestParallelTermEnum extends LuceneTestCase { te = terms.iterator(null); assertEquals("dog", te.next().utf8ToString()); - td = te.docs(liveDocs, td); + td = _TestUtil.docs(random, te, liveDocs, td, false); 
assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(0, td.docID()); assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); assertEquals("fox", te.next().utf8ToString()); - td = te.docs(liveDocs, td); + td = _TestUtil.docs(random, te, liveDocs, td, false); assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(0, td.docID()); assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); assertEquals("jumps", te.next().utf8ToString()); - td = te.docs(liveDocs, td); + td = _TestUtil.docs(random, te, liveDocs, td, false); assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(0, td.docID()); assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); assertEquals("lazy", te.next().utf8ToString()); - td = te.docs(liveDocs, td); + td = _TestUtil.docs(random, te, liveDocs, td, false); assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(0, td.docID()); assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); assertEquals("over", te.next().utf8ToString()); - td = te.docs(liveDocs, td); + td = _TestUtil.docs(random, te, liveDocs, td, false); assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(0, td.docID()); assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); assertEquals("the", te.next().utf8ToString()); - td = te.docs(liveDocs, td); + td = _TestUtil.docs(random, te, liveDocs, td, false); assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(0, td.docID()); assertEquals(td.nextDoc(), DocsEnum.NO_MORE_DOCS); diff --git a/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java b/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java index 0187e3fb144..91ba6f94623 100644 --- a/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java +++ b/lucene/src/test/org/apache/lucene/index/TestPerSegmentDeletes.java @@ -20,8 +20,8 @@ package org.apache.lucene.index; import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.Random; import java.util.Map; +import java.util.Random; import 
org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.store.Directory; @@ -31,6 +31,7 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; public class TestPerSegmentDeletes extends LuceneTestCase { public void testDeletes1() throws Exception { @@ -224,7 +225,7 @@ public class TestPerSegmentDeletes extends LuceneTestCase { Terms cterms = fields.terms(term.field); TermsEnum ctermsEnum = cterms.iterator(null); if (ctermsEnum.seekExact(new BytesRef(term.text()), false)) { - DocsEnum docsEnum = ctermsEnum.docs(bits, null); + DocsEnum docsEnum = _TestUtil.docs(random, ctermsEnum, bits, null, false); return toArray(docsEnum); } return null; diff --git a/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java b/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java index b9b00f2b834..366a8ec3f7d 100644 --- a/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java +++ b/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java @@ -17,18 +17,19 @@ package org.apache.lucene.index; * limitations under the License. 
*/ -import org.apache.lucene.util.InfoStream; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.store.Directory; +import java.io.IOException; +import java.util.Collection; + import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.codecs.Codec; +import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; - -import java.io.IOException; -import java.util.Collection; +import org.apache.lucene.util.InfoStream; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; public class TestSegmentMerger extends LuceneTestCase { //The variables for the new merged segment @@ -98,10 +99,12 @@ public class TestSegmentMerger extends LuceneTestCase { assertTrue(newDoc2 != null); assertTrue(DocHelper.numFields(newDoc2) == DocHelper.numFields(doc2) - DocHelper.unstored.size()); - DocsEnum termDocs = MultiFields.getTermDocsEnum(mergedReader, - MultiFields.getLiveDocs(mergedReader), - DocHelper.TEXT_FIELD_2_KEY, - new BytesRef("field")); + DocsEnum termDocs = _TestUtil.docs(random, mergedReader, + DocHelper.TEXT_FIELD_2_KEY, + new BytesRef("field"), + MultiFields.getLiveDocs(mergedReader), + null, + false); assertTrue(termDocs != null); assertTrue(termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS); diff --git a/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java b/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java index e68a880734c..11484fc2da6 100644 --- a/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java @@ -22,12 +22,12 @@ import java.util.Collection; import java.util.Iterator; import java.util.List; -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.BytesRef; - import org.apache.lucene.document.Document; import 
org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; public class TestSegmentReader extends LuceneTestCase { private Directory dir; @@ -132,16 +132,20 @@ public class TestSegmentReader extends LuceneTestCase { } } - DocsEnum termDocs = MultiFields.getTermDocsEnum(reader, - MultiFields.getLiveDocs(reader), - DocHelper.TEXT_FIELD_1_KEY, - new BytesRef("field")); + DocsEnum termDocs = _TestUtil.docs(random, reader, + DocHelper.TEXT_FIELD_1_KEY, + new BytesRef("field"), + MultiFields.getLiveDocs(reader), + null, + false); assertTrue(termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS); - termDocs = MultiFields.getTermDocsEnum(reader, - MultiFields.getLiveDocs(reader), - DocHelper.NO_NORMS_KEY, - new BytesRef(DocHelper.NO_NORMS_TEXT)); + termDocs = _TestUtil.docs(random, reader, + DocHelper.NO_NORMS_KEY, + new BytesRef(DocHelper.NO_NORMS_TEXT), + MultiFields.getLiveDocs(reader), + null, + false); assertTrue(termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS); diff --git a/lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java b/lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java index 430ff596580..eddcf782f7a 100644 --- a/lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java +++ b/lucene/src/test/org/apache/lucene/index/TestSegmentTermDocs.java @@ -17,14 +17,15 @@ package org.apache.lucene.index; * limitations under the License. 
*/ -import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.store.Directory; +import java.io.IOException; + import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.TextField; +import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; - -import java.io.IOException; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; public class TestSegmentTermDocs extends LuceneTestCase { private Document testDoc = new Document(); @@ -61,7 +62,7 @@ public class TestSegmentTermDocs extends LuceneTestCase { TermsEnum terms = reader.fields().terms(DocHelper.TEXT_FIELD_2_KEY).iterator(null); terms.seekCeil(new BytesRef("field")); - DocsEnum termDocs = terms.docs(reader.getLiveDocs(), null); + DocsEnum termDocs = _TestUtil.docs(random, terms, reader.getLiveDocs(), null, true); if (termDocs.nextDoc() != DocsEnum.NO_MORE_DOCS) { int docId = termDocs.docID(); assertTrue(docId == 0); @@ -80,9 +81,12 @@ public class TestSegmentTermDocs extends LuceneTestCase { //After adding the document, we should be able to read it back in SegmentReader reader = SegmentReader.get(true, info, indexDivisor, newIOContext(random)); assertTrue(reader != null); - DocsEnum termDocs = reader.termDocsEnum(reader.getLiveDocs(), - "textField2", - new BytesRef("bad")); + DocsEnum termDocs = _TestUtil.docs(random, reader, + "textField2", + new BytesRef("bad"), + reader.getLiveDocs(), + null, + false); assertNull(termDocs); reader.close(); @@ -91,9 +95,12 @@ public class TestSegmentTermDocs extends LuceneTestCase { //After adding the document, we should be able to read it back in SegmentReader reader = SegmentReader.get(true, info, indexDivisor, newIOContext(random)); assertTrue(reader != null); - DocsEnum termDocs = reader.termDocsEnum(reader.getLiveDocs(), - "junk", - new BytesRef("bad")); + DocsEnum termDocs = _TestUtil.docs(random, reader, + "junk", + new BytesRef("bad"), 
+ reader.getLiveDocs(), + null, + false); assertNull(termDocs); reader.close(); } @@ -125,10 +132,12 @@ public class TestSegmentTermDocs extends LuceneTestCase { IndexReader reader = IndexReader.open(dir, null, true, indexDivisor); - DocsEnum tdocs = MultiFields.getTermDocsEnum(reader, - MultiFields.getLiveDocs(reader), - ta.field(), - new BytesRef(ta.text())); + DocsEnum tdocs = _TestUtil.docs(random, reader, + ta.field(), + new BytesRef(ta.text()), + MultiFields.getLiveDocs(reader), + null, + true); // without optimization (assumption skipInterval == 16) @@ -148,10 +157,12 @@ public class TestSegmentTermDocs extends LuceneTestCase { assertFalse(tdocs.advance(10) != DocsEnum.NO_MORE_DOCS); // without next - tdocs = MultiFields.getTermDocsEnum(reader, - MultiFields.getLiveDocs(reader), - ta.field(), - new BytesRef(ta.text())); + tdocs = _TestUtil.docs(random, reader, + ta.field(), + new BytesRef(ta.text()), + MultiFields.getLiveDocs(reader), + null, + false); assertTrue(tdocs.advance(0) != DocsEnum.NO_MORE_DOCS); assertEquals(0, tdocs.docID()); @@ -164,10 +175,12 @@ public class TestSegmentTermDocs extends LuceneTestCase { // exactly skipInterval documents and therefore with optimization // with next - tdocs = MultiFields.getTermDocsEnum(reader, - MultiFields.getLiveDocs(reader), - tb.field(), - new BytesRef(tb.text())); + tdocs = _TestUtil.docs(random, reader, + tb.field(), + new BytesRef(tb.text()), + MultiFields.getLiveDocs(reader), + null, + true); assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(10, tdocs.docID()); @@ -186,10 +199,12 @@ public class TestSegmentTermDocs extends LuceneTestCase { assertFalse(tdocs.advance(26) != DocsEnum.NO_MORE_DOCS); // without next - tdocs = MultiFields.getTermDocsEnum(reader, - MultiFields.getLiveDocs(reader), - tb.field(), - new BytesRef(tb.text())); + tdocs = _TestUtil.docs(random, reader, + tb.field(), + new BytesRef(tb.text()), + MultiFields.getLiveDocs(reader), + null, + true); 
assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS); assertEquals(10, tdocs.docID()); @@ -204,10 +219,12 @@ public class TestSegmentTermDocs extends LuceneTestCase { // much more than skipInterval documents and therefore with optimization // with next - tdocs = MultiFields.getTermDocsEnum(reader, - MultiFields.getLiveDocs(reader), - tc.field(), - new BytesRef(tc.text())); + tdocs = _TestUtil.docs(random, reader, + tc.field(), + new BytesRef(tc.text()), + MultiFields.getLiveDocs(reader), + null, + true); assertTrue(tdocs.nextDoc() != DocsEnum.NO_MORE_DOCS); assertEquals(26, tdocs.docID()); @@ -228,10 +245,12 @@ public class TestSegmentTermDocs extends LuceneTestCase { assertFalse(tdocs.advance(76) != DocsEnum.NO_MORE_DOCS); //without next - tdocs = MultiFields.getTermDocsEnum(reader, - MultiFields.getLiveDocs(reader), - tc.field(), - new BytesRef(tc.text())); + tdocs = _TestUtil.docs(random, reader, + tc.field(), + new BytesRef(tc.text()), + MultiFields.getLiveDocs(reader), + null, + false); assertTrue(tdocs.advance(5) != DocsEnum.NO_MORE_DOCS); assertEquals(26, tdocs.docID()); assertTrue(tdocs.advance(40) != DocsEnum.NO_MORE_DOCS); diff --git a/lucene/src/test/org/apache/lucene/index/TestStressAdvance.java b/lucene/src/test/org/apache/lucene/index/TestStressAdvance.java index 243a219dd5e..eb6e7620227 100644 --- a/lucene/src/test/org/apache/lucene/index/TestStressAdvance.java +++ b/lucene/src/test/org/apache/lucene/index/TestStressAdvance.java @@ -75,11 +75,11 @@ public class TestStressAdvance extends LuceneTestCase { System.out.println("\nTEST: iter=" + iter + " iter2=" + iter2); } assertEquals(TermsEnum.SeekStatus.FOUND, te.seekCeil(new BytesRef("a"))); - de = te.docs(null, de); + de = _TestUtil.docs(random, te, null, de, false); testOne(de, aDocIDs); assertEquals(TermsEnum.SeekStatus.FOUND, te.seekCeil(new BytesRef("b"))); - de = te.docs(null, de); + de = _TestUtil.docs(random, te, null, de, false); testOne(de, bDocIDs); } diff --git 
a/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java b/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java index 6bade83129c..4e878215d14 100644 --- a/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java +++ b/lucene/src/test/org/apache/lucene/index/TestStressIndexing2.java @@ -334,7 +334,7 @@ public class TestStressIndexing2 extends LuceneTestCase { // deleted docs): DocsEnum docs = null; while(termsEnum.next() != null) { - docs = termsEnum.docs(liveDocs1, docs); + docs = _TestUtil.docs(random, termsEnum, null, docs, false); while(docs.nextDoc() != DocsEnum.NO_MORE_DOCS) { fail("r1 is not empty but r2 is"); } @@ -354,9 +354,9 @@ public class TestStressIndexing2 extends LuceneTestCase { break; } - termDocs1 = termsEnum.docs(liveDocs1, termDocs1); + termDocs1 = _TestUtil.docs(random, termsEnum, liveDocs1, termDocs1, false); if (termsEnum2.seekExact(term, false)) { - termDocs2 = termsEnum2.docs(liveDocs2, termDocs2); + termDocs2 = _TestUtil.docs(random, termsEnum2, liveDocs2, termDocs2, false); } else { termDocs2 = null; } @@ -415,7 +415,7 @@ public class TestStressIndexing2 extends LuceneTestCase { System.out.println(" pos=" + dpEnum.nextPosition()); } } else { - dEnum = termsEnum3.docs(null, dEnum); + dEnum = _TestUtil.docs(random, termsEnum3, null, dEnum, true); assertNotNull(dEnum); assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS); final int freq = dEnum.freq(); @@ -449,7 +449,7 @@ public class TestStressIndexing2 extends LuceneTestCase { System.out.println(" pos=" + dpEnum.nextPosition()); } } else { - dEnum = termsEnum3.docs(null, dEnum); + dEnum = _TestUtil.docs(random, termsEnum3, null, dEnum, true); assertNotNull(dEnum); assertTrue(dEnum.nextDoc() != DocsEnum.NO_MORE_DOCS); final int freq = dEnum.freq(); @@ -506,7 +506,7 @@ public class TestStressIndexing2 extends LuceneTestCase { } //System.out.println("TEST: term1=" + term1); - docs1 = termsEnum1.docs(liveDocs1, docs1); + docs1 = _TestUtil.docs(random, termsEnum1, 
liveDocs1, docs1, true); while (docs1.nextDoc() != DocsEnum.NO_MORE_DOCS) { int d = docs1.docID(); int f = docs1.freq(); @@ -540,7 +540,7 @@ public class TestStressIndexing2 extends LuceneTestCase { } //System.out.println("TEST: term1=" + term1); - docs2 = termsEnum2.docs(liveDocs2, docs2); + docs2 = _TestUtil.docs(random, termsEnum2, liveDocs2, docs2, true); while (docs2.nextDoc() != DocsEnum.NO_MORE_DOCS) { int d = r2r1[docs2.docID()]; int f = docs2.freq(); @@ -667,8 +667,8 @@ public class TestStressIndexing2 extends LuceneTestCase { assertEquals(DocsEnum.NO_MORE_DOCS, dpEnum1.nextDoc()); assertEquals(DocsEnum.NO_MORE_DOCS, dpEnum2.nextDoc()); } else { - dEnum1 = termsEnum1.docs(null, dEnum1); - dEnum2 = termsEnum2.docs(null, dEnum2); + dEnum1 = _TestUtil.docs(random, termsEnum1, null, dEnum1, true); + dEnum2 = _TestUtil.docs(random, termsEnum2, null, dEnum2, true); assertNotNull(dEnum1); assertNotNull(dEnum2); int docID1 = dEnum1.nextDoc(); diff --git a/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java b/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java index d884de0ad42..7edda9ceb02 100644 --- a/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestTermVectorsReader.java @@ -24,9 +24,9 @@ import java.util.HashSet; import java.util.Set; import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -37,6 +37,7 @@ import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import 
org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; public class TestTermVectorsReader extends LuceneTestCase { //Must be lexicographically sorted, will do in setup, versus trying to maintain here @@ -231,7 +232,7 @@ public class TestTermVectorsReader extends LuceneTestCase { //System.out.println("Term: " + term); assertEquals(testTerms[i], term); - docsEnum = termsEnum.docs(null, docsEnum); + docsEnum = _TestUtil.docs(random, termsEnum, null, docsEnum, false); assertNotNull(docsEnum); int doc = docsEnum.docID(); assertTrue(doc == -1 || doc == DocIdSetIterator.NO_MORE_DOCS); diff --git a/lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java b/lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java index 729aeac21ef..bad76695ffc 100644 --- a/lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java +++ b/lucene/src/test/org/apache/lucene/index/TestTermdocPerf.java @@ -28,8 +28,9 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; class RepeatingTokenStream extends Tokenizer { @@ -121,7 +122,7 @@ public class TestTermdocPerf extends LuceneTestCase { DocsEnum tdocs = null; for (int i=0; i allEnums = new IdentityHashMap(); TermsEnum te = segment.terms("foo").iterator(null); while (te.next() != null) { - reuse = te.docs(null, reuse); + reuse = te.docs(null, reuse, false); allEnums.put(reuse, true); } @@ -101,7 +101,7 @@ public class TestPulsingReuse extends LuceneTestCase { Map allEnums = new IdentityHashMap(); TermsEnum te = segment.terms("foo").iterator(null); while (te.next() != null) { - reuse = te.docs(null, reuse); + reuse = te.docs(null, reuse, false); allEnums.put(reuse, true); } diff --git 
a/lucene/src/test/org/apache/lucene/search/TestTermVectors.java b/lucene/src/test/org/apache/lucene/search/TestTermVectors.java index 0b2bd94e4c1..68dabea7c75 100644 --- a/lucene/src/test/org/apache/lucene/search/TestTermVectors.java +++ b/lucene/src/test/org/apache/lucene/search/TestTermVectors.java @@ -35,6 +35,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.util.English; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; public class TestTermVectors extends LuceneTestCase { private IndexSearcher searcher; @@ -269,7 +270,7 @@ public class TestTermVectors extends LuceneTestCase { while (termsEnum.next() != null) { String text = termsEnum.term().utf8ToString(); - docs = termsEnum.docs(MultiFields.getLiveDocs(knownSearcher.reader), docs); + docs = _TestUtil.docs(random, termsEnum, MultiFields.getLiveDocs(knownSearcher.reader), docs, true); while (docs.nextDoc() != DocsEnum.NO_MORE_DOCS) { int docId = docs.docID(); diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java index 638759fe61b..f93e997ac9e 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java @@ -26,15 +26,16 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.MultiFields; import org.apache.lucene.search.IndexSearcher; import 
org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util._TestUtil; public class TestKeywordAnalyzer extends BaseTokenStreamTestCase { @@ -95,15 +96,21 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase { writer.close(); IndexReader reader = IndexReader.open(dir, true); - DocsEnum td = MultiFields.getTermDocsEnum(reader, - MultiFields.getLiveDocs(reader), - "partnum", - new BytesRef("Q36")); + DocsEnum td = _TestUtil.docs(random, + reader, + "partnum", + new BytesRef("Q36"), + MultiFields.getLiveDocs(reader), + null, + false); assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); - td = MultiFields.getTermDocsEnum(reader, - MultiFields.getLiveDocs(reader), - "partnum", - new BytesRef("Q37")); + td = _TestUtil.docs(random, + reader, + "partnum", + new BytesRef("Q37"), + MultiFields.getLiveDocs(reader), + null, + false); assertTrue(td.nextDoc() != DocsEnum.NO_MORE_DOCS); } diff --git a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java index ae73bcfe284..c8852526427 100755 --- a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java +++ b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java @@ -57,6 +57,7 @@ import org.apache.lucene.search.FieldCache.DocTermsIndex; import org.apache.lucene.search.FieldCache; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util._TestUtil; /** * Test very simply that perf tasks - simple algorithms - are doing what they should. 
@@ -493,7 +494,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase { TermsEnum termsEnum = terms.iterator(null); DocsEnum docs = null; while(termsEnum.next() != null) { - docs = termsEnum.docs(MultiFields.getLiveDocs(reader), docs); + docs = _TestUtil.docs(random, termsEnum, MultiFields.getLiveDocs(reader), docs, true); while(docs.nextDoc() != docs.NO_MORE_DOCS) { totalTokenCount2 += docs.freq(); } diff --git a/modules/facet/src/java/org/apache/lucene/facet/search/sampling/TakmiSampleFixer.java b/modules/facet/src/java/org/apache/lucene/facet/search/sampling/TakmiSampleFixer.java index b13284d1c24..1c7e09970dc 100644 --- a/modules/facet/src/java/org/apache/lucene/facet/search/sampling/TakmiSampleFixer.java +++ b/modules/facet/src/java/org/apache/lucene/facet/search/sampling/TakmiSampleFixer.java @@ -106,8 +106,10 @@ class TakmiSampleFixer implements SampleFixer { Term drillDownTerm = DrillDown.term(searchParams, catPath); // TODO (Facet): avoid Multi*? Bits liveDocs = MultiFields.getLiveDocs(indexReader); - int updatedCount = countIntersection(MultiFields.getTermDocsEnum(indexReader, liveDocs, drillDownTerm.field(), drillDownTerm.bytes()), - docIds.iterator()); + int updatedCount = countIntersection(MultiFields.getTermDocsEnum(indexReader, liveDocs, + drillDownTerm.field(), drillDownTerm.bytes(), + false), + docIds.iterator()); fresNode.setValue(updatedCount); } diff --git a/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java index fe9ce199709..b1e53f44978 100644 --- a/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java +++ b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java @@ -191,7 +191,7 @@ public class DirectoryTaxonomyReader implements TaxonomyReader { indexReaderLock.readLock().lock(); // TODO (Facet): avoid Multi*? 
Bits liveDocs = MultiFields.getLiveDocs(indexReader); - DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, liveDocs, Consts.FULL, new BytesRef(path)); + DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, liveDocs, Consts.FULL, new BytesRef(path), false); if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { ret = docs.docID(); } diff --git a/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java index ca6baaa4461..89ce05939a4 100644 --- a/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java +++ b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java @@ -405,7 +405,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter { // TODO (Facet): avoid Multi*? Bits liveDocs = MultiFields.getLiveDocs(reader); DocsEnum docs = MultiFields.getTermDocsEnum(reader, liveDocs, Consts.FULL, - new BytesRef(categoryPath.toString(delimiter))); + new BytesRef(categoryPath.toString(delimiter)), + false); if (docs == null || docs.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) { return -1; // category does not exist in taxonomy } @@ -441,7 +442,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter { } Bits liveDocs = MultiFields.getLiveDocs(reader); DocsEnum docs = MultiFields.getTermDocsEnum(reader, liveDocs, Consts.FULL, - new BytesRef(categoryPath.toString(delimiter, prefixLen))); + new BytesRef(categoryPath.toString(delimiter, prefixLen)), + false); if (docs == null || docs.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) { return -1; // category does not exist in taxonomy } @@ -788,7 +790,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter { // hence documents), there are no deletions in the index. Therefore, it // is sufficient to call next(), and then doc(), exactly once with no // 'validation' checks. 
- docsEnum = termsEnum.docs(liveDocs, docsEnum); + docsEnum = termsEnum.docs(liveDocs, docsEnum, false); docsEnum.nextDoc(); cp.clear(); // TODO (Facet): avoid String creation/use bytes? @@ -925,7 +927,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter { // like Lucene's merge works, we hope there are few seeks. // TODO (Facet): is there a quicker way? E.g., not specifying the // next term by name every time? - otherdocsEnum[i] = othertes[i].docs(MultiFields.getLiveDocs(otherreaders[i]), otherdocsEnum[i]); + otherdocsEnum[i] = othertes[i].docs(MultiFields.getLiveDocs(otherreaders[i]), otherdocsEnum[i], false); otherdocsEnum[i].nextDoc(); // TODO (Facet): check? int origordinal = otherdocsEnum[i].docID(); ordinalMaps[i].addMapping(origordinal, newordinal); @@ -942,7 +944,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter { // to be added because it already existed in the main taxonomy. // TODO (Facet): Again, is there a quicker way? - mainde = mainte.docs(MultiFields.getLiveDocs(mainreader), mainde); + mainde = mainte.docs(MultiFields.getLiveDocs(mainreader), mainde, false); mainde.nextDoc(); // TODO (Facet): check? int newordinal = mainde.docID(); @@ -950,7 +952,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter { for (int i=0; i