From 72f99b47b4673d858e3ffd42afdca76e0a5bb860 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Mon, 30 Jul 2012 17:31:34 +0000 Subject: [PATCH] LUCENE-4230: allow consumer to specify that payloads are not required when pulling D&PEnum, so codec can optimize if possible git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1367186 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 11 ++ lucene/MIGRATE.txt | 28 +--- .../analysis/core/TestClassicAnalyzer.java | 3 +- .../sinks/TestTeeSinkTokenFilter.java | 2 +- .../lucene/codecs/BlockTermsReader.java | 10 +- .../lucene/codecs/BlockTreeTermsReader.java | 20 +-- .../lucene/codecs/PostingsReaderBase.java | 2 +- .../lucene/codecs/TermVectorsWriter.java | 21 +-- .../apache/lucene/codecs/TermsConsumer.java | 5 +- .../lucene40/Lucene40PostingsReader.java | 5 +- .../lucene40/Lucene40TermVectorsReader.java | 19 +-- .../codecs/memory/DirectPostingsFormat.java | 12 +- .../codecs/memory/MemoryPostingsFormat.java | 6 +- .../codecs/pulsing/PulsingPostingsReader.java | 6 +- .../lucene/codecs/sep/SepPostingsReader.java | 6 +- .../simpletext/SimpleTextFieldsReader.java | 8 +- .../SimpleTextTermVectorsReader.java | 17 ++- .../org/apache/lucene/index/AtomicReader.java | 22 ++- .../org/apache/lucene/index/CheckIndex.java | 132 +++++++----------- .../org/apache/lucene/index/DocTermOrds.java | 4 +- .../lucene/index/DocsAndPositionsEnum.java | 7 + .../lucene/index/FilterAtomicReader.java | 4 +- .../lucene/index/FilteredTermsEnum.java | 4 +- .../org/apache/lucene/index/MultiFields.java | 18 ++- .../apache/lucene/index/MultiTermsEnum.java | 4 +- .../org/apache/lucene/index/TermsEnum.java | 32 ++++- .../apache/lucene/search/FieldCacheImpl.java | 2 +- .../apache/lucene/search/FuzzyTermsEnum.java | 4 +- .../lucene/search/MultiPhraseQuery.java | 4 +- .../org/apache/lucene/search/PhraseQuery.java | 2 +- .../lucene/search/spans/SpanTermQuery.java | 2 +- .../analysis/TestCachingTokenFilter.java | 9 +- 
.../codecs/pulsing/TestPulsingReuse.java | 5 +- .../org/apache/lucene/index/TestCodecs.java | 6 +- .../lucene/index/TestDirectoryReader.java | 4 +- .../test/org/apache/lucene/index/TestDoc.java | 2 +- .../lucene/index/TestDocsAndPositions.java | 6 +- .../lucene/index/TestDocumentWriter.java | 10 +- .../lucene/index/TestDuelingCodecs.java | 16 +-- .../lucene/index/TestFilterAtomicReader.java | 7 +- .../apache/lucene/index/TestIndexWriter.java | 6 +- .../lucene/index/TestIndexableField.java | 4 +- .../lucene/index/TestLazyProxSkipping.java | 6 +- .../apache/lucene/index/TestLongPostings.java | 2 +- .../lucene/index/TestMultiLevelSkipList.java | 3 +- .../lucene/index/TestOmitPositions.java | 2 +- .../index/TestPayloadProcessorProvider.java | 2 +- .../org/apache/lucene/index/TestPayloads.java | 14 +- .../lucene/index/TestPostingsFormat.java | 37 +++-- .../lucene/index/TestPostingsOffsets.java | 15 +- .../lucene/index/TestSegmentReader.java | 3 +- .../lucene/index/TestStressIndexing2.java | 8 +- .../lucene/index/TestTermVectorsReader.java | 8 +- .../lucene/index/TestTermVectorsWriter.java | 26 ++-- .../lucene/search/TestPositionIncrement.java | 9 +- .../apache/lucene/search/TestTermVectors.java | 12 +- .../lucene/facet/search/PayloadIterator.java | 2 +- .../facet/taxonomy/directory/ParentArray.java | 2 +- .../lucene/search/highlight/TokenSources.java | 9 +- .../TokenStreamFromTermPositionVector.java | 12 +- .../vectorhighlight/FieldTermStack.java | 5 +- .../lucene/index/memory/MemoryIndex.java | 5 +- .../lucene/index/memory/MemoryIndexTest.java | 4 +- .../codecs/ramonly/RAMOnlyPostingsFormat.java | 5 +- .../lucene/index/AssertingAtomicReader.java | 4 +- .../org/apache/lucene/util/_TestUtil.java | 40 ++---- .../component/TermVectorComponent.java | 28 ++-- 67 files changed, 361 insertions(+), 399 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 66a1f7fa01e..48fbbaaa471 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -34,6 +34,12 @@ New 
features CharFilterFactories to the lucene-analysis module. The API is still experimental. (Chris Male, Robert Muir, Uwe Schindler) +* LUCENE-4230: When pulling a DocsAndPositionsEnum you can now + specify whether or not you require payloads (in addition to + offsets); turning one or both off may allow some codec + implementations to optimize the enum implementation. (Robert Muir, + Mike McCandless) + API Changes * LUCENE-4138: update of morfologik (Polish morphological analyzer) to 1.5.3. @@ -72,6 +78,11 @@ API Changes * LUCENE-3884: Moved ElisionFilter out of org.apache.lucene.analysis.fr package into org.apache.lucene.analysis.util. (Robert Muir) +* LUCENE-4230: When pulling a DocsAndPositionsEnum you now pass an int + flags instead of the previous boolean needOffsets. Currently + recognized flags are DocsAndPositionsEnum.FLAG_PAYLOADS and + DocsAndPositionsEnum.FLAG_OFFSETS (Robert Muir, Mike McCandless) + Optimizations * LUCENE-4171: Performance improvements to Packed64. diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt index ffb506c5c3a..db28deaf5c2 100644 --- a/lucene/MIGRATE.txt +++ b/lucene/MIGRATE.txt @@ -57,30 +57,6 @@ enumeration APIs. Here are the major changes: ... } - The bulk read API has also changed. Instead of this: - - int[] docs = new int[256]; - int[] freqs = new int[256]; - - while(true) { - int count = td.read(docs, freqs) - if (count == 0) { - break; - } - // use docs[i], freqs[i] - } - - do this: - - DocsEnum.BulkReadResult bulk = td.getBulkResult(); - while(true) { - int count = td.read(); - if (count == 0) { - break; - } - // use bulk.docs.ints[i] and bulk.freqs.ints[i] - } - * TermPositions is renamed to DocsAndPositionsEnum, and no longer extends the docs only enumerator (DocsEnum). @@ -170,7 +146,7 @@ enumeration APIs. 
Here are the major changes: Bits liveDocs = reader.getLiveDocs(); DocsEnum docsEnum = null; - docsEnum = termsEnum.docs(liveDocs, docsEnum); + docsEnum = termsEnum.docs(liveDocs, docsEnum, needsFreqs); You can pass in a prior DocsEnum and it will be reused if possible. @@ -187,7 +163,7 @@ enumeration APIs. Here are the major changes: String field; BytesRef text; - DocsEnum docsEnum = reader.termDocsEnum(reader.getLiveDocs(), field, text); + DocsEnum docsEnum = reader.termDocsEnum(reader.getLiveDocs(), field, text, needsFreqs); Likewise for DocsAndPositionsEnum. diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java index a11bdaf47c3..e2a1ea07ff6 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java @@ -285,8 +285,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase { DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "content", - new BytesRef("another"), - false); + new BytesRef("another")); assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(1, tps.freq()); assertEquals(3, tps.nextPosition()); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java index 0d66b499a20..dd0954e3d74 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java @@ -111,7 +111,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase { TermsEnum termsEnum = vector.iterator(null); termsEnum.next(); assertEquals(2, 
termsEnum.totalTermFreq()); - DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null, true); + DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null); assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(2, positions.freq()); positions.nextPosition(); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java index 09b427cf97d..9a2d3057851 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTermsReader.java @@ -694,20 +694,14 @@ public class BlockTermsReader extends FieldsProducer { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException { + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { // Positions were not indexed: return null; } - if (needsOffsets && - fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) { - // Offsets were not indexed: - return null; - } - decodeMetaData(); - return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, needsOffsets); + return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, flags); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java index 46c4532d509..78fe2a06285 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java @@ -902,20 +902,14 @@ public class BlockTreeTermsReader extends FieldsProducer { } @Override - public DocsAndPositionsEnum 
docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException { + public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { // Positions were not indexed: return null; } - if (needsOffsets && - fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) { - // Offsets were not indexed: - return null; - } - currentFrame.decodeMetaData(); - return postingsReader.docsAndPositions(fieldInfo, currentFrame.termState, skipDocs, reuse, needsOffsets); + return postingsReader.docsAndPositions(fieldInfo, currentFrame.termState, skipDocs, reuse, flags); } private int getState() { @@ -2137,21 +2131,15 @@ public class BlockTreeTermsReader extends FieldsProducer { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException { + public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { // Positions were not indexed: return null; } - if (needsOffsets && - fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) { - // Offsets were not indexed: - return null; - } - assert !eof; currentFrame.decodeMetaData(); - return postingsReader.docsAndPositions(fieldInfo, currentFrame.state, skipDocs, reuse, needsOffsets); + return postingsReader.docsAndPositions(fieldInfo, currentFrame.state, skipDocs, reuse, flags); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java b/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java index 020f4893156..f8d54043c76 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java +++ 
b/lucene/core/src/java/org/apache/lucene/codecs/PostingsReaderBase.java @@ -56,7 +56,7 @@ public abstract class PostingsReaderBase implements Closeable { /** Must fully consume state, since after this call that * TermState may be reused. */ public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsAndPositionsEnum reuse, - boolean needsOffsets) throws IOException; + int flags) throws IOException; public abstract void close() throws IOException; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java index 1784d1fc368..9896d2cf3f4 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/TermVectorsWriter.java @@ -224,16 +224,9 @@ public abstract class TermVectorsWriter implements Closeable { // TODO: we need a "query" API where we can ask (via // flex API) what this term was indexed with... 
// Both positions & offsets: - docsAndPositionsEnum = termsEnum.docsAndPositions(null, null, true); - final boolean hasOffsets; + docsAndPositionsEnum = termsEnum.docsAndPositions(null, null); + boolean hasOffsets = false; boolean hasPositions = false; - if (docsAndPositionsEnum == null) { - // Fallback: no offsets - docsAndPositionsEnum = termsEnum.docsAndPositions(null, null, false); - hasOffsets = false; - } else { - hasOffsets = true; - } if (docsAndPositionsEnum != null) { final int docID = docsAndPositionsEnum.nextDoc(); @@ -242,23 +235,19 @@ public abstract class TermVectorsWriter implements Closeable { for(int posUpto=0; posUpto 0; hasPositions = pos != -1; + hasOffsets = startOffset != -1; startField(fieldInfo, numTerms, hasPositions, hasOffsets); startTerm(termsEnum.term(), freq); startedField = true; } - final int startOffset; - final int endOffset; if (hasOffsets) { - startOffset = docsAndPositionsEnum.startOffset(); - endOffset = docsAndPositionsEnum.endOffset(); assert startOffset != -1; assert endOffset != -1; - } else { - startOffset = -1; - endOffset = -1; } assert !hasPositions || pos >= 0; addPosition(pos, startOffset, endOffset); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java index 85dc6132b49..d216c6a61f6 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/TermsConsumer.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.Comparator; import org.apache.lucene.index.FieldInfo; // javadocs +import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.MergeState; import org.apache.lucene.index.TermsEnum; @@ -150,7 +151,7 @@ public abstract class TermsConsumer { while((term = termsEnum.next()) != null) { // We can pass null for liveDocs, because the // mapping enum will skip the 
non-live docs: - postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn, false); + postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn, DocsAndPositionsEnum.FLAG_PAYLOADS); assert postingsEnumIn != null; postingsEnum.reset(postingsEnumIn); // set PayloadProcessor @@ -184,7 +185,7 @@ public abstract class TermsConsumer { while((term = termsEnum.next()) != null) { // We can pass null for liveDocs, because the // mapping enum will skip the non-live docs: - postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn, true); + postingsEnumIn = (MultiDocsAndPositionsEnum) termsEnum.docsAndPositions(null, postingsEnumIn); assert postingsEnumIn != null; postingsEnum.reset(postingsEnumIn); // set PayloadProcessor diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java index 5898d301c45..f42b2179363 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java @@ -250,11 +250,14 @@ public class Lucene40PostingsReader extends PostingsReaderBase { @Override public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, - DocsAndPositionsEnum reuse, boolean needsOffsets) + DocsAndPositionsEnum reuse, int flags) throws IOException { boolean hasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; + // TODO: can we optimize if FLAG_PAYLOADS / FLAG_OFFSETS + // isn't passed? 
+ // TODO: refactor if (fieldInfo.hasPayloads() || hasOffsets) { SegmentFullPositionsEnum docsEnum; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java index 0bd9b4d0fe2..b1f94d670fc 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java @@ -490,10 +490,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException { - if (needsOffsets && !storeOffsets) { - return null; - } + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { if (!storePositions && !storeOffsets) { return null; @@ -640,14 +637,20 @@ public class Lucene40TermVectorsReader extends TermVectorsReader { @Override public int startOffset() { - assert startOffsets != null; - return startOffsets[nextPos-1]; + if (startOffsets == null) { + return -1; + } else { + return startOffsets[nextPos-1]; + } } @Override public int endOffset() { - assert endOffsets != null; - return endOffsets[nextPos-1]; + if (endOffsets == null) { + return -1; + } else { + return endOffsets[nextPos-1]; + } } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java index e0bab97dcd6..d261d9e7625 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java @@ -314,7 +314,7 @@ public class DirectPostingsFormat extends PostingsFormat { termOffsets[count+1] = termOffset; if (hasPos) { - docsAndPositionsEnum = 
termsEnum.docsAndPositions(null, docsAndPositionsEnum, hasOffsets); + docsAndPositionsEnum = termsEnum.docsAndPositions(null, docsAndPositionsEnum); } else { docsEnum = termsEnum.docs(null, docsEnum, hasFreq); } @@ -858,13 +858,10 @@ public class DirectPostingsFormat extends PostingsFormat { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) { + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) { if (!hasPos) { return null; } - if (needsOffsets && !hasOffsets) { - return null; - } // TODO: implement reuse, something like Pulsing: // it's hairy! @@ -1420,13 +1417,10 @@ public class DirectPostingsFormat extends PostingsFormat { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) { + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) { if (!hasPos) { return null; } - if (needsOffsets && !hasOffsets) { - return null; - } // TODO: implement reuse, something like Pulsing: // it's hairy! 
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java index fb25738f8fb..a6ec974feca 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java @@ -714,13 +714,9 @@ public class MemoryPostingsFormat extends PostingsFormat { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) { + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) { boolean hasOffsets = field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; - if (needsOffsets && !hasOffsets) { - return null; // not available - } - if (field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { return null; } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java index e805e77f1a8..3a13507eb95 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsReader.java @@ -213,7 +213,7 @@ public class PulsingPostingsReader extends PostingsReaderBase { @Override public DocsAndPositionsEnum docsAndPositions(FieldInfo field, BlockTermState _termState, Bits liveDocs, DocsAndPositionsEnum reuse, - boolean needsOffsets) throws IOException { + int flags) throws IOException { final PulsingTermState termState = (PulsingTermState) _termState; @@ -240,11 +240,11 @@ public class PulsingPostingsReader extends PostingsReaderBase { } else { if (reuse instanceof PulsingDocsAndPositionsEnum) { DocsAndPositionsEnum wrapped = wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, 
liveDocs, (DocsAndPositionsEnum) getOther(reuse), - needsOffsets); + flags); setOther(wrapped, reuse); // wrapped.other = reuse return wrapped; } else { - return wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, reuse, needsOffsets); + return wrappedPostingsReader.docsAndPositions(field, termState.wrappedTermState, liveDocs, reuse, flags); } } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java index 73f5729d398..f14fb007321 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/sep/SepPostingsReader.java @@ -278,13 +278,9 @@ public class SepPostingsReader extends PostingsReaderBase { @Override public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState _termState, Bits liveDocs, - DocsAndPositionsEnum reuse, boolean needsOffsets) + DocsAndPositionsEnum reuse, int flags) throws IOException { - if (needsOffsets) { - return null; - } - assert fieldInfo.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; final SepTermState termState = (SepTermState) _termState; SepDocsAndPositionsEnum postingsEnum; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java index 50e33e44d58..528c07177dd 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java @@ -208,19 +208,13 @@ class SimpleTextFieldsReader extends FieldsProducer { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException { + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) 
throws IOException { if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { // Positions were not indexed return null; } - if (needsOffsets && - indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) < 0) { - // Offsets were not indexed - return null; - } - SimpleTextDocsAndPositionsEnum docsAndPositionsEnum; if (reuse != null && reuse instanceof SimpleTextDocsAndPositionsEnum && ((SimpleTextDocsAndPositionsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) { docsAndPositionsEnum = (SimpleTextDocsAndPositionsEnum) reuse; diff --git a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java index 5fc92d6662f..376c1ee2337 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java @@ -365,14 +365,11 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException { + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { SimpleTVPostings postings = current.getValue(); if (postings.positions == null && postings.startOffsets == null) { return null; } - if (needsOffsets && (postings.startOffsets == null || postings.endOffsets == null)) { - return null; - } // TODO: reuse SimpleTVDocsAndPositionsEnum e = new SimpleTVDocsAndPositionsEnum(); e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets); @@ -507,12 +504,20 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader { @Override public int startOffset() { - return startOffsets[nextPos-1]; + if (startOffsets == null) { + return -1; + } else { + return 
startOffsets[nextPos-1]; + } } @Override public int endOffset() { - return endOffsets[nextPos-1]; + if (endOffsets == null) { + return -1; + } else { + return endOffsets[nextPos-1]; + } } } } diff --git a/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java index 445e6d2bd73..7c317f9d4e8 100644 --- a/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/AtomicReader.java @@ -133,12 +133,24 @@ public abstract class AtomicReader extends IndexReader { } return null; } + + /** Returns {@link DocsAndPositionsEnum} for the specified + * field & term. This will return null if either the + * field or term does not exist or positions weren't indexed. + * @see #termPositionsEnum(Bits, String, BytesRef, int) */ + public final DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term) throws IOException { + return termPositionsEnum(liveDocs, field, term, DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS); + } + /** Returns {@link DocsAndPositionsEnum} for the specified - * field & term. This may return null, if either the - * field or term does not exist, or needsOffsets is - * true but offsets were not indexed for this field. */ - public final DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term, boolean needsOffsets) throws IOException { + * field & term, with control over whether offsets and payloads are + * required. Some codecs may be able to optimize their + * implementation when offsets and/or payloads are not required. + * This will return null, if either the field or term + * does not exist or positions weren't indexed. See + * {@link TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)}. 
*/ + public final DocsAndPositionsEnum termPositionsEnum(Bits liveDocs, String field, BytesRef term, int flags) throws IOException { assert field != null; assert term != null; final Fields fields = fields(); @@ -147,7 +159,7 @@ public abstract class AtomicReader extends IndexReader { if (terms != null) { final TermsEnum termsEnum = terms.iterator(null); if (termsEnum.seekExact(term, true)) { - return termsEnum.docsAndPositions(liveDocs, null, needsOffsets); + return termsEnum.docsAndPositions(liveDocs, null, flags); } } } diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java index 90be0a295a7..96dd81625f2 100644 --- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java @@ -34,6 +34,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldType; // for javadocs import org.apache.lucene.index.DocValues.SortedSource; import org.apache.lucene.index.DocValues.Source; +import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; @@ -682,7 +683,6 @@ public class CheckIndex { DocsEnum docs = null; DocsEnum docsAndFreqs = null; DocsAndPositionsEnum postings = null; - DocsAndPositionsEnum offsets = null; String lastField = null; final FieldsEnum fieldsEnum = fields.iterator(); @@ -699,11 +699,11 @@ public class CheckIndex { // check that the field is in fieldinfos, and is indexed. 
// TODO: add a separate test to check this for different reader impls - FieldInfo fi = fieldInfos.fieldInfo(field); - if (fi == null) { + FieldInfo fieldInfo = fieldInfos.fieldInfo(field); + if (fieldInfo == null) { throw new RuntimeException("fieldsEnum inconsistent with fieldInfos, no fieldInfos for: " + field); } - if (!fi.isIndexed()) { + if (!fieldInfo.isIndexed()) { throw new RuntimeException("fieldsEnum inconsistent with fieldInfos, isIndexed == false for: " + field); } @@ -757,8 +757,7 @@ public class CheckIndex { docs = termsEnum.docs(liveDocs, docs, false); docsAndFreqs = termsEnum.docs(liveDocs, docsAndFreqs, true); - postings = termsEnum.docsAndPositions(liveDocs, postings, false); - offsets = termsEnum.docsAndPositions(liveDocs, offsets, true); + postings = termsEnum.docsAndPositions(liveDocs, postings); if (hasOrd) { long ord = -1; @@ -782,29 +781,20 @@ public class CheckIndex { final DocsEnum docsAndFreqs2; final boolean hasPositions; final boolean hasFreqs; - final boolean hasOffsets; - if (offsets != null) { - docs2 = postings = offsets; - docsAndFreqs2 = postings = offsets; - hasOffsets = true; - hasPositions = true; - hasFreqs = true; - } else if (postings != null) { + final boolean hasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; + if (postings != null) { docs2 = postings; docsAndFreqs2 = postings; - hasOffsets = false; hasPositions = true; hasFreqs = true; } else if (docsAndFreqs != null) { docs2 = docsAndFreqs; docsAndFreqs2 = docsAndFreqs; - hasOffsets = false; hasPositions = false; hasFreqs = true; } else { docs2 = docs; docsAndFreqs2 = null; - hasOffsets = false; hasPositions = false; hasFreqs = false; } @@ -927,7 +917,7 @@ public class CheckIndex { if (hasPositions) { for(int idx=0;idx<7;idx++) { final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8); - postings = termsEnum.docsAndPositions(liveDocs, postings, hasOffsets); + postings = termsEnum.docsAndPositions(liveDocs, 
postings); final int docID = postings.advance(skipDocID); if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; @@ -1460,44 +1450,35 @@ public class CheckIndex { BytesRef term = null; while ((term = termsEnum.next()) != null) { - final boolean hasPositions; - final boolean hasOffsets; + final boolean hasProx; final boolean hasFreqs; // TODO: really we need a reflection/query // API so we can just ask what was indexed // instead of "probing"... + // TODO: cleanup: + // Try offsets: - postings = termsEnum.docsAndPositions(null, postings, true); + postings = termsEnum.docsAndPositions(null, postings); if (postings == null) { - hasOffsets = false; - // Try only positions: - postings = termsEnum.docsAndPositions(null, postings, false); - if (postings == null) { - hasPositions = false; - // Try docIDs & freqs: - docs = termsEnum.docs(null, docs, true); - if (docs == null) { - // OK, only docIDs: - hasFreqs = false; - docs = termsEnum.docs(null, docs, false); - } else { - hasFreqs = true; - } + hasProx = false; + // Try docIDs & freqs: + docs = termsEnum.docs(null, docs, true); + if (docs == null) { + // OK, only docIDs: + hasFreqs = false; + docs = termsEnum.docs(null, docs, false); } else { - hasPositions = true; - hasFreqs = true; + hasFreqs = true; } } else { - hasOffsets = true; - // NOTE: may be a lie... 
but we accept -1 - hasPositions = true; + hasProx = true; hasFreqs = true; } final DocsEnum docs2; - if (hasPositions || hasOffsets) { + if (hasProx) { assert postings != null; docs2 = postings; } else { @@ -1510,23 +1491,18 @@ public class CheckIndex { if (!postingsTermsEnum.seekExact(term, true)) { throw new RuntimeException("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j); } - postingsPostings = postingsTermsEnum.docsAndPositions(null, postingsPostings, true); + postingsPostings = postingsTermsEnum.docsAndPositions(null, postingsPostings); if (postingsPostings == null) { - // Term vectors were indexed w/ offsets but postings were not - postingsPostings = postingsTermsEnum.docsAndPositions(null, postingsPostings, false); - if (postingsPostings == null) { - postingsDocs = postingsTermsEnum.docs(null, postingsDocs, true); + // Term vectors were indexed w/ pos but postings were not + postingsDocs = postingsTermsEnum.docs(null, postingsDocs, true); + if (postingsDocs == null) { + postingsHasFreq = false; + postingsDocs = postingsTermsEnum.docs(null, postingsDocs, false); if (postingsDocs == null) { - postingsHasFreq = false; - postingsDocs = postingsTermsEnum.docs(null, postingsDocs, false); - if (postingsDocs == null) { - throw new RuntimeException("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j); - } - } else { - postingsHasFreq = true; + throw new RuntimeException("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j); } } else { - postingsHasFreq = true; + postingsHasFreq = true; } } else { postingsHasFreq = true; @@ -1555,7 +1531,7 @@ public class CheckIndex { throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": freq=" + tf + " differs from postings freq=" + postingsDocs2.freq()); } - if (hasPositions || hasOffsets) { + if (hasProx) { for (int i = 0; i < tf; i++) { int pos = postings.nextPosition(); if (postingsPostings != 
null) { @@ -1565,32 +1541,30 @@ public class CheckIndex { } } - if (hasOffsets) { - // Call the methods to at least make - // sure they don't throw exc: - final int startOffset = postings.startOffset(); - final int endOffset = postings.endOffset(); - // TODO: these are too anal...? - /* - if (endOffset < startOffset) { - throw new RuntimeException("vector startOffset=" + startOffset + " is > endOffset=" + endOffset); - } - if (startOffset < lastStartOffset) { - throw new RuntimeException("vector startOffset=" + startOffset + " is < prior startOffset=" + lastStartOffset); - } - lastStartOffset = startOffset; - */ + // Call the methods to at least make + // sure they don't throw exc: + final int startOffset = postings.startOffset(); + final int endOffset = postings.endOffset(); + // TODO: these are too anal...? + /* + if (endOffset < startOffset) { + throw new RuntimeException("vector startOffset=" + startOffset + " is > endOffset=" + endOffset); + } + if (startOffset < lastStartOffset) { + throw new RuntimeException("vector startOffset=" + startOffset + " is < prior startOffset=" + lastStartOffset); + } + lastStartOffset = startOffset; + */ - if (postingsPostings != null) { - final int postingsStartOffset = postingsPostings.startOffset(); + if (postingsPostings != null) { + final int postingsStartOffset = postingsPostings.startOffset(); - final int postingsEndOffset = postingsPostings.endOffset(); - if (startOffset != -1 && postingsStartOffset != -1 && startOffset != postingsStartOffset) { - throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": startOffset=" + startOffset + " differs from postings startOffset=" + postingsStartOffset); - } - if (endOffset != -1 && postingsEndOffset != -1 && endOffset != postingsEndOffset) { - throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": endOffset=" + endOffset + " differs from postings endOffset=" + postingsEndOffset); - } + final int postingsEndOffset = 
postingsPostings.endOffset(); + if (startOffset != -1 && postingsStartOffset != -1 && startOffset != postingsStartOffset) { + throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": startOffset=" + startOffset + " differs from postings startOffset=" + postingsStartOffset); + } + if (endOffset != -1 && postingsEndOffset != -1 && endOffset != postingsEndOffset) { + throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": endOffset=" + endOffset + " differs from postings endOffset=" + postingsEndOffset); } } } diff --git a/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java b/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java index b84a95f6705..002b6aa4902 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocTermOrds.java @@ -673,8 +673,8 @@ public class DocTermOrds { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException { - return termsEnum.docsAndPositions(liveDocs, reuse, needsOffsets); + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + return termsEnum.docsAndPositions(liveDocs, reuse, flags); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java b/lucene/core/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java index 4a28bc2336a..d604439a169 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocsAndPositionsEnum.java @@ -23,7 +23,14 @@ import org.apache.lucene.util.BytesRef; /** Also iterates through positions. */ public abstract class DocsAndPositionsEnum extends DocsEnum { + /** Flag to pass to {@link TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)} + * if you require offsets in the returned enum. 
*/ + public static final int FLAG_OFFSETS = 0x1; + /** Flag to pass to {@link TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)} + * if you require payloads in the returned enum. */ + public static final int FLAG_PAYLOADS = 0x2; + /** Returns the next position. You should only call this * up to {@link DocsEnum#freq()} times else * the behavior is not defined. If positions were not diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java index 866a00755ee..782c8f06494 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java @@ -186,8 +186,8 @@ public class FilterAtomicReader extends AtomicReader { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException { - return in.docsAndPositions(liveDocs, reuse, needsOffsets); + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + return in.docsAndPositions(liveDocs, reuse, flags); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java index 13532bc96b2..c5de38e4cf7 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java @@ -171,8 +171,8 @@ public abstract class FilteredTermsEnum extends TermsEnum { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException { - return tenum.docsAndPositions(bits, reuse, needsOffsets); + public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse, int flags) throws IOException { + return tenum.docsAndPositions(bits, reuse, flags); } /** 
This enum does not support seeking! diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiFields.java b/lucene/core/src/java/org/apache/lucene/index/MultiFields.java index cdaeeba54f3..6ac319cda30 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiFields.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiFields.java @@ -139,15 +139,27 @@ public final class MultiFields extends Fields { /** Returns {@link DocsAndPositionsEnum} for the specified * field & term. This may return null if the term does - * not exist or positions were not indexed. */ - public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, boolean needsOffsets) throws IOException { + * not exist or positions were not indexed. + * @see #getTermPositionsEnum(IndexReader, Bits, String, BytesRef, int) */ + public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term) throws IOException { + return getTermPositionsEnum(r, liveDocs, field, term, DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS); + } + + /** Returns {@link DocsAndPositionsEnum} for the specified + * field & term, with control over whether offsets and payloads are + * required. Some codecs may be able to optimize + * their implementation when offsets and/or payloads are not + * required. This will return null if the term does not + * exist or positions were not indexed. See {@link + * TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)}. 
*/ + public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, int flags) throws IOException { assert field != null; assert term != null; final Terms terms = getTerms(r, field); if (terms != null) { final TermsEnum termsEnum = terms.iterator(null); if (termsEnum.seekExact(term, true)) { - return termsEnum.docsAndPositions(liveDocs, null, needsOffsets); + return termsEnum.docsAndPositions(liveDocs, null, flags); } } return null; diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java index 8a8f97d6aab..ce493a03edb 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java @@ -415,7 +415,7 @@ public final class MultiTermsEnum extends TermsEnum { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException { + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { MultiDocsAndPositionsEnum docsAndPositionsEnum; // Can only reuse if incoming enum is also a MultiDocsAndPositionsEnum if (reuse != null && reuse instanceof MultiDocsAndPositionsEnum) { @@ -466,7 +466,7 @@ public final class MultiTermsEnum extends TermsEnum { } assert entry.index < docsAndPositionsEnum.subDocsAndPositionsEnum.length: entry.index + " vs " + docsAndPositionsEnum.subDocsAndPositionsEnum.length + "; " + subs.length; - final DocsAndPositionsEnum subPostings = entry.terms.docsAndPositions(b, docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index], needsOffsets); + final DocsAndPositionsEnum subPostings = entry.terms.docsAndPositions(b, docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index], flags); if (subPostings != null) { docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index] = subPostings; diff --git 
a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java index 7d0a580f62f..e279b0bd9ec 100644 --- a/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/TermsEnum.java @@ -138,6 +138,8 @@ public abstract class TermsEnum implements BytesRefIterator { * deleted documents into account. */ public abstract long totalTermFreq() throws IOException; + // TODO: cutover to flags / make needsFreqs "a hint" / add + // default w/ needsFreqs=true /** Get {@link DocsEnum} for the current term. Do not * call this when the enum is unpositioned. This method * may return null (if needsFreqs is true but freqs were @@ -152,14 +154,32 @@ public abstract class TermsEnum implements BytesRefIterator { public abstract DocsEnum docs(Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException; /** Get {@link DocsAndPositionsEnum} for the current term. - * Do not call this when the enum is unpositioned. - * This method will only return null if needsOffsets is - * true but offsets were not indexed. + * Do not call this when the enum is unpositioned. This + * method will return null if positions were not + * indexed. 
+ * * @param liveDocs unset bits are documents that should not * be returned * @param reuse pass a prior DocsAndPositionsEnum for possible reuse - * @param needsOffsets true if offsets are required */ - public abstract DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException; + * @see #docsAndPositions(Bits, DocsAndPositionsEnum, int) */ + public final DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException { + return docsAndPositions(liveDocs, reuse, DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS); + } + + /** Get {@link DocsAndPositionsEnum} for the current term, + * with control over whether offsets and payloads are + * required. Some codecs may be able to optimize their + * implementation when offsets and/or payloads are not required. + * Do not call this when the enum is unpositioned. This + * will return null if positions were not indexed. + + * @param liveDocs unset bits are documents that should not + * be returned + * @param reuse pass a prior DocsAndPositionsEnum for possible reuse + * @param flags specifies which optional per-position values you + * require; see {@link DocsAndPositionsEnum#FLAG_OFFSETS} and + * {@link DocsAndPositionsEnum#FLAG_PAYLOADS}. 
*/ + public abstract DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException; /** * Expert: Returns the TermsEnums internal state to position the TermsEnum @@ -225,7 +245,7 @@ public abstract class TermsEnum implements BytesRefIterator { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) { + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) { throw new IllegalStateException("this method should never be called"); } diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java index f3ec3478a82..2dfb97610c1 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java +++ b/lucene/core/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -1047,7 +1047,7 @@ class FieldCacheImpl implements FieldCache { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException { + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { throw new UnsupportedOperationException(); } diff --git a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java index 673f01107c0..a8c7f746204 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/search/FuzzyTermsEnum.java @@ -278,8 +278,8 @@ public class FuzzyTermsEnum extends TermsEnum { @Override public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, - DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException { - return actualEnum.docsAndPositions(liveDocs, reuse, needsOffsets); + DocsAndPositionsEnum reuse, int flags) throws IOException { + return 
actualEnum.docsAndPositions(liveDocs, reuse, flags); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 50dab036460..4e5b703f68c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -226,7 +226,7 @@ public class MultiPhraseQuery extends Query { return null; } termsEnum.seekExact(term.bytes(), termState); - postingsEnum = termsEnum.docsAndPositions(liveDocs, null, false); + postingsEnum = termsEnum.docsAndPositions(liveDocs, null, 0); if (postingsEnum == null) { // term does exist, but has no positions @@ -484,7 +484,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { continue; } termsEnum.seekExact(term.bytes(), termState); - DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, false); + DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, 0); if (postings == null) { // term does exist, but has no positions throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")"); diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index 68b5d69dca5..b51c54daad5 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -264,7 +264,7 @@ public class PhraseQuery extends Query { return null; } te.seekExact(t.bytes(), state); - DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null, false); + DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null, 0); // PhraseQuery on a field that did not index // positions. 
diff --git a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java index 86e0d356051..7fcbfa0135d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/spans/SpanTermQuery.java @@ -120,7 +120,7 @@ public class SpanTermQuery extends SpanQuery { final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null); termsEnum.seekExact(term.bytes(), state); - final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null, false); + final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null, DocsAndPositionsEnum.FLAG_PAYLOADS); if (postings != null) { return new TermSpans(postings, term); diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java b/lucene/core/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java index 818d471c12c..af307bbd0a6 100644 --- a/lucene/core/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java +++ b/lucene/core/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java @@ -75,8 +75,7 @@ public class TestCachingTokenFilter extends BaseTokenStreamTestCase { DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "preanalyzed", - new BytesRef("term1"), - false); + new BytesRef("term1")); assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(1, termPositions.freq()); assertEquals(0, termPositions.nextPosition()); @@ -84,8 +83,7 @@ public class TestCachingTokenFilter extends BaseTokenStreamTestCase { termPositions = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "preanalyzed", - new BytesRef("term2"), - false); + new BytesRef("term2")); assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(2, termPositions.freq()); assertEquals(1, 
termPositions.nextPosition()); @@ -94,8 +92,7 @@ public class TestCachingTokenFilter extends BaseTokenStreamTestCase { termPositions = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "preanalyzed", - new BytesRef("term3"), - false); + new BytesRef("term3")); assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(1, termPositions.freq()); assertEquals(2, termPositions.nextPosition()); diff --git a/lucene/core/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java b/lucene/core/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java index 3048ca71a3c..c84fbd3ae49 100644 --- a/lucene/core/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java +++ b/lucene/core/src/test/org/apache/lucene/codecs/pulsing/TestPulsingReuse.java @@ -35,7 +35,6 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.store.BaseDirectoryWrapper; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; @@ -71,7 +70,7 @@ public class TestPulsingReuse extends LuceneTestCase { DocsAndPositionsEnum posReuse = null; te = segment.terms("foo").iterator(null); while (te.next() != null) { - posReuse = te.docsAndPositions(null, posReuse, false); + posReuse = te.docsAndPositions(null, posReuse); allEnums.put(posReuse, true); } @@ -113,7 +112,7 @@ public class TestPulsingReuse extends LuceneTestCase { DocsAndPositionsEnum posReuse = null; te = segment.terms("foo").iterator(null); while (te.next() != null) { - posReuse = te.docsAndPositions(null, posReuse, false); + posReuse = te.docsAndPositions(null, posReuse); allEnums.put(posReuse, true); } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java index fdb9307731c..d47fa410245 100644 --- 
a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java @@ -474,7 +474,7 @@ public class TestCodecs extends LuceneTestCase { if (field.omitTF) { this.verifyDocs(term.docs, term.positions, _TestUtil.docs(random(), termsEnum, null, null, false), false); } else { - this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null, false), true); + this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null), true); } // Test random seek by ord: @@ -494,7 +494,7 @@ public class TestCodecs extends LuceneTestCase { if (field.omitTF) { this.verifyDocs(term.docs, term.positions, _TestUtil.docs(random(), termsEnum, null, null, false), false); } else { - this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null, false), true); + this.verifyDocs(term.docs, term.positions, termsEnum.docsAndPositions(null, null), true); } } @@ -546,7 +546,7 @@ public class TestCodecs extends LuceneTestCase { final DocsEnum docsAndFreqs; final DocsAndPositionsEnum postings; if (!field.omitTF) { - postings = termsEnum.docsAndPositions(null, null, false); + postings = termsEnum.docsAndPositions(null, null); if (postings != null) { docs = docsAndFreqs = postings; } else { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java b/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java index f9d3bf3afcf..9fd10fe8606 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDirectoryReader.java @@ -631,8 +631,8 @@ public void testFilesOpenClose() throws IOException { while(enum1.next() != null) { assertEquals("Different terms", enum1.term(), enum2.next()); - DocsAndPositionsEnum tp1 = enum1.docsAndPositions(liveDocs, null, false); - DocsAndPositionsEnum tp2 = enum2.docsAndPositions(liveDocs, null, false); + DocsAndPositionsEnum tp1 = 
enum1.docsAndPositions(liveDocs, null); + DocsAndPositionsEnum tp2 = enum2.docsAndPositions(liveDocs, null); while(tp1.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { assertTrue(tp2.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java index f69225f1e25..ab9abbfbaf7 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java @@ -247,7 +247,7 @@ public class TestDoc extends LuceneTestCase { out.print(" term=" + field + ":" + tis.term()); out.println(" DF=" + tis.docFreq()); - DocsAndPositionsEnum positions = tis.docsAndPositions(reader.getLiveDocs(), null, false); + DocsAndPositionsEnum positions = tis.docsAndPositions(reader.getLiveDocs(), null); while (positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { out.print(" doc=" + positions.docID()); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocsAndPositions.java b/lucene/core/src/test/org/apache/lucene/index/TestDocsAndPositions.java index 536b1cfe48d..4113652eb2e 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDocsAndPositions.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocsAndPositions.java @@ -92,7 +92,7 @@ public class TestDocsAndPositions extends LuceneTestCase { public DocsAndPositionsEnum getDocsAndPositions(AtomicReader reader, BytesRef bytes, Bits liveDocs) throws IOException { - return reader.termPositionsEnum(null, fieldName, bytes, false); + return reader.termPositionsEnum(null, fieldName, bytes); } /** @@ -352,7 +352,7 @@ public class TestDocsAndPositions extends LuceneTestCase { writer.addDocument(doc); DirectoryReader reader = writer.getReader(); AtomicReader r = getOnlySegmentReader(reader); - DocsAndPositionsEnum disi = r.termPositionsEnum(null, "foo", new BytesRef("bar"), false); + DocsAndPositionsEnum disi = r.termPositionsEnum(null, "foo", new 
BytesRef("bar")); int docid = disi.docID(); assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS); assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); @@ -360,7 +360,7 @@ public class TestDocsAndPositions extends LuceneTestCase { // now reuse and check again TermsEnum te = r.terms("foo").iterator(null); assertTrue(te.seekExact(new BytesRef("bar"), true)); - disi = te.docsAndPositions(null, disi, false); + disi = te.docsAndPositions(null, disi); docid = disi.docID(); assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS); assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocumentWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestDocumentWriter.java index 11445dda98a..13d35d253b7 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDocumentWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDocumentWriter.java @@ -129,7 +129,7 @@ public class TestDocumentWriter extends LuceneTestCase { SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random())); DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), - "repeated", new BytesRef("repeated"), false); + "repeated", new BytesRef("repeated")); assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); int freq = termPositions.freq(); assertEquals(2, freq); @@ -200,7 +200,7 @@ public class TestDocumentWriter extends LuceneTestCase { writer.close(); SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random())); - DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, reader.getLiveDocs(), "f1", new BytesRef("a"), false); + DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader, reader.getLiveDocs(), "f1", new BytesRef("a")); assertTrue(termPositions.nextDoc() != 
DocIdSetIterator.NO_MORE_DOCS); int freq = termPositions.freq(); assertEquals(3, freq); @@ -243,18 +243,18 @@ public class TestDocumentWriter extends LuceneTestCase { writer.close(); SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, newIOContext(random())); - DocsAndPositionsEnum termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term1"), false); + DocsAndPositionsEnum termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term1")); assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(1, termPositions.freq()); assertEquals(0, termPositions.nextPosition()); - termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term2"), false); + termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term2")); assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(2, termPositions.freq()); assertEquals(1, termPositions.nextPosition()); assertEquals(3, termPositions.nextPosition()); - termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term3"), false); + termPositions = reader.termPositionsEnum(reader.getLiveDocs(), "preanalyzed", new BytesRef("term3")); assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(1, termPositions.freq()); assertEquals(2, termPositions.nextPosition()); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java index 80856d841c6..423200871e0 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java @@ -329,17 +329,17 @@ public class TestDuelingCodecs extends LuceneTestCase { assertEquals(info, term, rightTermsEnum.next()); assertTermStats(leftTermsEnum, 
rightTermsEnum); if (deep) { - assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, false), - rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions, false)); - assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions, false), - rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions, false)); + assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions), + rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions)); + assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions), + rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions)); assertPositionsSkipping(leftTermsEnum.docFreq(), - leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, false), - rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions, false)); + leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions), + rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions)); assertPositionsSkipping(leftTermsEnum.docFreq(), - leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions, false), - rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions, false)); + leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions), + rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions)); // with freqs: assertDocsEnum(leftDocs = leftTermsEnum.docs(null, leftDocs, true), diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFilterAtomicReader.java b/lucene/core/src/test/org/apache/lucene/index/TestFilterAtomicReader.java index 706abb97fcb..93f3cbd6b9b 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestFilterAtomicReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestFilterAtomicReader.java @@ -89,8 +89,8 @@ public class 
TestFilterAtomicReader extends LuceneTestCase { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException { - return new TestPositions(super.docsAndPositions(liveDocs, reuse == null ? null : ((FilterDocsAndPositionsEnum) reuse).in, needsOffsets)); + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { + return new TestPositions(super.docsAndPositions(liveDocs, reuse == null ? null : ((FilterDocsAndPositionsEnum) reuse).in, flags)); } } @@ -164,8 +164,7 @@ public class TestFilterAtomicReader extends LuceneTestCase { assertEquals(TermsEnum.SeekStatus.FOUND, terms.seekCeil(new BytesRef("one"))); - DocsAndPositionsEnum positions = terms.docsAndPositions(MultiFields.getLiveDocs(reader), - null, false); + DocsAndPositionsEnum positions = terms.docsAndPositions(MultiFields.getLiveDocs(reader), null); while (positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { assertTrue((positions.docID() % 2) == 1); } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index cba706ce5bd..86ce08ba27c 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -940,14 +940,14 @@ public class TestIndexWriter extends LuceneTestCase { Terms tpv = r.getTermVectors(0).terms("field"); TermsEnum termsEnum = tpv.iterator(null); assertNotNull(termsEnum.next()); - DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, false); + DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null); assertNotNull(dpEnum); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(1, dpEnum.freq()); assertEquals(100, dpEnum.nextPosition()); assertNotNull(termsEnum.next()); - dpEnum = termsEnum.docsAndPositions(null, dpEnum, false); + 
dpEnum = termsEnum.docsAndPositions(null, dpEnum); assertNotNull(dpEnum); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(1, dpEnum.freq()); @@ -1588,7 +1588,7 @@ public class TestIndexWriter extends LuceneTestCase { // Make sure position is still incremented when // massive term is skipped: - DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader, null, "content", new BytesRef("another"), false); + DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader, null, "content", new BytesRef("another")); assertEquals(0, tps.nextDoc()); assertEquals(1, tps.freq()); assertEquals(3, tps.nextPosition()); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexableField.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexableField.java index 79f3fa3f222..095c4012ae7 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexableField.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexableField.java @@ -264,14 +264,14 @@ public class TestIndexableField extends LuceneTestCase { TermsEnum termsEnum = tfv.iterator(null); assertEquals(new BytesRef(""+counter), termsEnum.next()); assertEquals(1, termsEnum.totalTermFreq()); - DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, false); + DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(1, dpEnum.freq()); assertEquals(1, dpEnum.nextPosition()); assertEquals(new BytesRef("text"), termsEnum.next()); assertEquals(1, termsEnum.totalTermFreq()); - dpEnum = termsEnum.docsAndPositions(null, dpEnum, false); + dpEnum = termsEnum.docsAndPositions(null, dpEnum); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(1, dpEnum.freq()); assertEquals(0, dpEnum.nextPosition()); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestLazyProxSkipping.java 
b/lucene/core/src/test/org/apache/lucene/index/TestLazyProxSkipping.java index 63b70ba21f4..b8716b7dbaf 100755 --- a/lucene/core/src/test/org/apache/lucene/index/TestLazyProxSkipping.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestLazyProxSkipping.java @@ -157,8 +157,7 @@ public class TestLazyProxSkipping extends LuceneTestCase { DocsAndPositionsEnum tp = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), this.field, - new BytesRef("b"), - false); + new BytesRef("b")); for (int i = 0; i < 10; i++) { tp.nextDoc(); @@ -169,8 +168,7 @@ public class TestLazyProxSkipping extends LuceneTestCase { tp = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), this.field, - new BytesRef("a"), - false); + new BytesRef("a")); for (int i = 0; i < 10; i++) { tp.nextDoc(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestLongPostings.java b/lucene/core/src/test/org/apache/lucene/index/TestLongPostings.java index 6a1c883a9bc..c20315258db 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestLongPostings.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestLongPostings.java @@ -170,7 +170,7 @@ public class TestLongPostings extends LuceneTestCase { System.out.println("\nTEST: iter=" + iter + " doS1=" + doS1); } - final DocsAndPositionsEnum postings = MultiFields.getTermPositionsEnum(r, null, "field", new BytesRef(term), false); + final DocsAndPositionsEnum postings = MultiFields.getTermPositionsEnum(r, null, "field", new BytesRef(term)); int docID = -1; while(docID < DocIdSetIterator.NO_MORE_DOCS) { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java b/lucene/core/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java index 5c440f91ccb..1746109a871 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestMultiLevelSkipList.java @@ -86,8 +86,7 @@ public class TestMultiLevelSkipList 
extends LuceneTestCase { counter = 0; DocsAndPositionsEnum tp = reader.termPositionsEnum(reader.getLiveDocs(), term.field(), - new BytesRef(term.text()), - false); + new BytesRef(term.text())); checkSkipTo(tp, 14, 185); // no skips checkSkipTo(tp, 17, 190); // one skip on level 0 diff --git a/lucene/core/src/test/org/apache/lucene/index/TestOmitPositions.java b/lucene/core/src/test/org/apache/lucene/index/TestOmitPositions.java index 84a01caab16..b8c92423429 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestOmitPositions.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestOmitPositions.java @@ -51,7 +51,7 @@ public class TestOmitPositions extends LuceneTestCase { IndexReader reader = w.getReader(); w.close(); - assertNull(MultiFields.getTermPositionsEnum(reader, null, "foo", new BytesRef("test"), false)); + assertNull(MultiFields.getTermPositionsEnum(reader, null, "foo", new BytesRef("test"))); DocsEnum de = _TestUtil.docs(random(), reader, "foo", new BytesRef("test"), null, null, true); while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPayloadProcessorProvider.java b/lucene/core/src/test/org/apache/lucene/index/TestPayloadProcessorProvider.java index ecf4703006e..f2b2657d4ed 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPayloadProcessorProvider.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestPayloadProcessorProvider.java @@ -160,7 +160,7 @@ public class TestPayloadProcessorProvider extends LuceneTestCase { IndexReader reader = DirectoryReader.open(dir); try { int numPayloads = 0; - DocsAndPositionsEnum tpe = MultiFields.getTermPositionsEnum(reader, null, field, text, false); + DocsAndPositionsEnum tpe = MultiFields.getTermPositionsEnum(reader, null, field, text); while (tpe.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { tpe.nextPosition(); if (tpe.hasPayload()) { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPayloads.java 
b/lucene/core/src/test/org/apache/lucene/index/TestPayloads.java index 3e1a88ab803..977e7d6c297 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPayloads.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestPayloads.java @@ -188,8 +188,7 @@ public class TestPayloads extends LuceneTestCase { tps[i] = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), terms[i].field(), - new BytesRef(terms[i].text()), - false); + new BytesRef(terms[i].text())); } while (tps[0].nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { @@ -226,8 +225,7 @@ public class TestPayloads extends LuceneTestCase { DocsAndPositionsEnum tp = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), terms[0].field(), - new BytesRef(terms[0].text()), - false); + new BytesRef(terms[0].text())); tp.nextDoc(); tp.nextPosition(); // NOTE: prior rev of this test was failing to first @@ -255,8 +253,7 @@ public class TestPayloads extends LuceneTestCase { tp = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), terms[1].field(), - new BytesRef(terms[1].text()), - false); + new BytesRef(terms[1].text())); tp.nextDoc(); tp.nextPosition(); assertEquals("Wrong payload length.", 1, tp.getPayload().length); @@ -299,8 +296,7 @@ public class TestPayloads extends LuceneTestCase { tp = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), fieldName, - new BytesRef(singleTerm), - false); + new BytesRef(singleTerm)); tp.nextDoc(); tp.nextPosition(); @@ -501,7 +497,7 @@ public class TestPayloads extends LuceneTestCase { DocsAndPositionsEnum tp = null; while (terms.next() != null) { String termText = terms.term().utf8ToString(); - tp = terms.docsAndPositions(liveDocs, tp, false); + tp = terms.docsAndPositions(liveDocs, tp); while(tp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { int freq = tp.freq(); for (int i = 0; i < freq; i++) { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java 
b/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java index 2c74eca7f4d..9bc540fcc1e 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java @@ -518,7 +518,7 @@ public class TestPostingsFormat extends LuceneTestCase { maxIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; boolean doCheckOffsets = allowOffsets && random().nextInt(3) <= 2; - boolean doCheckPayloads = options.contains(Option.PAYLOADS) && allowPositions && fieldInfo.hasPayloads(); + boolean doCheckPayloads = options.contains(Option.PAYLOADS) && allowPositions && fieldInfo.hasPayloads() && random().nextInt(3) <= 2;; DocsEnum prevDocsEnum = null; @@ -528,15 +528,24 @@ public class TestPostingsFormat extends LuceneTestCase { if (!doCheckPositions) { if (allowPositions && random().nextInt(10) == 7) { // 10% of the time, even though we will not check positions, pull a DocsAndPositions enum - if (VERBOSE) { - System.out.println(" get DocsAndPositionsEnum (but we won't check positions)"); - } if (options.contains(Option.REUSE_ENUMS) && random().nextInt(10) < 9) { prevDocsEnum = threadState.reuseDocsAndPositionsEnum; } - threadState.reuseDocsAndPositionsEnum = termsEnum.docsAndPositions(liveDocs, (DocsAndPositionsEnum) prevDocsEnum, false); + int flags = 0; + if (random().nextBoolean()) { + flags |= DocsAndPositionsEnum.FLAG_OFFSETS; + } + if (random().nextBoolean()) { + flags |= DocsAndPositionsEnum.FLAG_PAYLOADS; + } + + if (VERBOSE) { + System.out.println(" get DocsAndPositionsEnum (but we won't check positions) flags=" + flags); + } + + threadState.reuseDocsAndPositionsEnum = termsEnum.docsAndPositions(liveDocs, (DocsAndPositionsEnum) prevDocsEnum, flags); docsEnum = threadState.reuseDocsAndPositionsEnum; docsAndPositionsEnum = threadState.reuseDocsAndPositionsEnum; } else { @@ -551,13 +560,23 @@ public class TestPostingsFormat extends LuceneTestCase { 
docsAndPositionsEnum = null; } } else { - if (VERBOSE) { - System.out.println(" get DocsAndPositionsEnum"); - } if (options.contains(Option.REUSE_ENUMS) && random().nextInt(10) < 9) { prevDocsEnum = threadState.reuseDocsAndPositionsEnum; } - threadState.reuseDocsAndPositionsEnum = termsEnum.docsAndPositions(liveDocs, (DocsAndPositionsEnum) prevDocsEnum, doCheckOffsets); + + int flags = 0; + if (doCheckOffsets || random().nextInt(3) == 1) { + flags |= DocsAndPositionsEnum.FLAG_OFFSETS; + } + if (doCheckPayloads|| random().nextInt(3) == 1) { + flags |= DocsAndPositionsEnum.FLAG_PAYLOADS; + } + + if (VERBOSE) { + System.out.println(" get DocsAndPositionsEnum flags=" + flags); + } + + threadState.reuseDocsAndPositionsEnum = termsEnum.docsAndPositions(liveDocs, (DocsAndPositionsEnum) prevDocsEnum, flags); docsEnum = threadState.reuseDocsAndPositionsEnum; docsAndPositionsEnum = threadState.reuseDocsAndPositionsEnum; } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java b/lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java index 71c8f8c8d9f..cbd924d042d 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java @@ -83,7 +83,7 @@ public class TestPostingsOffsets extends LuceneTestCase { IndexReader r = w.getReader(); w.close(); - DocsAndPositionsEnum dp = MultiFields.getTermPositionsEnum(r, null, "content", new BytesRef("a"), true); + DocsAndPositionsEnum dp = MultiFields.getTermPositionsEnum(r, null, "content", new BytesRef("a")); assertNotNull(dp); assertEquals(0, dp.nextDoc()); assertEquals(2, dp.freq()); @@ -95,7 +95,7 @@ public class TestPostingsOffsets extends LuceneTestCase { assertEquals(17, dp.endOffset()); assertEquals(DocIdSetIterator.NO_MORE_DOCS, dp.nextDoc()); - dp = MultiFields.getTermPositionsEnum(r, null, "content", new BytesRef("b"), true); + dp = MultiFields.getTermPositionsEnum(r, null, "content", new 
BytesRef("b")); assertNotNull(dp); assertEquals(0, dp.nextDoc()); assertEquals(1, dp.freq()); @@ -104,7 +104,7 @@ public class TestPostingsOffsets extends LuceneTestCase { assertEquals(9, dp.endOffset()); assertEquals(DocIdSetIterator.NO_MORE_DOCS, dp.nextDoc()); - dp = MultiFields.getTermPositionsEnum(r, null, "content", new BytesRef("c"), true); + dp = MultiFields.getTermPositionsEnum(r, null, "content", new BytesRef("c")); assertNotNull(dp); assertEquals(0, dp.nextDoc()); assertEquals(1, dp.freq()); @@ -155,7 +155,7 @@ public class TestPostingsOffsets extends LuceneTestCase { String terms[] = { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" }; for (String term : terms) { - DocsAndPositionsEnum dp = MultiFields.getTermPositionsEnum(reader, null, "numbers", new BytesRef(term), true); + DocsAndPositionsEnum dp = MultiFields.getTermPositionsEnum(reader, null, "numbers", new BytesRef(term)); int doc; while((doc = dp.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { String storedNumbers = reader.document(doc).get("numbers"); @@ -183,7 +183,7 @@ public class TestPostingsOffsets extends LuceneTestCase { for (int j = 0; j < numSkippingTests; j++) { int num = _TestUtil.nextInt(random(), 100, Math.min(numDocs-1, 999)); - DocsAndPositionsEnum dp = MultiFields.getTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred"), true); + DocsAndPositionsEnum dp = MultiFields.getTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred")); int doc = dp.advance(num); assertEquals(num, doc); int freq = dp.freq(); @@ -312,7 +312,8 @@ public class TestPostingsOffsets extends LuceneTestCase { assertEquals(expected.size(), docs.freq()); } - docsAndPositions = termsEnum.docsAndPositions(null, docsAndPositions, false); + // explicitly exclude offsets here + docsAndPositions = termsEnum.docsAndPositions(null, docsAndPositions, DocsAndPositionsEnum.FLAG_PAYLOADS); assertNotNull(docsAndPositions); //System.out.println(" doc/freq/pos"); 
while((doc = docsAndPositions.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { @@ -327,7 +328,7 @@ public class TestPostingsOffsets extends LuceneTestCase { } } - docsAndPositionsAndOffsets = termsEnum.docsAndPositions(null, docsAndPositions, true); + docsAndPositionsAndOffsets = termsEnum.docsAndPositions(null, docsAndPositions); assertNotNull(docsAndPositionsAndOffsets); //System.out.println(" doc/freq/pos/offs"); while((doc = docsAndPositionsAndOffsets.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentReader.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentReader.java index 2f23c8429bf..204498205f8 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentReader.java @@ -149,8 +149,7 @@ public class TestSegmentReader extends LuceneTestCase { DocsAndPositionsEnum positions = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), DocHelper.TEXT_FIELD_1_KEY, - new BytesRef("field"), - false); + new BytesRef("field")); // NOTE: prior rev of this test was failing to first // call next here: assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestStressIndexing2.java b/lucene/core/src/test/org/apache/lucene/index/TestStressIndexing2.java index e9caa08ea95..8d60cf4cd3a 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestStressIndexing2.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestStressIndexing2.java @@ -408,7 +408,7 @@ public class TestStressIndexing2 extends LuceneTestCase { BytesRef term2; while((term2 = termsEnum3.next()) != null) { System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq()); - dpEnum = termsEnum3.docsAndPositions(null, dpEnum, false); + dpEnum = termsEnum3.docsAndPositions(null, dpEnum); if (dpEnum != null) { assertTrue(dpEnum.nextDoc() != 
DocIdSetIterator.NO_MORE_DOCS); final int freq = dpEnum.freq(); @@ -442,7 +442,7 @@ public class TestStressIndexing2 extends LuceneTestCase { BytesRef term2; while((term2 = termsEnum3.next()) != null) { System.out.println(" " + term2.utf8ToString() + ": freq=" + termsEnum3.totalTermFreq()); - dpEnum = termsEnum3.docsAndPositions(null, dpEnum, false); + dpEnum = termsEnum3.docsAndPositions(null, dpEnum); if (dpEnum != null) { assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); final int freq = dpEnum.freq(); @@ -632,8 +632,8 @@ public class TestStressIndexing2 extends LuceneTestCase { assertEquals(termsEnum1.totalTermFreq(), termsEnum2.totalTermFreq()); - dpEnum1 = termsEnum1.docsAndPositions(null, dpEnum1, false); - dpEnum2 = termsEnum2.docsAndPositions(null, dpEnum2, false); + dpEnum1 = termsEnum1.docsAndPositions(null, dpEnum1); + dpEnum2 = termsEnum2.docsAndPositions(null, dpEnum2); if (dpEnum1 != null) { assertNotNull(dpEnum2); int docID1 = dpEnum1.nextDoc(); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java b/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java index 0af3eba1b62..c2b98a2c89f 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsReader.java @@ -252,7 +252,7 @@ public class TestTermVectorsReader extends LuceneTestCase { //System.out.println("Term: " + term); assertEquals(testTerms[i], term); - dpEnum = termsEnum.docsAndPositions(null, dpEnum, false); + dpEnum = termsEnum.docsAndPositions(null, dpEnum); assertNotNull(dpEnum); int doc = dpEnum.docID(); assertTrue(doc == -1 || doc == DocIdSetIterator.NO_MORE_DOCS); @@ -263,7 +263,7 @@ public class TestTermVectorsReader extends LuceneTestCase { } assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc()); - dpEnum = termsEnum.docsAndPositions(null, dpEnum, true); + dpEnum = termsEnum.docsAndPositions(null, dpEnum); doc = dpEnum.docID(); 
assertTrue(doc == -1 || doc == DocIdSetIterator.NO_MORE_DOCS); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); @@ -306,7 +306,7 @@ public class TestTermVectorsReader extends LuceneTestCase { String term = text.utf8ToString(); assertEquals(testTerms[i], term); - dpEnum = termsEnum.docsAndPositions(null, dpEnum, false); + dpEnum = termsEnum.docsAndPositions(null, dpEnum); assertNotNull(dpEnum); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(dpEnum.freq(), positions[i].length); @@ -315,7 +315,7 @@ public class TestTermVectorsReader extends LuceneTestCase { } assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc()); - dpEnum = termsEnum.docsAndPositions(null, dpEnum, true); + dpEnum = termsEnum.docsAndPositions(null, dpEnum); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertNotNull(dpEnum); assertEquals(dpEnum.freq(), positions[i].length); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsWriter.java index 8751031cb15..fb2a425e651 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsWriter.java @@ -69,7 +69,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { // Token "" occurred once assertEquals(1, termsEnum.totalTermFreq()); - DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, true); + DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); dpEnum.nextPosition(); assertEquals(8, dpEnum.startOffset()); @@ -78,7 +78,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { // Token "abcd" occurred three times assertEquals(new BytesRef("abcd"), termsEnum.next()); - dpEnum = termsEnum.docsAndPositions(null, dpEnum, true); + dpEnum = termsEnum.docsAndPositions(null, dpEnum); assertEquals(3, 
termsEnum.totalTermFreq()); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); @@ -118,7 +118,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { IndexReader r = DirectoryReader.open(dir); TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator(null); assertNotNull(termsEnum.next()); - DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, true); + DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null); assertEquals(2, termsEnum.totalTermFreq()); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); @@ -153,7 +153,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { IndexReader r = DirectoryReader.open(dir); TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator(null); assertNotNull(termsEnum.next()); - DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, true); + DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null); assertEquals(2, termsEnum.totalTermFreq()); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); @@ -192,7 +192,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { IndexReader r = DirectoryReader.open(dir); TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator(null); assertNotNull(termsEnum.next()); - DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, true); + DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null); assertEquals(2, termsEnum.totalTermFreq()); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); @@ -228,7 +228,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { IndexReader r = DirectoryReader.open(dir); TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator(null); assertNotNull(termsEnum.next()); - DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, true); + DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null); assertEquals(2, termsEnum.totalTermFreq()); assertTrue(dpEnum.nextDoc() 
!= DocIdSetIterator.NO_MORE_DOCS); @@ -265,7 +265,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { IndexReader r = DirectoryReader.open(dir); TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator(null); assertNotNull(termsEnum.next()); - DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, true); + DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); dpEnum.nextPosition(); @@ -273,14 +273,14 @@ public class TestTermVectorsWriter extends LuceneTestCase { assertEquals(4, dpEnum.endOffset()); assertNotNull(termsEnum.next()); - dpEnum = termsEnum.docsAndPositions(null, dpEnum, true); + dpEnum = termsEnum.docsAndPositions(null, dpEnum); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); dpEnum.nextPosition(); assertEquals(11, dpEnum.startOffset()); assertEquals(17, dpEnum.endOffset()); assertNotNull(termsEnum.next()); - dpEnum = termsEnum.docsAndPositions(null, dpEnum, true); + dpEnum = termsEnum.docsAndPositions(null, dpEnum); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); dpEnum.nextPosition(); assertEquals(18, dpEnum.startOffset()); @@ -310,7 +310,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { IndexReader r = DirectoryReader.open(dir); TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator(null); assertNotNull(termsEnum.next()); - DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, true); + DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null); assertEquals(1, (int) termsEnum.totalTermFreq()); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); @@ -319,7 +319,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { assertEquals(7, dpEnum.endOffset()); assertNotNull(termsEnum.next()); - dpEnum = termsEnum.docsAndPositions(null, dpEnum, true); + dpEnum = termsEnum.docsAndPositions(null, dpEnum); assertTrue(dpEnum.nextDoc() != 
DocIdSetIterator.NO_MORE_DOCS); dpEnum.nextPosition(); assertEquals(8, dpEnum.startOffset()); @@ -353,7 +353,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { IndexReader r = DirectoryReader.open(dir); TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator(null); assertNotNull(termsEnum.next()); - DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, true); + DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null); assertEquals(1, (int) termsEnum.totalTermFreq()); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); @@ -362,7 +362,7 @@ public class TestTermVectorsWriter extends LuceneTestCase { assertEquals(4, dpEnum.endOffset()); assertNotNull(termsEnum.next()); - dpEnum = termsEnum.docsAndPositions(null, dpEnum, true); + dpEnum = termsEnum.docsAndPositions(null, dpEnum); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); dpEnum.nextPosition(); assertEquals(6, dpEnum.startOffset()); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java index 89571d60e03..e1110bbdec2 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPositionIncrement.java @@ -103,8 +103,7 @@ public class TestPositionIncrement extends LuceneTestCase { DocsAndPositionsEnum pos = MultiFields.getTermPositionsEnum(searcher.getIndexReader(), MultiFields.getLiveDocs(searcher.getIndexReader()), "field", - new BytesRef("1"), - false); + new BytesRef("1")); pos.nextDoc(); // first token should be at position 0 assertEquals(0, pos.nextPosition()); @@ -112,8 +111,7 @@ public class TestPositionIncrement extends LuceneTestCase { pos = MultiFields.getTermPositionsEnum(searcher.getIndexReader(), MultiFields.getLiveDocs(searcher.getIndexReader()), "field", - new BytesRef("2"), - false); + new BytesRef("2")); pos.nextDoc(); // second token should 
be at position 2 assertEquals(2, pos.nextPosition()); @@ -216,8 +214,7 @@ public class TestPositionIncrement extends LuceneTestCase { DocsAndPositionsEnum tp = r.termPositionsEnum(r.getLiveDocs(), "content", - new BytesRef("a"), - false); + new BytesRef("a")); int count = 0; assertTrue(tp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java b/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java index 02e2dd09333..ab977f1a7af 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestTermVectors.java @@ -134,19 +134,19 @@ public class TestTermVectors extends LuceneTestCase { assertNotNull(terms); TermsEnum termsEnum = terms.iterator(null); assertEquals("content", termsEnum.next().utf8ToString()); - dpEnum = termsEnum.docsAndPositions(null, dpEnum, false); + dpEnum = termsEnum.docsAndPositions(null, dpEnum); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(1, dpEnum.freq()); assertEquals(expectedPositions[0], dpEnum.nextPosition()); assertEquals("here", termsEnum.next().utf8ToString()); - dpEnum = termsEnum.docsAndPositions(null, dpEnum, false); + dpEnum = termsEnum.docsAndPositions(null, dpEnum); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(1, dpEnum.freq()); assertEquals(expectedPositions[1], dpEnum.nextPosition()); assertEquals("some", termsEnum.next().utf8ToString()); - dpEnum = termsEnum.docsAndPositions(null, dpEnum, false); + dpEnum = termsEnum.docsAndPositions(null, dpEnum); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(1, dpEnum.freq()); assertEquals(expectedPositions[2], dpEnum.nextPosition()); @@ -176,7 +176,7 @@ public class TestTermVectors extends LuceneTestCase { if (shouldBePosVector || shouldBeOffVector) { while(true) { - dpEnum = termsEnum.docsAndPositions(null, dpEnum, shouldBeOffVector); + dpEnum = 
termsEnum.docsAndPositions(null, dpEnum); assertNotNull(dpEnum); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); @@ -426,7 +426,7 @@ public class TestTermVectors extends LuceneTestCase { assertNotNull(termsEnum.next()); assertEquals("one", termsEnum.term().utf8ToString()); assertEquals(5, termsEnum.totalTermFreq()); - DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, false); + DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null); assertNotNull(dpEnum); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(5, dpEnum.freq()); @@ -434,7 +434,7 @@ public class TestTermVectors extends LuceneTestCase { assertEquals(i, dpEnum.nextPosition()); } - dpEnum = termsEnum.docsAndPositions(null, dpEnum, true); + dpEnum = termsEnum.docsAndPositions(null, dpEnum); assertNotNull(dpEnum); assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); assertEquals(5, dpEnum.freq()); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java b/lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java index 6bcc63df425..0ce41e09aa1 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/PayloadIterator.java @@ -53,7 +53,7 @@ public class PayloadIterator { this.buffer = buffer; // TODO (Facet): avoid Multi*? 
Bits liveDocs = MultiFields.getLiveDocs(indexReader); - this.tp = MultiFields.getTermPositionsEnum(indexReader, liveDocs, term.field(), term.bytes(), false); + this.tp = MultiFields.getTermPositionsEnum(indexReader, liveDocs, term.field(), term.bytes(), DocsAndPositionsEnum.FLAG_PAYLOADS); } /** diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/ParentArray.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/ParentArray.java index 88897ccbeca..4b6b1d62ae5 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/ParentArray.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/ParentArray.java @@ -106,7 +106,7 @@ class ParentArray { Bits liveDocs = MultiFields.getLiveDocs(indexReader); DocsAndPositionsEnum positions = MultiFields.getTermPositionsEnum(indexReader, liveDocs, Consts.FIELD_PAYLOADS, new BytesRef(Consts.PAYLOAD_PARENT), - false); + DocsAndPositionsEnum.FLAG_PAYLOADS); if ((positions == null || positions.advance(first) == DocIdSetIterator.NO_MORE_DOCS) && first < num) { throw new CorruptIndexException("Missing parent data for category " + first); } diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java index 8606e171925..f17b112b528 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java @@ -127,7 +127,7 @@ public class TokenSources { private static boolean hasPositions(Terms vector) throws IOException { final TermsEnum termsEnum = vector.iterator(null); if (termsEnum.next() != null) { - DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, false); + DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_PAYLOADS); if (dpEnum != null) { int doc = dpEnum.nextDoc(); assert 
doc >= 0 && doc != DocIdSetIterator.NO_MORE_DOCS; @@ -222,18 +222,21 @@ public class TokenSources { DocsAndPositionsEnum dpEnum = null; while ((text = termsEnum.next()) != null) { - dpEnum = termsEnum.docsAndPositions(null, dpEnum, true); + dpEnum = termsEnum.docsAndPositions(null, dpEnum); if (dpEnum == null) { throw new IllegalArgumentException( "Required TermVector Offset information was not found"); } - final String term = text.utf8ToString(); dpEnum.nextDoc(); final int freq = dpEnum.freq(); for(int posUpto=0;posUpto= 0; Token token; if (hasOffsets) { token = new Token(text.utf8ToString(), diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java index 9cf018eb721..236437cd1b1 100644 --- a/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java +++ b/lucene/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java @@ -104,7 +104,7 @@ public class FieldTermStack { if (!termSet.contains(term)) { continue; } - dpEnum = termsEnum.docsAndPositions(null, dpEnum, true); + dpEnum = termsEnum.docsAndPositions(null, dpEnum); if (dpEnum == null) { // null snippet return; @@ -118,6 +118,9 @@ public class FieldTermStack { int pos = dpEnum.nextPosition(); // For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html weight = ( float ) ( Math.log( numDocs / ( double ) ( reader.docFreq( fieldName, text ) + 1 ) ) + 1.0 ); + if (dpEnum.startOffset() < 0) { + return; // no offsets, null snippet + } termList.add( new TermInfo( term, dpEnum.startOffset(), dpEnum.endOffset(), pos, weight ) ); } } diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index cf1724b3f7e..9cdbe8796de 100644 --- 
a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -880,10 +880,7 @@ public class MemoryIndex { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) { - if (needsOffsets) { - return null; - } + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) { if (reuse == null || !(reuse instanceof MemoryDocsAndPositionsEnum)) { reuse = new MemoryDocsAndPositionsEnum(); } diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java b/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java index f67c684459f..a9ccb2516ae 100644 --- a/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java +++ b/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java @@ -225,7 +225,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase { MemoryIndex memory = new MemoryIndex(true); memory.addField("foo", "bar", analyzer); AtomicReader reader = (AtomicReader) memory.createSearcher().getIndexReader(); - DocsAndPositionsEnum disi = reader.termPositionsEnum(null, "foo", new BytesRef("bar"), false); + DocsAndPositionsEnum disi = reader.termPositionsEnum(null, "foo", new BytesRef("bar")); int docid = disi.docID(); assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS); assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); @@ -236,7 +236,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase { // now reuse and check again TermsEnum te = reader.terms("foo").iterator(null); assertTrue(te.seekExact(new BytesRef("bar"), true)); - disi = te.docsAndPositions(null, disi, false); + disi = te.docsAndPositions(null, disi); docid = disi.docID(); assertTrue(docid == -1 || docid == DocIdSetIterator.NO_MORE_DOCS); assertTrue(disi.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); diff 
--git a/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java index cbe9c6550e6..5296e7b8a39 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/ramonly/RAMOnlyPostingsFormat.java @@ -391,10 +391,7 @@ public class RAMOnlyPostingsFormat extends PostingsFormat { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) { - if (needsOffsets) { - return null; - } + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) { return new RAMDocsAndPositionsEnum(ramField.termToDocs.get(current), liveDocs); } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java index 5141b204c03..418e340b6ef 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java +++ b/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java @@ -138,7 +138,7 @@ public class AssertingAtomicReader extends FilterAtomicReader { } @Override - public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, boolean needsOffsets) throws IOException { + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException { assert state == State.POSITIONED: "docsAndPositions(...) 
called on unpositioned TermsEnum"; // TODO: should we give this thing a random to be super-evil, @@ -146,7 +146,7 @@ public class AssertingAtomicReader extends FilterAtomicReader { if (reuse instanceof AssertingDocsAndPositionsEnum) { reuse = ((AssertingDocsAndPositionsEnum) reuse).in; } - DocsAndPositionsEnum docs = super.docsAndPositions(liveDocs, reuse, needsOffsets); + DocsAndPositionsEnum docs = super.docsAndPositions(liveDocs, reuse, flags); return docs == null ? null : new AssertingDocsAndPositionsEnum(docs); } diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java index 2a719630fa8..59a50799ce7 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java +++ b/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java @@ -866,13 +866,23 @@ public class _TestUtil { if (!termsEnum.seekExact(term, random.nextBoolean())) { return null; } + return docs(random, termsEnum, liveDocs, reuse, needsFreqs); + } + + // Returns a DocsEnum from a positioned TermsEnum, but + // randomly sometimes uses a DocsAndFreqsEnum, DocsAndPositionsEnum. + public static DocsEnum docs(Random random, TermsEnum termsEnum, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException { if (random.nextBoolean()) { if (random.nextBoolean()) { - // TODO: cast re-use to D&PE if we can...? - DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, true); - if (docsAndPositions == null) { - docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, false); + final int flags; + switch (random.nextInt(4)) { + case 0: flags = 0; break; + case 1: flags = DocsAndPositionsEnum.FLAG_OFFSETS; break; + case 2: flags = DocsAndPositionsEnum.FLAG_PAYLOADS; break; + default: flags = DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS; break; } + // TODO: cast to DocsAndPositionsEnum? 
+ DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, flags); if (docsAndPositions != null) { return docsAndPositions; } @@ -884,28 +894,6 @@ public class _TestUtil { } return termsEnum.docs(liveDocs, reuse, needsFreqs); } - - // Returns a DocsEnum from a positioned TermsEnum, but - // randomly sometimes uses a DocsAndFreqsEnum, DocsAndPositionsEnum. - public static DocsEnum docs(Random random, TermsEnum termsEnum, Bits liveDocs, DocsEnum reuse, boolean needsFreqs) throws IOException { - if (random.nextBoolean()) { - if (random.nextBoolean()) { - // TODO: cast re-use to D&PE if we can...? - DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, true); - if (docsAndPositions == null) { - docsAndPositions = termsEnum.docsAndPositions(liveDocs, null, false); - } - if (docsAndPositions != null) { - return docsAndPositions; - } - } - final DocsEnum docsAndFreqs = termsEnum.docs(liveDocs, null, true); - if (docsAndFreqs != null) { - return docsAndFreqs; - } - } - return termsEnum.docs(liveDocs, null, needsFreqs); - } public static CharSequence stringToCharSequence(String string, Random random) { return bytesToCharSequence(new BytesRef(string), random); diff --git a/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java b/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java index 5b94c49a741..da050f95891 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java @@ -329,28 +329,19 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar termInfo.add("tf", freq); } - dpEnum = termsEnum.docsAndPositions(null, dpEnum, fieldOptions.offsets); - boolean useOffsets = fieldOptions.offsets; - if (dpEnum == null) { - useOffsets = false; - dpEnum = termsEnum.docsAndPositions(null, dpEnum, false); - } - + dpEnum = 
termsEnum.docsAndPositions(null, dpEnum); + boolean useOffsets = false; boolean usePositions = false; if (dpEnum != null) { dpEnum.nextDoc(); usePositions = fieldOptions.positions; - } - - NamedList theOffsets = null; - if (useOffsets) { - theOffsets = new NamedList(); - termInfo.add("offsets", theOffsets); + useOffsets = fieldOptions.offsets; } NamedList positionsNL = null; + NamedList theOffsets = null; - if (usePositions || theOffsets != null) { + if (usePositions || useOffsets) { for (int i = 0; i < freq; i++) { final int pos = dpEnum.nextPosition(); if (usePositions && pos >= 0) { @@ -361,6 +352,15 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar positionsNL.add("position", pos); } + if (useOffsets && theOffsets == null) { + if (dpEnum.startOffset() == -1) { + useOffsets = false; + } else { + theOffsets = new NamedList(); + termInfo.add("offsets", theOffsets); + } + } + if (theOffsets != null) { theOffsets.add("start", dpEnum.startOffset()); theOffsets.add("end", dpEnum.endOffset());