mirror of https://github.com/apache/lucene.git
LUCENE-4524: Replace DocsEnum and DocsAndPositionsEnum with PostingsEnum
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1657800 13f79535-47bb-0310-9956-ffa450edef68
commit c13216934c (parent fc3df31e2d)
@@ -12,6 +12,7 @@
     <orderEntry type="sourceFolder" forTests="false" />
     <orderEntry type="library" scope="TEST" name="JUnit" level="project" />
     <orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
+    <orderEntry type="module" scope="TEST" module-name="codecs" />
     <orderEntry type="module" module-name="memory" />
     <orderEntry type="module" module-name="misc" />
     <orderEntry type="module" module-name="queries" />
@@ -74,6 +74,11 @@ API Changes
 * LUCENE-6218: Add Collector.needsScores() and needsScores parameter
   to Weight.scorer(). (Robert Muir)
 
+* LUCENE-4524: Merge DocsEnum and DocsAndPositionsEnum into a single
+  PostingsEnum iterator. TermsEnum.docs() and TermsEnum.docsAndPositions()
+  are replaced by TermsEnum.postings(). (Alan Woodward, Simon Willnauer,
+  Robert Muir)
+
 Other
 
 * LUCENE-6193: Collapse identical catch branches in try-catch statements.
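For callers of the index API the change reads like this; a minimal before/after sketch assembled from the hunks below (termsEnum and liveDocs are illustrative):

  // Before: two enum types, obtained from two TermsEnum methods
  DocsEnum docs = termsEnum.docs(liveDocs, null, DocsEnum.FLAG_FREQS);
  DocsAndPositionsEnum positions = termsEnum.docsAndPositions(liveDocs, null);

  // After: one PostingsEnum, one method; the flags argument decides how much is decoded
  PostingsEnum docs = termsEnum.postings(liveDocs, null, PostingsEnum.FLAG_FREQS);
  PostingsEnum positions = termsEnum.postings(liveDocs, null, PostingsEnum.FLAG_ALL);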
@@ -27,7 +27,7 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;

@@ -96,7 +96,7 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
     writer.close();
 
     IndexReader reader = DirectoryReader.open(dir);
-    DocsEnum td = TestUtil.docs(random(),
+    PostingsEnum td = TestUtil.docs(random(),
                                 reader,
                                 "partnum",
                                 new BytesRef("Q36"),
@@ -31,7 +31,7 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Terms;

@@ -111,7 +111,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
     TermsEnum termsEnum = vector.iterator(null);
     termsEnum.next();
     assertEquals(2, termsEnum.totalTermFreq());
-    DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null);
+    PostingsEnum positions = termsEnum.postings(null, null, PostingsEnum.FLAG_ALL);
     assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
     assertEquals(2, positions.freq());
     positions.nextPosition();
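The assertions above trace the consumption pattern: advance with nextDoc(), read freq(), then call nextPosition() up to freq() times. Generalized into a hedged sketch over any term (only APIs that appear in this commit are used):

  PostingsEnum positions = termsEnum.postings(null, null, PostingsEnum.FLAG_ALL);
  while (positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    int freq = positions.freq();
    for (int i = 0; i < freq; i++) {
      int pos = positions.nextPosition();
      // offsets and payloads are only meaningful when they were indexed
    }
  }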
@@ -2,12 +2,11 @@ package org.apache.lucene.analysis.standard;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.standard.ClassicAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;

@@ -281,7 +280,7 @@ public class TestClassicAnalyzer extends BaseTokenStreamTestCase {
 
     // Make sure position is still incremented when
     // massive term is skipped:
-    DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader,
+    PostingsEnum tps = MultiFields.getTermPositionsEnum(reader,
                                                                 MultiFields.getLiveDocs(reader),
                                                                 "content",
                                                                 new BytesRef("another"));
@@ -932,7 +932,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
       writer.addDocument(doc);
     }
 
-  private int countDocs(DocsEnum docs) throws IOException {
+  private int countDocs(PostingsEnum docs) throws IOException {
     int count = 0;
     while((docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
       count ++;

@@ -958,7 +958,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
     // should be found exactly
     assertEquals(TermsEnum.SeekStatus.FOUND,
                  terms.seekCeil(aaaTerm));
-    assertEquals(35, countDocs(TestUtil.docs(random(), terms, null, null, DocsEnum.FLAG_NONE)));
+    assertEquals(35, countDocs(TestUtil.docs(random(), terms, null, null, PostingsEnum.FLAG_NONE)));
     assertNull(terms.next());
 
     // should hit end of field

@@ -970,12 +970,12 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
     assertEquals(TermsEnum.SeekStatus.NOT_FOUND,
                  terms.seekCeil(new BytesRef("a")));
     assertTrue(terms.term().bytesEquals(aaaTerm));
-    assertEquals(35, countDocs(TestUtil.docs(random(), terms, null, null, DocsEnum.FLAG_NONE)));
+    assertEquals(35, countDocs(TestUtil.docs(random(), terms, null, null, PostingsEnum.FLAG_NONE)));
     assertNull(terms.next());
 
     assertEquals(TermsEnum.SeekStatus.FOUND,
                  terms.seekCeil(aaaTerm));
-    assertEquals(35, countDocs(TestUtil.docs(random(), terms, null, null, DocsEnum.FLAG_NONE)));
+    assertEquals(35, countDocs(TestUtil.docs(random(), terms, null, null, PostingsEnum.FLAG_NONE)));
     assertNull(terms.next());
 
     r.close();
@@ -40,7 +40,7 @@ import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
 import org.apache.lucene.collation.CollationKeyAnalyzer;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;

@@ -497,9 +497,9 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
         continue;
       }
       TermsEnum termsEnum = terms.iterator(null);
-      DocsEnum docs = null;
+      PostingsEnum docs = null;
       while(termsEnum.next() != null) {
-        docs = TestUtil.docs(random(), termsEnum, MultiFields.getLiveDocs(reader), docs, DocsEnum.FLAG_FREQS);
+        docs = TestUtil.docs(random(), termsEnum, MultiFields.getLiveDocs(reader), docs, PostingsEnum.FLAG_FREQS);
         while(docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
           totalTokenCount2 += docs.freq();
         }
@@ -30,8 +30,7 @@ import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexOptions;

@@ -652,22 +651,19 @@ public class BlockTermsReader extends FieldsProducer {
       }
 
       @Override
-      public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+      public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
+
+        if (PostingsEnum.requiresPositions(flags)) {
+          if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+            // Positions were not indexed:
+            return null;
+          }
+        }
+
         //System.out.println("BTR.docs this=" + this);
         decodeMetaData();
         //System.out.println("BTR.docs:  state.docFreq=" + state.docFreq);
-        return postingsReader.docs(fieldInfo, state, liveDocs, reuse, flags);
-      }
-
-      @Override
-      public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
-        if (fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
-          // Positions were not indexed:
-          return null;
-        }
-
-        decodeMetaData();
-        return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, flags);
+        return postingsReader.postings(fieldInfo, state, liveDocs, reuse, flags);
       }
 
       @Override
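BlockTermsReader shows the guard that every producer now needs: when the caller's flags require positions (PostingsEnum.requiresPositions(flags)) but the field was indexed below DOCS_AND_FREQS_AND_POSITIONS, postings() returns null rather than an enum. Callers asking for positions therefore have to be prepared for a null result; a hedged caller-side sketch:

  PostingsEnum pe = termsEnum.postings(liveDocs, null, PostingsEnum.FLAG_POSITIONS);
  if (pe == null) {
    // the field has no positions indexed; fall back to a docs-only view
    pe = termsEnum.postings(liveDocs, null, PostingsEnum.FLAG_NONE);
  }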
@@ -20,8 +20,7 @@ package org.apache.lucene.codecs.blocktreeords;
 import java.io.IOException;
 
 import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermsEnum;

@@ -203,20 +202,17 @@ final class OrdsIntersectTermsEnum extends TermsEnum {
   }
 
   @Override
-  public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
-    currentFrame.decodeMetaData();
-    return fr.parent.postingsReader.docs(fr.fieldInfo, currentFrame.termState, skipDocs, reuse, flags);
-  }
+  public PostingsEnum postings(Bits skipDocs, PostingsEnum reuse, int flags) throws IOException {
 
-  @Override
-  public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
-    if (fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
-      // Positions were not indexed:
-      return null;
+    if (PostingsEnum.requiresPositions(flags)) {
+      if (fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+        // Positions were not indexed:
+        return null;
+      }
     }
 
     currentFrame.decodeMetaData();
-    return fr.parent.postingsReader.docsAndPositions(fr.fieldInfo, currentFrame.termState, skipDocs, reuse, flags);
+    return fr.parent.postingsReader.postings(fr.fieldInfo, currentFrame.termState, skipDocs, reuse, flags);
   }
 
   private int getState() {
@@ -25,8 +25,7 @@ import java.io.PrintStream;
 
 import org.apache.lucene.codecs.BlockTermState;
 import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermsEnum;

@@ -924,7 +923,15 @@ public final class OrdsSegmentTermsEnum extends TermsEnum {
   }
 
   @Override
-  public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
+  public PostingsEnum postings(Bits skipDocs, PostingsEnum reuse, int flags) throws IOException {
+
+    if (PostingsEnum.requiresPositions(flags)) {
+      if (fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+        // Positions were not indexed:
+        return null;
+      }
+    }
+
     assert !eof;
     //if (DEBUG) {
     //System.out.println("BTTR.docs seg=" + segment);

@@ -933,19 +940,7 @@ public final class OrdsSegmentTermsEnum extends TermsEnum {
     //if (DEBUG) {
    //System.out.println("  state=" + currentFrame.state);
     //}
-    return fr.parent.postingsReader.docs(fr.fieldInfo, currentFrame.state, skipDocs, reuse, flags);
-  }
-
-  @Override
-  public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
-    if (fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
-      // Positions were not indexed:
-      return null;
-    }
-
-    assert !eof;
-    currentFrame.decodeMetaData();
-    return fr.parent.postingsReader.docsAndPositions(fr.fieldInfo, currentFrame.state, skipDocs, reuse, flags);
+    return fr.parent.postingsReader.postings(fr.fieldInfo, currentFrame.state, skipDocs, reuse, flags);
   }
 
   @Override
@@ -32,8 +32,7 @@ import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.bloom.FuzzySet.ContainsResult;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexFileNames;

@@ -382,19 +381,13 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
     public long totalTermFreq() throws IOException {
       return delegate().totalTermFreq();
     }
-
-    @Override
-    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
-        DocsAndPositionsEnum reuse, int flags) throws IOException {
-      return delegate().docsAndPositions(liveDocs, reuse, flags);
-    }
 
     @Override
-    public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
+    public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags)
         throws IOException {
-      return delegate().docs(liveDocs, reuse, flags);
+      return delegate().postings(liveDocs, reuse, flags);
     }
 
   }
 
   @Override

@@ -460,7 +453,7 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
 
       FuzzySet bloomFilter = null;
 
-      DocsEnum docsEnum = null;
+      PostingsEnum postingsEnum = null;
       while (true) {
         BytesRef term = termsEnum.next();
         if (term == null) {

@@ -476,8 +469,8 @@ public final class BloomFilteringPostingsFormat extends PostingsFormat {
           bloomFilters.put(fieldInfo, bloomFilter);
         }
         // Make sure there's at least one doc for this term:
-        docsEnum = termsEnum.docs(null, docsEnum, 0);
-        if (docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
+        postingsEnum = termsEnum.postings(null, postingsEnum, 0);
+        if (postingsEnum.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
           bloomFilter.addValue(term);
         }
       }
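Note the flags argument of 0 in the bloom-filter writer above: it only needs to know whether a term has at least one document, so it requests the cheapest possible enum. Spelled out with the named constant (assuming FLAG_NONE is the zero flag, which its interchangeable use with the literal 0 elsewhere in this commit suggests):

  // doc IDs only: no freqs, positions, offsets or payloads are decoded
  postingsEnum = termsEnum.postings(null, postingsEnum, PostingsEnum.FLAG_NONE);
  if (postingsEnum.nextDoc() != PostingsEnum.NO_MORE_DOCS) {
    bloomFilter.addValue(term);
  }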
@@ -27,13 +27,13 @@ import java.util.TreeMap;
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat; // javadocs
-import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.OrdTermState;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.TermState;
@@ -51,7 +51,7 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
 import org.apache.lucene.util.automaton.RunAutomaton;
 import org.apache.lucene.util.automaton.Transition;
 
-// TODO: 
+// TODO:
 //   - build depth-N prefix hash?
 //   - or: longer dense skip lists than just next byte?
 

@@ -62,7 +62,7 @@ import org.apache.lucene.util.automaton.Transition;
  * <p><b>WARNING</b>: This is
  * exceptionally RAM intensive: it makes no effort to
  * compress the postings data, storing terms as separate
- * byte[] and postings as separate int[], but as a result it 
+ * byte[] and postings as separate int[], but as a result it
  * gives substantial increase in search performance.
  *
  * <p>This postings format supports {@link TermsEnum#ord}

@@ -89,7 +89,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
   public DirectPostingsFormat() {
     this(DEFAULT_MIN_SKIP_COUNT, DEFAULT_LOW_FREQ_CUTOFF);
   }
-  
+
   /** minSkipCount is how many terms in a row must have the
    *  same prefix before we put a skip pointer down.  Terms
    *  with docFreq <= lowFreqCutoff will use a single int[]

@@ -100,7 +100,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
     this.minSkipCount = minSkipCount;
     this.lowFreqCutoff = lowFreqCutoff;
   }
-  
+
   @Override
   public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
     return PostingsFormat.forName("Lucene50").fieldsConsumer(state);

@@ -161,7 +161,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
       }
       return sizeInBytes;
     }
-    
+
     @Override
     public Collection<Accountable> getChildResources() {
       return Accountables.namedAccountables("field", fields);

@@ -206,9 +206,10 @@ public final class DirectPostingsFormat extends PostingsFormat {
     @Override
     public long ramBytesUsed() {
       return BASE_RAM_BYTES_USED + 
-          ((postings!=null) ? RamUsageEstimator.sizeOf(postings) : 0) + 
+          ((postings!=null) ? RamUsageEstimator.sizeOf(postings) : 0) +
           ((payloads!=null) ? RamUsageEstimator.sizeOf(payloads) : 0);
     }
+
   }
 
   // TODO: maybe specialize into prx/no-prx/no-frq cases?
@@ -232,31 +233,32 @@ public final class DirectPostingsFormat extends PostingsFormat {
 
     @Override
     public long ramBytesUsed() {
 [whitespace-only change: the method body below was re-indented; the diff showed it twice, once removed and once re-added verbatim]
       long sizeInBytes = BASE_RAM_BYTES_USED;
       sizeInBytes += (docIDs!=null)? RamUsageEstimator.sizeOf(docIDs) : 0;
       sizeInBytes += (freqs!=null)? RamUsageEstimator.sizeOf(freqs) : 0;
 
       if(positions != null) {
         sizeInBytes += RamUsageEstimator.shallowSizeOf(positions);
         for(int[] position : positions) {
           sizeInBytes += (position!=null) ? RamUsageEstimator.sizeOf(position) : 0;
         }
       }
 
       if (payloads != null) {
         sizeInBytes += RamUsageEstimator.shallowSizeOf(payloads);
         for(byte[][] payload : payloads) {
           if(payload != null) {
             sizeInBytes += RamUsageEstimator.shallowSizeOf(payload);
             for(byte[] pload : payload) {
               sizeInBytes += (pload!=null) ? RamUsageEstimator.sizeOf(pload) : 0;
             }
           }
         }
       }
 
       return sizeInBytes;
     }
 
   }
 
   private final byte[] termBytes;
@@ -313,7 +315,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
       }
       terms = new TermAndSkip[numTerms];
       termOffsets = new int[1+numTerms];
-      
+
       byte[] termBytes = new byte[1024];
 
       this.minSkipCount = minSkipCount;

@@ -324,8 +326,8 @@ public final class DirectPostingsFormat extends PostingsFormat {
       hasPayloads = fieldInfo.hasPayloads();
 
       BytesRef term;
-      DocsEnum docsEnum = null;
-      DocsAndPositionsEnum docsAndPositionsEnum = null;
+      PostingsEnum postingsEnum = null;
+      PostingsEnum docsAndPositionsEnum = null;
       final TermsEnum termsEnum = termsIn.iterator(null);
       int termOffset = 0;
 

@@ -356,18 +358,18 @@ public final class DirectPostingsFormat extends PostingsFormat {
         termOffsets[count+1] = termOffset;
 
         if (hasPos) {
-          docsAndPositionsEnum = termsEnum.docsAndPositions(null, docsAndPositionsEnum);
+          docsAndPositionsEnum = termsEnum.postings(null, docsAndPositionsEnum, PostingsEnum.FLAG_ALL);
         } else {
-          docsEnum = termsEnum.docs(null, docsEnum);
+          postingsEnum = termsEnum.postings(null, postingsEnum);
         }
 
         final TermAndSkip ent;
 
-        final DocsEnum docsEnum2;
+        final PostingsEnum postingsEnum2;
         if (hasPos) {
-          docsEnum2 = docsAndPositionsEnum;
+          postingsEnum2 = docsAndPositionsEnum;
         } else {
-          docsEnum2 = docsEnum;
+          postingsEnum2 = postingsEnum;
         }
 
         int docID;
@@ -377,10 +379,10 @@ public final class DirectPostingsFormat extends PostingsFormat {
           ros.reset();
 
           // Pack postings for low-freq terms into a single int[]:
-          while ((docID = docsEnum2.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
+          while ((docID = postingsEnum2.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
             scratch.add(docID);
             if (hasFreq) {
-              final int freq = docsEnum2.freq();
+              final int freq = postingsEnum2.freq();
               scratch.add(freq);
               if (hasPos) {
                 for(int pos=0;pos<freq;pos++) {

@@ -412,7 +414,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
           }
 
           final int[] postings = scratch.get();
-        
+
           ent = new LowFreqTerm(postings, payloads, docFreq, (int) totalTermFreq);
         } else {
           final int[] docs = new int[docFreq];

@@ -441,10 +443,10 @@ public final class DirectPostingsFormat extends PostingsFormat {
           // Use separate int[] for the postings for high-freq
           // terms:
           int upto = 0;
-          while ((docID = docsEnum2.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
+          while ((docID = postingsEnum2.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
            docs[upto] = docID;
            if (hasFreq) {
-              final int freq = docsEnum2.freq();
+              final int freq = postingsEnum2.freq();
              freqs[upto] = freq;
              if (hasPos) {
                final int mult;
@@ -524,14 +526,14 @@ public final class DirectPostingsFormat extends PostingsFormat {
       sizeInBytes += ((skips!=null) ? RamUsageEstimator.sizeOf(skips) : 0);
       sizeInBytes += ((skipOffsets!=null) ? RamUsageEstimator.sizeOf(skipOffsets) : 0);
       sizeInBytes += ((sameCounts!=null) ? RamUsageEstimator.sizeOf(sameCounts) : 0);
-      
+
       if(terms!=null) {
         sizeInBytes += RamUsageEstimator.shallowSizeOf(terms);
         for(TermAndSkip termAndSkip : terms) {
           sizeInBytes += (termAndSkip!=null) ? termAndSkip.ramBytesUsed() : 0;
         }
       }
-      
+
       return sizeInBytes;
     }

@@ -547,7 +549,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
       int upto = termOffsets[ord];
       final int termLen = termOffsets[1+ord] - upto;
       int otherUpto = other.offset;
-      
+
       final int stop = upto + Math.min(termLen, other.length);
       while (upto < stop) {
         int diff = (termBytes[upto++] & 0xFF) - (otherBytes[otherUpto++] & 0xFF);

@@ -555,7 +557,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
           return diff;
         }
       }
-      
+
       // One is a prefix of the other, or, they are equal:
       return termLen - other.length;
     }

@@ -707,7 +709,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
     public boolean hasPositions() {
       return hasPos;
     }
-    
+
     @Override
     public boolean hasPayloads() {
       return hasPayloads;
@@ -855,10 +857,26 @@ public final class DirectPostingsFormat extends PostingsFormat {
     }
 
     @Override
-    public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
+    public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
       // TODO: implement reuse
       // it's hairy!
 
+      if ((flags & PostingsEnum.FLAG_POSITIONS) >= PostingsEnum.FLAG_POSITIONS) {
+        if (!hasPos) {
+          return null;
+        }
+
+        if (terms[termOrd] instanceof LowFreqTerm) {
+          final LowFreqTerm term = ((LowFreqTerm) terms[termOrd]);
+          final int[] postings = term.postings;
+          final byte[] payloads = term.payloads;
+          return new LowFreqPostingsEnum(liveDocs, hasOffsets, hasPayloads).reset(postings, payloads);
+        } else {
+          final HighFreqTerm term = (HighFreqTerm) terms[termOrd];
+          return new HighFreqPostingsEnum(liveDocs, hasOffsets).reset(term.docIDs, term.freqs, term.positions, term.payloads);
+        }
+      }
+
       if (terms[termOrd] instanceof LowFreqTerm) {
         final int[] postings = ((LowFreqTerm) terms[termOrd]).postings;
         if (hasFreq) {

@@ -927,25 +945,6 @@ public final class DirectPostingsFormat extends PostingsFormat {
       }
     }
 
-    @Override
-    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
-      if (!hasPos) {
-        return null;
-      }
-
-      // TODO: implement reuse
-      // it's hairy!
-
-      if (terms[termOrd] instanceof LowFreqTerm) {
-        final LowFreqTerm term = ((LowFreqTerm) terms[termOrd]);
-        final int[] postings = term.postings;
-        final byte[] payloads = term.payloads;
-        return new LowFreqDocsAndPositionsEnum(liveDocs, hasOffsets, hasPayloads).reset(postings, payloads);
-      } else {
-        final HighFreqTerm term = (HighFreqTerm) terms[termOrd];
-        return new HighFreqDocsAndPositionsEnum(liveDocs, hasOffsets).reset(term.docIDs, term.freqs, term.positions, term.payloads);
-      }
-    }
   }
 
   private final class DirectIntersectTermsEnum extends TermsEnum {
@@ -1203,7 +1202,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
       // if (DEBUG) {
       //   System.out.println("  term=" + new BytesRef(termBytes, termOffset, termLength).utf8ToString() + " skips=" + Arrays.toString(skips));
       // }
-      
+
       assert termOrd < state.changeOrd;
 
       assert stateUpto <= termLength: "term.length=" + termLength + "; stateUpto=" + stateUpto;

@@ -1336,7 +1335,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
         compiledAutomaton.automaton.initTransition(nextState, states[stateUpto].transition);
         states[stateUpto].transitionUpto = -1;
         states[stateUpto].transitionMax = -1;
-        
+
         if (stateUpto == termLength) {
           // if (DEBUG) {
          //   System.out.println("  term ends after push");
@@ -1453,9 +1452,23 @@ public final class DirectPostingsFormat extends PostingsFormat {
     }
 
     @Override
-    public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
+    public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) {
       // TODO: implement reuse
       // it's hairy!
+      if ((flags & PostingsEnum.FLAG_POSITIONS) >= PostingsEnum.FLAG_POSITIONS) {
+        if (!hasPos) {
+          return null;
+        }
+        if (terms[termOrd] instanceof LowFreqTerm) {
+          final LowFreqTerm term = ((LowFreqTerm) terms[termOrd]);
+          final int[] postings = term.postings;
+          final byte[] payloads = term.payloads;
+          return new LowFreqPostingsEnum(liveDocs, hasOffsets, hasPayloads).reset(postings, payloads);
+        } else {
+          final HighFreqTerm term = (HighFreqTerm) terms[termOrd];
+          return new HighFreqPostingsEnum(liveDocs, hasOffsets).reset(term.docIDs, term.freqs, term.positions, term.payloads);
+        }
+      }
 
       if (terms[termOrd] instanceof LowFreqTerm) {
         final int[] postings = ((LowFreqTerm) terms[termOrd]).postings;

@@ -1484,26 +1497,6 @@ public final class DirectPostingsFormat extends PostingsFormat {
       }
     }
 
-    @Override
-    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
-      if (!hasPos) {
-        return null;
-      }
-
-      // TODO: implement reuse
-      // it's hairy!
-
-      if (terms[termOrd] instanceof LowFreqTerm) {
-        final LowFreqTerm term = ((LowFreqTerm) terms[termOrd]);
-        final int[] postings = term.postings;
-        final byte[] payloads = term.payloads;
-        return new LowFreqDocsAndPositionsEnum(liveDocs, hasOffsets, hasPayloads).reset(postings, payloads);
-      } else {
-        final HighFreqTerm term = (HighFreqTerm) terms[termOrd];
-        return new HighFreqDocsAndPositionsEnum(liveDocs, hasOffsets).reset(term.docIDs, term.freqs, term.positions, term.payloads);
-      }
-    }
-
     @Override
     public SeekStatus seekCeil(BytesRef term) {
       throw new UnsupportedOperationException();
@@ -1530,7 +1523,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
       return liveDocs == this.liveDocs;
     }
 
-    public DocsEnum reset(int[] postings) {
+    public PostingsEnum reset(int[] postings) {
       this.postings = postings;
       upto = -1;
       return this;

@@ -1572,13 +1565,19 @@ public final class DirectPostingsFormat extends PostingsFormat {
       return 1;
     }
 
+    @Override
+    public int nextPosition() throws IOException {
+      assert false : "Shouldn't be calling nextPosition() if you haven't asked for positions";
+      return -1;
+    }
+
     @Override
     public int advance(int target) throws IOException {
       // Linear scan, but this is low-freq term so it won't
       // be costly:
       return slowAdvance(target);
     }
-    
+
     @Override
     public long cost() {
       return postings.length;
@@ -1599,7 +1598,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
       return liveDocs == this.liveDocs;
     }
 
-    public DocsEnum reset(int[] postings) {
+    public PostingsEnum reset(int[] postings) {
       this.postings = postings;
       upto = -2;
       return this;

@@ -1640,13 +1639,19 @@ public final class DirectPostingsFormat extends PostingsFormat {
       return postings[upto+1];
     }
 
+    @Override
+    public int nextPosition() throws IOException {
+      assert false : "Shouldn't be calling nextPosition() if you haven't asked for positions";
+      return -1;
+    }
+
     @Override
     public int advance(int target) throws IOException {
       // Linear scan, but this is low-freq term so it won't
       // be costly:
       return slowAdvance(target);
     }
-    
+
     @Override
     public long cost() {
       return postings.length / 2;
@@ -1673,7 +1678,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
       return liveDocs == this.liveDocs && posMult == this.posMult;
     }
 
-    public DocsEnum reset(int[] postings) {
+    public PostingsEnum reset(int[] postings) {
       this.postings = postings;
       upto = -2;
       freq = 0;

@@ -1688,7 +1693,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
       //   System.out.println("  nextDoc freq=" + freq + " upto=" + upto + " vs " + postings.length);
       // }
       if (liveDocs == null) {
-        if (upto < postings.length) {   
+        if (upto < postings.length) {
           freq = postings[upto+1];
           assert freq > 0;
           return postings[upto];

@@ -1724,13 +1729,19 @@ public final class DirectPostingsFormat extends PostingsFormat {
       return freq;
     }
 
+    @Override
+    public int nextPosition() throws IOException {
+      assert false : "should be using LowFreqDocsAndPositionsEnum";
+      return -1;
+    }
+
     @Override
     public int advance(int target) throws IOException {
       // Linear scan, but this is low-freq term so it won't
       // be costly:
       return slowAdvance(target);
     }
-    
+
     @Override
     public long cost() {
       // TODO: could do a better estimate
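Because PostingsEnum is now the single iterator type, doc-only implementations such as the low-freq enums above must still implement nextPosition(); they assert and return -1 instead of throwing, so a caller that honors the flags it passed never reaches the stub. A hedged sketch of the defensive calling pattern (process() is a hypothetical consumer, the flag test mirrors the one used in this commit):

  // only walk positions if they were requested when the enum was created
  if ((flags & PostingsEnum.FLAG_POSITIONS) >= PostingsEnum.FLAG_POSITIONS) {
    for (int i = 0; i < postings.freq(); i++) {
      process(postings.nextPosition());
    }
  }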
@@ -1738,7 +1749,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
     }
   }
 
-  private final static class LowFreqDocsAndPositionsEnum extends DocsAndPositionsEnum {
+  private final static class LowFreqPostingsEnum extends PostingsEnum {
     private int[] postings;
     private final Bits liveDocs;
     private final int posMult;

@@ -1749,6 +1760,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
     private int docID;
     private int freq;
     private int skipPositions;
+    private int pos;
     private int startOffset;
     private int endOffset;
     private int lastPayloadOffset;

@@ -1756,7 +1768,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
     private int payloadLength;
     private byte[] payloadBytes;
 
-    public LowFreqDocsAndPositionsEnum(Bits liveDocs, boolean hasOffsets, boolean hasPayloads) {
+    public LowFreqPostingsEnum(Bits liveDocs, boolean hasOffsets, boolean hasPayloads) {
       this.liveDocs = liveDocs;
       this.hasOffsets = hasOffsets;
       this.hasPayloads = hasPayloads;

@@ -1773,10 +1785,11 @@ public final class DirectPostingsFormat extends PostingsFormat {
       }
     }
 
-    public DocsAndPositionsEnum reset(int[] postings, byte[] payloadBytes) {
+    public PostingsEnum reset(int[] postings, byte[] payloadBytes) {
       this.postings = postings;
       upto = 0;
       skipPositions = 0;
+      pos = -1;
       startOffset = -1;
       endOffset = -1;
       docID = -1;

@@ -1787,6 +1800,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
 
     @Override
     public int nextDoc() {
+      pos = -1;
       if (hasPayloads) {
         for(int i=0;i<skipPositions;i++) {
           upto++;
@@ -1845,7 +1859,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
     public int nextPosition() {
       assert skipPositions > 0;
       skipPositions--;
-      final int pos = postings[upto++];
+      pos = postings[upto++];
       if (hasOffsets) {
         startOffset = postings[upto++];
         endOffset = postings[upto++];

@@ -1884,7 +1898,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
         return null;
       }
     }
-    
+
     @Override
     public long cost() {
       // TODO: could do a better estimate

@@ -1916,7 +1930,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
       return freqs;
     }
 
-    public DocsEnum reset(int[] docIDs, int[] freqs) {
+    public PostingsEnum reset(int[] docIDs, int[] freqs) {
       this.docIDs = docIDs;
       this.freqs = freqs;
       docID = upto = -1;

@@ -2063,7 +2077,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
         return docID = docIDs[upto];
       }
     }
-    
+
     @Override
     public long cost() {
       return docIDs.length;
@@ -2071,7 +2085,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
   }
 
   // TODO: specialize offsets and not
-  private final static class HighFreqDocsAndPositionsEnum extends DocsAndPositionsEnum {
+  private final static class HighFreqPostingsEnum extends PostingsEnum {
     private int[] docIDs;
     private int[] freqs;
     private int[][] positions;

@@ -2084,7 +2098,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
     private int posUpto;
     private int[] curPositions;
 
-    public HighFreqDocsAndPositionsEnum(Bits liveDocs, boolean hasOffsets) {
+    public HighFreqPostingsEnum(Bits liveDocs, boolean hasOffsets) {
       this.liveDocs = liveDocs;
      this.hasOffsets = hasOffsets;
       posJump = hasOffsets ? 3 : 1;

@@ -2106,7 +2120,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
       return liveDocs;
     }
 
-    public DocsAndPositionsEnum reset(int[] docIDs, int[] freqs, int[][] positions, byte[][][] payloads) {
+    public PostingsEnum reset(int[] docIDs, int[] freqs, int[][] positions, byte[][][] payloads) {
       this.docIDs = docIDs;
       this.freqs = freqs;
       this.positions = positions;

@@ -2120,7 +2134,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
       upto++;
       if (liveDocs == null) {
         if (upto < docIDs.length) {
-          posUpto = -posJump;   
+          posUpto = -posJump;
           curPositions = positions[upto];
           return docID = docIDs[upto];
         }

@@ -2151,6 +2165,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
     @Override
     public int nextPosition() {
       posUpto += posJump;
+      assert posUpto < curPositions.length;
       return curPositions[posUpto];
     }
 

@@ -2301,7 +2316,7 @@ public final class DirectPostingsFormat extends PostingsFormat {
       return payload;
     }
   }
-  
+
   @Override
   public long cost() {
     return docIDs.length;
@@ -32,8 +32,7 @@ import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;

@@ -428,18 +427,9 @@ public class FSTOrdTermsReader extends FieldsProducer {
       }
 
       @Override
-      public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+      public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
         decodeMetaData();
-        return postingsReader.docs(fieldInfo, state, liveDocs, reuse, flags);
-      }
-
-      @Override
-      public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
-        if (!hasPositions()) {
-          return null;
-        }
-        decodeMetaData();
-        return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, flags);
+        return postingsReader.postings(fieldInfo, state, liveDocs, reuse, flags);
       }
 
       // TODO: this can be achieved by making use of Util.getByOutput()
@@ -31,8 +31,7 @@ import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsReaderBase;
 import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;

@@ -291,18 +290,9 @@ public class FSTTermsReader extends FieldsProducer {
       }
 
       @Override
-      public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+      public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
         decodeMetaData();
-        return postingsReader.docs(fieldInfo, state, liveDocs, reuse, flags);
-      }
-
-      @Override
-      public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
-        if (!hasPositions()) {
-          return null;
-        }
-        decodeMetaData();
-        return postingsReader.docsAndPositions(fieldInfo, state, liveDocs, reuse, flags);
+        return postingsReader.postings(fieldInfo, state, liveDocs, reuse, flags);
       }
 
       @Override
@@ -31,8 +31,7 @@ import org.apache.lucene.codecs.DocValuesProducer;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DocValues;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;

@@ -893,13 +892,9 @@ class MemoryDocValuesProducer extends DocValuesProducer {
       }
 
       @Override
-      public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+      public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
         throw new UnsupportedOperationException();
       }
-
-      @Override
-      public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
-        throw new UnsupportedOperationException();
-      }
     }
   }
@@ -31,8 +31,8 @@ import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.TermStats;
 import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.Fields;

@@ -317,8 +317,8 @@ public final class MemoryPostingsFormat extends PostingsFormat {
     FixedBitSet docsSeen = new FixedBitSet(state.segmentInfo.getDocCount());
     long sumTotalTermFreq = 0;
     long sumDocFreq = 0;
-    DocsEnum docsEnum = null;
-    DocsAndPositionsEnum posEnum = null;
+    PostingsEnum postingsEnum = null;
+    PostingsEnum posEnum = null;
     int enumFlags;
 
     IndexOptions indexOptions = fieldInfo.getIndexOptions();
@@ -330,18 +330,19 @@ public final class MemoryPostingsFormat extends PostingsFormat {
     if (writeFreqs == false) {
       enumFlags = 0;
     } else if (writePositions == false) {
-      enumFlags = DocsEnum.FLAG_FREQS;
+      enumFlags = PostingsEnum.FLAG_FREQS;
     } else if (writeOffsets == false) {
       if (writePayloads) {
-        enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS;
-      } else {
-        enumFlags = 0;
+        enumFlags = PostingsEnum.FLAG_PAYLOADS;
       }
+      else {
+        enumFlags = PostingsEnum.FLAG_POSITIONS;
+      }
     } else {
       if (writePayloads) {
-        enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS | DocsAndPositionsEnum.FLAG_OFFSETS;
+        enumFlags = PostingsEnum.FLAG_PAYLOADS | PostingsEnum.FLAG_OFFSETS;
       } else {
-        enumFlags = DocsAndPositionsEnum.FLAG_OFFSETS;
+        enumFlags = PostingsEnum.FLAG_OFFSETS;
       }
     }
 
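The cascade above picks the cheapest flags that still cover everything the field writer stores; condensed into a comment table (same logic as the hunk, nothing added):

  // freqs?  positions?  offsets?  payloads?  ->  enumFlags
  // no      -           -         -          ->  0
  // yes     no          -         -          ->  FLAG_FREQS
  // yes     yes         no        no         ->  FLAG_POSITIONS
  // yes     yes         no        yes        ->  FLAG_PAYLOADS
  // yes     yes         yes       no         ->  FLAG_OFFSETS
  // yes     yes         yes       yes        ->  FLAG_PAYLOADS | FLAG_OFFSETS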
@@ -353,18 +354,18 @@ public final class MemoryPostingsFormat extends PostingsFormat {
       termsWriter.postingsWriter.reset();
 
       if (writePositions) {
-        posEnum = termsEnum.docsAndPositions(null, posEnum, enumFlags);
-        docsEnum = posEnum;
+        posEnum = termsEnum.postings(null, posEnum, enumFlags);
+        postingsEnum = posEnum;
       } else {
-        docsEnum = termsEnum.docs(null, docsEnum, enumFlags);
+        postingsEnum = termsEnum.postings(null, postingsEnum, enumFlags);
         posEnum = null;
       }
 
       int docFreq = 0;
       long totalTermFreq = 0;
       while (true) {
-        int docID = docsEnum.nextDoc();
-        if (docID == DocsEnum.NO_MORE_DOCS) {
+        int docID = postingsEnum.nextDoc();
+        if (docID == PostingsEnum.NO_MORE_DOCS) {
           break;
         }
         docsSeen.set(docID);

@@ -372,7 +373,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
 
         int freq;
         if (writeFreqs) {
-          freq = docsEnum.freq();
+          freq = postingsEnum.freq();
           totalTermFreq += freq;
         } else {
           freq = -1;

@@ -545,14 +546,14 @@ public final class MemoryPostingsFormat extends PostingsFormat {
     public int freq() {
       return freq;
     }
-    
+
     @Override
     public long cost() {
       return numDocs;
     }
   }
 
-  private final static class FSTDocsAndPositionsEnum extends DocsAndPositionsEnum {
+  private final static class FSTPostingsEnum extends PostingsEnum {
     private final boolean storePayloads;
     private byte[] buffer = new byte[16];
     private final ByteArrayDataInput in = new ByteArrayDataInput(buffer);

@@ -572,7 +573,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
     private int pos;
     private final BytesRef payload = new BytesRef();
 
-    public FSTDocsAndPositionsEnum(boolean storePayloads, boolean storeOffsets) {
+    public FSTPostingsEnum(boolean storePayloads, boolean storeOffsets) {
       this.storePayloads = storePayloads;
       this.storeOffsets = storeOffsets;
     }

@@ -581,7 +582,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
       return storePayloads == this.storePayloads && storeOffsets == this.storeOffsets;
     }
 
-    public FSTDocsAndPositionsEnum reset(BytesRef bufferIn, Bits liveDocs, int numDocs) {
+    public FSTPostingsEnum reset(BytesRef bufferIn, Bits liveDocs, int numDocs) {
       assert numDocs > 0;
 
       // System.out.println("D&P reset bytes this=" + this);
@@ -807,7 +808,27 @@ public final class MemoryPostingsFormat extends PostingsFormat {
     }
 
     @Override
-    public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
+    public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) {
+
+      if ((flags & PostingsEnum.FLAG_POSITIONS) >= PostingsEnum.FLAG_POSITIONS) {
+        if (field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+          return null;
+        }
+        boolean hasOffsets = field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
+        decodeMetaData();
+        FSTPostingsEnum docsAndPositionsEnum;
+        if (reuse == null || !(reuse instanceof FSTPostingsEnum)) {
+          docsAndPositionsEnum = new FSTPostingsEnum(field.hasPayloads(), hasOffsets);
+        } else {
+          docsAndPositionsEnum = (FSTPostingsEnum) reuse;
+          if (!docsAndPositionsEnum.canReuse(field.hasPayloads(), hasOffsets)) {
+            docsAndPositionsEnum = new FSTPostingsEnum(field.hasPayloads(), hasOffsets);
+          }
+        }
+        //System.out.println("D&P reset this=" + this);
+        return docsAndPositionsEnum.reset(postingsSpare, liveDocs, docFreq);
+      }
+
       decodeMetaData();
       FSTDocsEnum docsEnum;
 

@@ -822,27 +843,6 @@ public final class MemoryPostingsFormat extends PostingsFormat {
       return docsEnum.reset(this.postingsSpare, liveDocs, docFreq);
     }
 
-    @Override
-    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
-
-      boolean hasOffsets = field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
-      if (field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
-        return null;
-      }
-      decodeMetaData();
-      FSTDocsAndPositionsEnum docsAndPositionsEnum;
-      if (reuse == null || !(reuse instanceof FSTDocsAndPositionsEnum)) {
-        docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.hasPayloads(), hasOffsets);
-      } else {
-        docsAndPositionsEnum = (FSTDocsAndPositionsEnum) reuse;
-        if (!docsAndPositionsEnum.canReuse(field.hasPayloads(), hasOffsets)) {
-          docsAndPositionsEnum = new FSTDocsAndPositionsEnum(field.hasPayloads(), hasOffsets);
-        }
-      }
-      //System.out.println("D&P reset this=" + this);
-      return docsAndPositionsEnum.reset(postingsSpare, liveDocs, docFreq);
-    }
-
     @Override
     public BytesRef term() {
       return current.input;
@@ -17,16 +17,6 @@ package org.apache.lucene.codecs.simpletext;
  * limitations under the License.
  */
 
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.DOC;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END_OFFSET;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FREQ;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.PAYLOAD;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.POS;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.START_OFFSET;
-import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM;
-
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.Collection;

@@ -37,11 +27,11 @@ import java.util.Map;
 import java.util.TreeMap;
 
 import org.apache.lucene.codecs.FieldsProducer;
-import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
@@ -68,11 +58,21 @@ import org.apache.lucene.util.fst.PairOutputs;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util;
 
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.DOC;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.END_OFFSET;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FIELD;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.FREQ;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.PAYLOAD;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.POS;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.START_OFFSET;
+import static org.apache.lucene.codecs.simpletext.SimpleTextFieldsWriter.TERM;
+
 class SimpleTextFieldsReader extends FieldsProducer {
 
   private static final long BASE_RAM_BYTES_USED =
-        RamUsageEstimator.shallowSizeOfInstance(SimpleTextFieldsReader.class)
-      + RamUsageEstimator.shallowSizeOfInstance(TreeMap.class);
+      RamUsageEstimator.shallowSizeOfInstance(SimpleTextFieldsReader.class)
+          + RamUsageEstimator.shallowSizeOfInstance(TreeMap.class);
 
   private final TreeMap<String,Long> fields;
   private final IndexInput in;

@@ -93,12 +93,12 @@ class SimpleTextFieldsReader extends FieldsProducer {
       }
     }
   }
-  
+
   private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
     ChecksumIndexInput input = new BufferedChecksumIndexInput(in);
     BytesRefBuilder scratch = new BytesRefBuilder();
     TreeMap<String,Long> fields = new TreeMap<>();
-    
+
     while (true) {
       SimpleTextUtil.readLine(input, scratch);
       if (scratch.get().equals(END)) {
@@ -206,9 +206,26 @@ class SimpleTextFieldsReader extends FieldsProducer {
     public long totalTermFreq() {
       return indexOptions == IndexOptions.DOCS ? -1 : totalTermFreq;
     }
- 
+
     @Override
-    public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
+    public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
+
+      if (PostingsEnum.requiresPositions(flags)) {
+        if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
+          // Positions were not indexed
+          return null;
+        }
+
+        SimpleTextPostingsEnum docsAndPositionsEnum;
+        if (reuse != null && reuse instanceof SimpleTextPostingsEnum && ((SimpleTextPostingsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
+          docsAndPositionsEnum = (SimpleTextPostingsEnum) reuse;
+        } else {
+          docsAndPositionsEnum = new SimpleTextPostingsEnum();
+        }
+        return docsAndPositionsEnum.reset(docsStart, liveDocs, indexOptions, docFreq);
+
+      }
+
       SimpleTextDocsEnum docsEnum;
       if (reuse != null && reuse instanceof SimpleTextDocsEnum && ((SimpleTextDocsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
         docsEnum = (SimpleTextDocsEnum) reuse;
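SimpleText's postings() also carries over the reuse contract from docs()/docsAndPositions(): a passed-in enum is recycled only when it is the right concrete class and its canReuse(...) check accepts the current reader; otherwise a fresh instance is allocated. The generic shape, sketched with a hypothetical MyPostingsEnum:

  MyPostingsEnum e;
  if (reuse instanceof MyPostingsEnum && ((MyPostingsEnum) reuse).canReuse(in)) {
    e = (MyPostingsEnum) reuse;   // same underlying input: safe to recycle
  } else {
    e = new MyPostingsEnum();     // wrong type or state: allocate
  }
  return e.reset(docsStart, liveDocs, indexOptions, docFreq);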
@@ -218,22 +235,6 @@ class SimpleTextFieldsReader extends FieldsProducer {
       return docsEnum.reset(docsStart, liveDocs, indexOptions == IndexOptions.DOCS, docFreq);
     }
 
-    @Override
-    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
-
-      if (indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
-        // Positions were not indexed
-        return null;
-      }
-
-      SimpleTextDocsAndPositionsEnum docsAndPositionsEnum;
-      if (reuse != null && reuse instanceof SimpleTextDocsAndPositionsEnum && ((SimpleTextDocsAndPositionsEnum) reuse).canReuse(SimpleTextFieldsReader.this.in)) {
-        docsAndPositionsEnum = (SimpleTextDocsAndPositionsEnum) reuse;
-      } else {
-        docsAndPositionsEnum = new SimpleTextDocsAndPositionsEnum();
-      }
-      return docsAndPositionsEnum.reset(docsStart, liveDocs, indexOptions, docFreq);
-    }
   }
 
   private class SimpleTextDocsEnum extends DocsEnum {

@@ -246,7 +247,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
     private final BytesRefBuilder scratch = new BytesRefBuilder();
     private final CharsRefBuilder scratchUTF16 = new CharsRefBuilder();
     private int cost;
-    
+
     public SimpleTextDocsEnum() {
       this.inStart = SimpleTextFieldsReader.this.in;
       this.in = this.inStart.clone();

@@ -276,6 +277,12 @@ class SimpleTextFieldsReader extends FieldsProducer {
       return tf;
     }
 
+    @Override
+    public int nextPosition() throws IOException {
+      assert false : "Shouldn't be calling nextPosition() if you haven't asked for positions";
+      return -1;
+    }
+
     @Override
     public int nextDoc() throws IOException {
       if (docID == NO_MORE_DOCS) {
@@ -328,14 +335,14 @@ class SimpleTextFieldsReader extends FieldsProducer {
       // Naive -- better to index skip data
       return slowAdvance(target);
     }
-    
+
     @Override
     public long cost() {
       return cost;
     }
   }
 
-  private class SimpleTextDocsAndPositionsEnum extends DocsAndPositionsEnum {
+  private class SimpleTextPostingsEnum extends PostingsEnum {
     private final IndexInput inStart;
     private final IndexInput in;
     private int docID = -1;

@@ -345,6 +352,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
     private final BytesRefBuilder scratch2 = new BytesRefBuilder();
     private final CharsRefBuilder scratchUTF16 = new CharsRefBuilder();
     private final CharsRefBuilder scratchUTF16_2 = new CharsRefBuilder();
+    private int pos;
     private BytesRef payload;
     private long nextDocStart;
     private boolean readOffsets;

@@ -353,7 +361,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
     private int endOffset;
     private int cost;
 
-    public SimpleTextDocsAndPositionsEnum() {
+    public SimpleTextPostingsEnum() {
       this.inStart = SimpleTextFieldsReader.this.in;
       this.in = inStart.clone();
     }

@@ -362,7 +370,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
       return in == inStart;
     }
 
-    public SimpleTextDocsAndPositionsEnum reset(long fp, Bits liveDocs, IndexOptions indexOptions, int docFreq) {
+    public SimpleTextPostingsEnum reset(long fp, Bits liveDocs, IndexOptions indexOptions, int docFreq) {
       this.liveDocs = liveDocs;
       nextDocStart = fp;
       docID = -1;

@@ -437,7 +445,6 @@ class SimpleTextFieldsReader extends FieldsProducer {
 
     @Override
     public int nextPosition() throws IOException {
-      final int pos;
       if (readPositions) {
         SimpleTextUtil.readLine(in, scratch);
         assert StringHelper.startsWith(scratch.get(), POS): "got line=" + scratch.get().utf8ToString();

@@ -488,7 +495,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
     public BytesRef getPayload() {
       return payload;
     }
-    
+
     @Override
     public long cost() {
       return cost;
@ -506,9 +513,9 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
private static final long TERMS_BASE_RAM_BYTES_USED =
|
||||
RamUsageEstimator.shallowSizeOfInstance(SimpleTextTerms.class)
|
||||
+ RamUsageEstimator.shallowSizeOfInstance(BytesRef.class)
|
||||
+ RamUsageEstimator.shallowSizeOfInstance(CharsRef.class);
|
||||
RamUsageEstimator.shallowSizeOfInstance(SimpleTextTerms.class)
|
||||
+ RamUsageEstimator.shallowSizeOfInstance(BytesRef.class)
|
||||
+ RamUsageEstimator.shallowSizeOfInstance(CharsRef.class);
|
||||
private class SimpleTextTerms extends Terms implements Accountable {
|
||||
private final long termsStart;
|
||||
private final FieldInfo fieldInfo;
|
||||
|
@ -533,7 +540,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
|
||||
final PairOutputs<Long,Long> outputsInner = new PairOutputs<>(posIntOutputs, posIntOutputs);
|
||||
final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<>(posIntOutputs,
|
||||
outputsInner);
|
||||
outputsInner);
|
||||
b = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
|
||||
IndexInput in = SimpleTextFieldsReader.this.in.clone();
|
||||
in.seek(termsStart);
|
||||
|
@ -548,8 +555,8 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
if (scratch.get().equals(END) || StringHelper.startsWith(scratch.get(), FIELD)) {
|
||||
if (lastDocsStart != -1) {
|
||||
b.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef),
|
||||
outputs.newPair(lastDocsStart,
|
||||
outputsInner.newPair((long) docFreq, totalTermFreq)));
|
||||
outputs.newPair(lastDocsStart,
|
||||
outputsInner.newPair((long) docFreq, totalTermFreq)));
|
||||
sumTotalTermFreq += totalTermFreq;
|
||||
}
|
||||
break;
|
||||
|
@ -565,7 +572,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
} else if (StringHelper.startsWith(scratch.get(), TERM)) {
|
||||
if (lastDocsStart != -1) {
|
||||
b.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef), outputs.newPair(lastDocsStart,
|
||||
outputsInner.newPair((long) docFreq, totalTermFreq)));
|
||||
outputsInner.newPair((long) docFreq, totalTermFreq)));
|
||||
}
|
||||
lastDocsStart = in.getFilePointer();
|
||||
final int len = scratch.length() - TERM.length;
|
||||
|
@ -652,7 +659,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
public boolean hasPositions() {
|
||||
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean hasPayloads() {
|
||||
return fieldInfo.hasPayloads();
|
||||
|
|
|
@ -20,8 +20,7 @@ package org.apache.lucene.codecs.simpletext;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.Fields;
|
||||
|
@ -33,7 +32,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
|
||||
class SimpleTextFieldsWriter extends FieldsConsumer {
|
||||
|
||||
|
||||
private IndexOutput out;
|
||||
private final BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
private final SegmentWriteState writeState;
|
||||
|
@ -79,22 +78,21 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
|
|||
|
||||
int flags = 0;
|
||||
if (hasPositions) {
|
||||
|
||||
flags = PostingsEnum.FLAG_POSITIONS;
|
||||
if (hasPayloads) {
|
||||
flags = flags | DocsAndPositionsEnum.FLAG_PAYLOADS;
|
||||
flags = flags | PostingsEnum.FLAG_PAYLOADS;
|
||||
}
|
||||
if (hasOffsets) {
|
||||
flags = flags | DocsAndPositionsEnum.FLAG_OFFSETS;
|
||||
flags = flags | PostingsEnum.FLAG_OFFSETS;
|
||||
}
|
||||
} else {
|
||||
if (hasFreqs) {
|
||||
flags = flags | DocsEnum.FLAG_FREQS;
|
||||
flags = flags | PostingsEnum.FLAG_FREQS;
|
||||
}
|
||||
}
|
||||
|
||||
TermsEnum termsEnum = terms.iterator(null);
|
||||
DocsAndPositionsEnum posEnum = null;
|
||||
DocsEnum docsEnum = null;
|
||||
PostingsEnum postingsEnum = null;
|
||||
|
||||
// for each term in field
|
||||
while(true) {
|
||||
|
@ -103,20 +101,16 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
|
|||
break;
|
||||
}
|
||||
|
||||
if (hasPositions) {
|
||||
posEnum = termsEnum.docsAndPositions(null, posEnum, flags);
|
||||
docsEnum = posEnum;
|
||||
} else {
|
||||
docsEnum = termsEnum.docs(null, docsEnum, flags);
|
||||
}
|
||||
assert docsEnum != null: "termsEnum=" + termsEnum + " hasPos=" + hasPositions + " flags=" + flags;
|
||||
postingsEnum = termsEnum.postings(null, postingsEnum, flags);
|
||||
|
||||
assert postingsEnum != null: "termsEnum=" + termsEnum + " hasPos=" + hasPositions + " flags=" + flags;
|
||||
|
||||
boolean wroteTerm = false;
|
||||
|
||||
// for each doc in field+term
|
||||
while(true) {
|
||||
int doc = docsEnum.nextDoc();
|
||||
if (doc == DocsEnum.NO_MORE_DOCS) {
|
||||
int doc = postingsEnum.nextDoc();
|
||||
if (doc == PostingsEnum.NO_MORE_DOCS) {
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -143,7 +137,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
|
|||
write(Integer.toString(doc));
|
||||
newline();
|
||||
if (hasFreqs) {
|
||||
int freq = docsEnum.freq();
|
||||
int freq = postingsEnum.freq();
|
||||
write(FREQ);
|
||||
write(Integer.toString(freq));
|
||||
newline();
|
||||
|
@ -154,15 +148,15 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
|
|||
|
||||
// for each pos in field+term+doc
|
||||
for(int i=0;i<freq;i++) {
|
||||
int position = posEnum.nextPosition();
|
||||
int position = postingsEnum.nextPosition();
|
||||
|
||||
write(POS);
|
||||
write(Integer.toString(position));
|
||||
newline();
|
||||
|
||||
if (hasOffsets) {
|
||||
int startOffset = posEnum.startOffset();
|
||||
int endOffset = posEnum.endOffset();
|
||||
int startOffset = postingsEnum.startOffset();
|
||||
int endOffset = postingsEnum.endOffset();
|
||||
assert endOffset >= startOffset;
|
||||
assert startOffset >= lastStartOffset: "startOffset=" + startOffset + " lastStartOffset=" + lastStartOffset;
|
||||
lastStartOffset = startOffset;
|
||||
|
@ -174,7 +168,7 @@ class SimpleTextFieldsWriter extends FieldsConsumer {
|
|||
newline();
|
||||
}
|
||||
|
||||
BytesRef payload = posEnum.getPayload();
|
||||
BytesRef payload = postingsEnum.getPayload();
|
||||
|
||||
if (payload != null && payload.length > 0) {
|
||||
assert payload.length != 0;
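
The writer code above is the migration pattern this commit applies everywhere: one flags word and one pull call instead of choosing between two methods. A hedged before/after sketch against a hypothetical termsEnum:

    // Before this change: two methods, two enum types.
    //   DocsAndPositionsEnum posEnum = termsEnum.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
    //   DocsEnum docsEnum = termsEnum.docs(null, null, DocsEnum.FLAG_FREQS);

    // After: a single method; the flags describe the postings data required.
    PostingsEnum postingsEnum = termsEnum.postings(null, null,
        PostingsEnum.FLAG_POSITIONS | PostingsEnum.FLAG_PAYLOADS);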

@@ -25,8 +25,8 @@ import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;

@@ -59,15 +59,15 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.*;
public class SimpleTextTermVectorsReader extends TermVectorsReader {
private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(SimpleTextTermVectorsReader.class)
+ RamUsageEstimator.shallowSizeOfInstance(BytesRef.class)
+ RamUsageEstimator.shallowSizeOfInstance(CharsRef.class);
RamUsageEstimator.shallowSizeOfInstance(SimpleTextTermVectorsReader.class)
+ RamUsageEstimator.shallowSizeOfInstance(BytesRef.class)
+ RamUsageEstimator.shallowSizeOfInstance(CharsRef.class);
private long offsets[]; /* docid -> offset in .vec file */
private IndexInput in;
private BytesRefBuilder scratch = new BytesRefBuilder();
private CharsRefBuilder scratchUTF16 = new CharsRefBuilder();
public SimpleTextTermVectorsReader(Directory directory, SegmentInfo si, IOContext context) throws IOException {
boolean success = false;
try {

@@ -82,15 +82,15 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
}
readIndex(si.getDocCount());
}
// used by clone
SimpleTextTermVectorsReader(long offsets[], IndexInput in) {
this.offsets = offsets;
this.in = in;
}
// we don't actually write a .tvx-like index, instead we read the
// vectors file in entirety up-front and save the offsets
// we don't actually write a .tvx-like index, instead we read the
// vectors file in entirety up-front and save the offsets
// so we can seek to the data later.
private void readIndex(int maxDoc) throws IOException {
ChecksumIndexInput input = new BufferedChecksumIndexInput(in);

@@ -106,7 +106,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
SimpleTextUtil.checkFooter(input);
assert upto == offsets.length;
}
@Override
public Fields get(int doc) throws IOException {
SortedMap<String,SimpleTVTerms> fields = new TreeMap<>();

@@ -122,30 +122,30 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
assert StringHelper.startsWith(scratch.get(), FIELD);
// skip fieldNumber:
parseIntAt(FIELD.length);
readLine();
assert StringHelper.startsWith(scratch.get(), FIELDNAME);
String fieldName = readString(FIELDNAME.length, scratch);
readLine();
assert StringHelper.startsWith(scratch.get(), FIELDPOSITIONS);
boolean positions = Boolean.parseBoolean(readString(FIELDPOSITIONS.length, scratch));
readLine();
assert StringHelper.startsWith(scratch.get(), FIELDOFFSETS);
boolean offsets = Boolean.parseBoolean(readString(FIELDOFFSETS.length, scratch));
readLine();
assert StringHelper.startsWith(scratch.get(), FIELDPAYLOADS);
boolean payloads = Boolean.parseBoolean(readString(FIELDPAYLOADS.length, scratch));
readLine();
assert StringHelper.startsWith(scratch.get(), FIELDTERMCOUNT);
int termCount = parseIntAt(FIELDTERMCOUNT.length);
SimpleTVTerms terms = new SimpleTVTerms(offsets, positions, payloads);
fields.put(fieldName, terms);
BytesRefBuilder term = new BytesRefBuilder();
for (int j = 0; j < termCount; j++) {
readLine();

@@ -154,14 +154,14 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
term.grow(termLength);
term.setLength(termLength);
System.arraycopy(scratch.bytes(), TERMTEXT.length, term.bytes(), 0, termLength);
SimpleTVPostings postings = new SimpleTVPostings();
terms.terms.put(term.toBytesRef(), postings);
readLine();
assert StringHelper.startsWith(scratch.get(), TERMFREQ);
postings.freq = parseIntAt(TERMFREQ.length);
if (positions || offsets) {
if (positions) {
postings.positions = new int[postings.freq];

@@ -169,12 +169,12 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
postings.payloads = new BytesRef[postings.freq];
}
}
if (offsets) {
postings.startOffsets = new int[postings.freq];
postings.endOffsets = new int[postings.freq];
}
for (int k = 0; k < postings.freq; k++) {
if (positions) {
readLine();

@@ -192,12 +192,12 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
}
}
}
if (offsets) {
readLine();
assert StringHelper.startsWith(scratch.get(), STARTOFFSET);
postings.startOffsets[k] = parseIntAt(STARTOFFSET.length);
readLine();
assert StringHelper.startsWith(scratch.get(), ENDOFFSET);
postings.endOffsets[k] = parseIntAt(ENDOFFSET.length);

@@ -216,11 +216,11 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
}
return new SimpleTextTermVectorsReader(offsets, in.clone());
}
@Override
public void close() throws IOException {
try {
IOUtils.close(in);
IOUtils.close(in);
} finally {
in = null;
offsets = null;

@@ -230,20 +230,20 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
private void readLine() throws IOException {
SimpleTextUtil.readLine(in, scratch);
}
private int parseIntAt(int offset) {
scratchUTF16.copyUTF8Bytes(scratch.bytes(), offset, scratch.length()-offset);
return ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
}
private String readString(int offset, BytesRefBuilder scratch) {
scratchUTF16.copyUTF8Bytes(scratch.bytes(), offset, scratch.length()-offset);
return scratchUTF16.toString();
}
private class SimpleTVFields extends Fields {
private final SortedMap<String,SimpleTVTerms> fields;
SimpleTVFields(SortedMap<String,SimpleTVTerms> fields) {
this.fields = fields;
}

@@ -263,20 +263,20 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
return fields.size();
}
}
private static class SimpleTVTerms extends Terms {
final SortedMap<BytesRef,SimpleTVPostings> terms;
final boolean hasOffsets;
final boolean hasPositions;
final boolean hasPayloads;
SimpleTVTerms(boolean hasOffsets, boolean hasPositions, boolean hasPayloads) {
this.hasOffsets = hasOffsets;
this.hasPositions = hasPositions;
this.hasPayloads = hasPayloads;
terms = new TreeMap<>();
}
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
// TODO: reuse

@@ -317,13 +317,13 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
public boolean hasPositions() {
return hasPositions;
}
@Override
public boolean hasPayloads() {
return hasPayloads;
}
}
private static class SimpleTVPostings {
private int freq;
private int positions[];

@@ -331,17 +331,17 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
private int endOffsets[];
private BytesRef payloads[];
}
private static class SimpleTVTermsEnum extends TermsEnum {
SortedMap<BytesRef,SimpleTVPostings> terms;
Iterator<Map.Entry<BytesRef,SimpleTextTermVectorsReader.SimpleTVPostings>> iterator;
Map.Entry<BytesRef,SimpleTextTermVectorsReader.SimpleTVPostings> current;
SimpleTVTermsEnum(SortedMap<BytesRef,SimpleTVPostings> terms) {
this.terms = terms;
this.iterator = terms.entrySet().iterator();
}
@Override
public SeekStatus seekCeil(BytesRef text) throws IOException {
iterator = terms.tailMap(text).entrySet().iterator();

@@ -388,26 +388,27 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
if (PostingsEnum.requiresPositions(flags)) {
SimpleTVPostings postings = current.getValue();
if (postings.positions == null && postings.startOffsets == null) {
return null;
}
// TODO: reuse
SimpleTVPostingsEnum e = new SimpleTVPostingsEnum();
e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets, postings.payloads);
return e;
}
// TODO: reuse
SimpleTVDocsEnum e = new SimpleTVDocsEnum();
e.reset(liveDocs, (flags & DocsEnum.FLAG_FREQS) == 0 ? 1 : current.getValue().freq);
e.reset(liveDocs, (flags & PostingsEnum.FLAG_FREQS) == 0 ? 1 : current.getValue().freq);
return e;
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
SimpleTVPostings postings = current.getValue();
if (postings.positions == null && postings.startOffsets == null) {
return null;
}
// TODO: reuse
SimpleTVDocsAndPositionsEnum e = new SimpleTVDocsAndPositionsEnum();
e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets, postings.payloads);
return e;
}
}
// note: these two enum classes are exactly like the Default impl...
private static class SimpleTVDocsEnum extends DocsEnum {
private boolean didNext;

@@ -421,6 +422,12 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
return freq;
}
@Override
public int nextPosition() throws IOException {
assert false;
return -1;
}
@Override
public int docID() {
return doc;

@@ -447,14 +454,14 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
this.doc = -1;
didNext = false;
}
@Override
public long cost() {
return 1;
}
}
private static class SimpleTVDocsAndPositionsEnum extends DocsAndPositionsEnum {
private static class SimpleTVPostingsEnum extends PostingsEnum {
private boolean didNext;
private int doc = -1;
private int nextPos;

@@ -512,11 +519,11 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
@Override
public int nextPosition() {
assert (positions != null && nextPos < positions.length) ||
startOffsets != null && nextPos < startOffsets.length;
if (positions != null) {
assert nextPos < positions.length;
return positions[nextPos++];
} else {
assert nextPos < startOffsets.length;
nextPos++;
return -1;
}

@@ -539,7 +546,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
return endOffsets[nextPos-1];
}
}
@Override
public long cost() {
return 1;

@@ -550,7 +557,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(offsets);
}
@Override
public String toString() {
return getClass().getSimpleName();
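
A hedged sketch of what reading a term vector looks like against the reworked reader; docID 0 and field "body" are hypothetical, and the null return mirrors the positions/startOffsets check above:

    Terms vector = reader.getTermVectors(0).terms("body"); // either lookup may yield null
    TermsEnum te = vector.iterator(null);
    BytesRef term;
    while ((term = te.next()) != null) {
      PostingsEnum pe = te.postings(null, null, PostingsEnum.FLAG_OFFSETS);
      if (pe == null) {
        continue; // this vector was indexed without positions or offsets
      }
      pe.nextDoc(); // a term vector exposes a single (virtual) document
      for (int i = 0; i < pe.freq(); i++) {
        pe.nextPosition();
        int start = pe.startOffset(); // -1 when offsets were not stored
        int end = pe.endOffset();
      }
    }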

@@ -28,6 +28,11 @@ public class TestDirectPostingsFormat extends BasePostingsFormatTestCase {
// TODO: randomize parameters
private final Codec codec = TestUtil.alwaysPostingsFormat(new DirectPostingsFormat());
@Override
protected boolean isPostingsEnumReuseImplemented() {
return false;
}
@Override
protected Codec getCodec() {
return codec;

@@ -20,7 +20,7 @@ package org.apache.lucene.analysis;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.index.DocsAndPositionsEnum; // for javadoc
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeImpl;

@@ -43,7 +43,7 @@ import org.apache.lucene.util.BytesRef;
with type "eos". The default token type is "word".
<p>
A Token can optionally have metadata (a.k.a. payload) in the form of a variable
length byte array. Use {@link DocsAndPositionsEnum#getPayload()} to retrieve the
length byte array. Use {@link org.apache.lucene.index.PostingsEnum#getPayload()} to retrieve the
payloads from the index.
<br><br>
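
A short sketch of the retrieval path this javadoc now points at; the termsEnum is hypothetical, and getPayload() is only meaningful after nextPosition(), as in the writer code earlier in this change:

    PostingsEnum pe = termsEnum.postings(null, null, PostingsEnum.FLAG_PAYLOADS);
    if (pe != null && pe.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      for (int i = 0; i < pe.freq(); i++) {
        pe.nextPosition();                  // advance before reading the payload
        BytesRef payload = pe.getPayload(); // null when this position carries none
      }
    }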

@@ -17,7 +17,6 @@ package org.apache.lucene.analysis.tokenattributes;
* limitations under the License.
*/
import org.apache.lucene.index.DocsAndPositionsEnum; // javadocs
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.BytesRef;

@@ -33,7 +32,7 @@ import org.apache.lucene.util.BytesRef;
* best to use the minimum number of bytes necessary. Some codec implementations
* may optimize payload storage when all payloads have the same length.
*
* @see DocsAndPositionsEnum
* @see org.apache.lucene.index.PostingsEnum
*/
public interface PayloadAttribute extends Attribute {
/**

@@ -43,7 +43,7 @@ import org.apache.lucene.util.Attribute;
*
* </ul>
*
* @see org.apache.lucene.index.DocsAndPositionsEnum
* @see org.apache.lucene.index.PostingsEnum
*/
public interface PositionIncrementAttribute extends Attribute {
/** Set the position increment. The default value is one.

@@ -16,13 +16,12 @@ package org.apache.lucene.codecs;
* limitations under the License.
*/
import org.apache.lucene.index.DocsEnum; // javadocs
import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.TermState;
/**
* Holds all state required for {@link PostingsReaderBase}
* to produce a {@link DocsEnum} without re-seeking the
* to produce a {@link org.apache.lucene.index.PostingsEnum} without re-seeking the
* terms dict.
*/
public class BlockTermState extends OrdTermState {
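
To make that comment concrete, a hedged sketch of the pattern BlockTermState enables: capture a TermState once, then reopen postings later without re-seeking the terms dictionary (the term and flags here are hypothetical):

    TermsEnum te = terms.iterator(null);
    TermState state = null;
    if (te.seekExact(new BytesRef("lucene"))) {
      state = te.termState(); // snapshot of where this term's postings live
    }
    // ... later, against the same segment:
    if (state != null) {
      te.seekExact(new BytesRef("lucene"), state); // positions the enum with no dictionary lookup
      PostingsEnum pe = te.postings(null, null, PostingsEnum.FLAG_FREQS);
    }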

@@ -20,8 +20,7 @@ package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.DataInput;

@@ -31,8 +30,8 @@ import org.apache.lucene.util.Bits;
/** The core terms dictionaries (BlockTermsReader,
* BlockTreeTermsReader) interact with a single instance
* of this class to manage creation of {@link DocsEnum} and
* {@link DocsAndPositionsEnum} instances. It provides an
* of this class to manage creation of {@link org.apache.lucene.index.PostingsEnum} and
* {@link org.apache.lucene.index.PostingsEnum} instances. It provides an
* IndexInput (termsIn) where this class may read any
* previously stored data that it had written in its
* corresponding {@link PostingsWriterBase} at indexing

@@ -66,12 +65,7 @@ public abstract class PostingsReaderBase implements Closeable, Accountable {
/** Must fully consume state, since after this call that
* TermState may be reused. */
public abstract DocsEnum docs(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsEnum reuse, int flags) throws IOException;
/** Must fully consume state, since after this call that
* TermState may be reused. */
public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsAndPositionsEnum reuse,
int flags) throws IOException;
public abstract PostingsEnum postings(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, PostingsEnum reuse, int flags) throws IOException;
/**
* Checks consistency of this reader.

@@ -81,7 +75,7 @@ public abstract class PostingsReaderBase implements Closeable, Accountable {
* @lucene.internal
*/
public abstract void checkIntegrity() throws IOException;
@Override
public abstract void close() throws IOException;
}

@@ -17,12 +17,7 @@ package org.apache.lucene.codecs;
* limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.index.DocsAndPositionsEnum; // javadocs
import org.apache.lucene.index.DocsEnum; // javadocs
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.TermsEnum;

@@ -31,6 +26,9 @@ import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import java.io.Closeable;
import java.io.IOException;
/**
* Class that plugs into term dictionaries, such as {@link
* BlockTreeTermsWriter}, and handles writing postings.

@@ -54,8 +52,8 @@ public abstract class PostingsWriterBase implements Closeable {
public abstract void init(IndexOutput termsOut, SegmentWriteState state) throws IOException;
/** Write all postings for one term; use the provided
* {@link TermsEnum} to pull a {@link DocsEnum} or {@link
* DocsAndPositionsEnum}. This method should not
* {@link TermsEnum} to pull a {@link org.apache.lucene.index.PostingsEnum}.
* This method should not
* re-position the {@code TermsEnum}! It is already
* positioned on the term that should be written. This
* method must set the bit in the provided {@link

@@ -19,8 +19,7 @@ package org.apache.lucene.codecs;
import java.io.IOException;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.TermsEnum;

@@ -42,8 +41,7 @@ import org.apache.lucene.util.FixedBitSet;
public abstract class PushPostingsWriterBase extends PostingsWriterBase {
// Reused in writeTerm
private DocsEnum docsEnum;
private DocsAndPositionsEnum posEnum;
private PostingsEnum postingsEnum;
private int enumFlags;
/** {@link FieldInfo} of current field being written. */

@@ -100,18 +98,18 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
if (writeFreqs == false) {
enumFlags = 0;
} else if (writePositions == false) {
enumFlags = DocsEnum.FLAG_FREQS;
enumFlags = PostingsEnum.FLAG_FREQS;
} else if (writeOffsets == false) {
if (writePayloads) {
enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS;
enumFlags = PostingsEnum.FLAG_PAYLOADS;
} else {
enumFlags = 0;
enumFlags = PostingsEnum.FLAG_POSITIONS;
}
} else {
if (writePayloads) {
enumFlags = DocsAndPositionsEnum.FLAG_PAYLOADS | DocsAndPositionsEnum.FLAG_OFFSETS;
enumFlags = PostingsEnum.FLAG_PAYLOADS | PostingsEnum.FLAG_OFFSETS;
} else {
enumFlags = DocsAndPositionsEnum.FLAG_OFFSETS;
enumFlags = PostingsEnum.FLAG_OFFSETS;
}
}
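
Restating the new-side selection above as a table (the observation that the payloads and offsets flags already subsume positions is an inference from the requiresPositions()/flag-bit checks elsewhere in this change, not something this hunk states):

    // writeFreqs == false                 -> 0
    // freqs only, no positions            -> PostingsEnum.FLAG_FREQS
    // positions, no offsets, payloads     -> PostingsEnum.FLAG_PAYLOADS
    // positions, no offsets, no payloads  -> PostingsEnum.FLAG_POSITIONS
    // positions + offsets, payloads       -> PostingsEnum.FLAG_PAYLOADS | PostingsEnum.FLAG_OFFSETS
    // positions + offsets, no payloads    -> PostingsEnum.FLAG_OFFSETS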

@@ -121,26 +119,21 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
@Override
public final BlockTermState writeTerm(BytesRef term, TermsEnum termsEnum, FixedBitSet docsSeen) throws IOException {
startTerm();
if (writePositions == false) {
docsEnum = termsEnum.docs(null, docsEnum, enumFlags);
} else {
posEnum = termsEnum.docsAndPositions(null, posEnum, enumFlags);
docsEnum = posEnum;
}
assert docsEnum != null;
postingsEnum = termsEnum.postings(null, postingsEnum, enumFlags);
assert postingsEnum != null;
int docFreq = 0;
long totalTermFreq = 0;
while (true) {
int docID = docsEnum.nextDoc();
if (docID == DocsEnum.NO_MORE_DOCS) {
int docID = postingsEnum.nextDoc();
if (docID == PostingsEnum.NO_MORE_DOCS) {
break;
}
docFreq++;
docsSeen.set(docID);
int freq;
if (writeFreqs) {
freq = docsEnum.freq();
freq = postingsEnum.freq();
totalTermFreq += freq;
} else {
freq = -1;

@@ -149,13 +142,13 @@ public abstract class PushPostingsWriterBase extends PostingsWriterBase {
if (writePositions) {
for(int i=0;i<freq;i++) {
int pos = posEnum.nextPosition();
BytesRef payload = writePayloads ? posEnum.getPayload() : null;
int pos = postingsEnum.nextPosition();
BytesRef payload = writePayloads ? postingsEnum.getPayload() : null;
int startOffset;
int endOffset;
if (writeOffsets) {
startOffset = posEnum.startOffset();
endOffset = posEnum.endOffset();
startOffset = postingsEnum.startOffset();
endOffset = postingsEnum.endOffset();
} else {
startOffset = -1;
endOffset = -1;

@@ -21,7 +21,6 @@ import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; // javadocs
import org.apache.lucene.index.DocsAndPositionsEnum; // javadocs
import org.apache.lucene.index.Fields;
import org.apache.lucene.util.Accountable;

@@ -40,7 +39,7 @@ public abstract class TermVectorsReader implements Cloneable, Closeable, Account
/** Returns term vectors for this document, or null if
* term vectors were not indexed. If offsets are
* available they are in an {@link OffsetAttribute}
* available from the {@link DocsAndPositionsEnum}. */
* available from the {@link org.apache.lucene.index.PostingsEnum}. */
public abstract Fields get(int doc) throws IOException;
/**

@@ -21,7 +21,7 @@ import java.io.Closeable;
import java.io.IOException;
import java.util.Iterator;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;

@@ -225,7 +225,7 @@ public abstract class TermVectorsWriter implements Closeable {
String lastFieldName = null;
TermsEnum termsEnum = null;
DocsAndPositionsEnum docsAndPositionsEnum = null;
PostingsEnum docsAndPositionsEnum = null;
int fieldCount = 0;
for(String fieldName : vectors) {

@@ -268,7 +268,7 @@ public abstract class TermVectorsWriter implements Closeable {
startTerm(termsEnum.term(), freq);
if (hasPositions || hasOffsets) {
docsAndPositionsEnum = termsEnum.docsAndPositions(null, docsAndPositionsEnum);
docsAndPositionsEnum = termsEnum.postings(null, docsAndPositionsEnum, PostingsEnum.FLAG_OFFSETS | PostingsEnum.FLAG_PAYLOADS);
assert docsAndPositionsEnum != null;
final int docID = docsAndPositionsEnum.nextDoc();

@@ -282,7 +282,7 @@ public abstract class TermVectorsWriter implements Closeable {
final BytesRef payload = docsAndPositionsEnum.getPayload();
assert !hasPositions || pos >= 0;
assert !hasPositions || pos >= 0 ;
addPosition(pos, startOffset, endOffset, payload);
}
}

@@ -19,9 +19,7 @@ package org.apache.lucene.codecs.blocktree;
import java.io.IOException;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.IndexInput;

@@ -203,20 +201,9 @@ final class IntersectTermsEnum extends TermsEnum {
}
@Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
public PostingsEnum postings(Bits skipDocs, PostingsEnum reuse, int flags) throws IOException {
currentFrame.decodeMetaData();
return fr.parent.postingsReader.docs(fr.fieldInfo, currentFrame.termState, skipDocs, reuse, flags);
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
if (fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
// Positions were not indexed:
return null;
}
currentFrame.decodeMetaData();
return fr.parent.postingsReader.docsAndPositions(fr.fieldInfo, currentFrame.termState, skipDocs, reuse, flags);
return fr.parent.postingsReader.postings(fr.fieldInfo, currentFrame.termState, skipDocs, reuse, flags);
}
private int getState() {

@@ -21,9 +21,7 @@ import java.io.IOException;
import java.io.PrintStream;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.ByteArrayDataInput;

@@ -981,7 +979,7 @@ final class SegmentTermsEnum extends TermsEnum {
}
@Override
public DocsEnum docs(Bits skipDocs, DocsEnum reuse, int flags) throws IOException {
public PostingsEnum postings(Bits skipDocs, PostingsEnum reuse, int flags) throws IOException {
assert !eof;
//if (DEBUG) {
//System.out.println("BTTR.docs seg=" + segment);

@@ -990,19 +988,7 @@ final class SegmentTermsEnum extends TermsEnum {
//if (DEBUG) {
//System.out.println(" state=" + currentFrame.state);
//}
return fr.parent.postingsReader.docs(fr.fieldInfo, currentFrame.state, skipDocs, reuse, flags);
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
if (fr.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) {
// Positions were not indexed:
return null;
}
assert !eof;
currentFrame.decodeMetaData();
return fr.parent.postingsReader.docsAndPositions(fr.fieldInfo, currentFrame.state, skipDocs, reuse, flags);
return fr.parent.postingsReader.postings(fr.fieldInfo, currentFrame.state, skipDocs, reuse, flags);
}
@Override

@@ -17,19 +17,6 @@ package org.apache.lucene.codecs.compressing;
* limitations under the License.
*/
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.PACKED_BLOCK_SIZE;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.CODEC_SFX_DAT;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.CODEC_SFX_IDX;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.FLAGS_BITS;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.OFFSETS;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.PAYLOADS;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.POSITIONS;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VECTORS_EXTENSION;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CHUNK_STATS;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CURRENT;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_START;
import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;

@@ -40,8 +27,7 @@ import java.util.NoSuchElementException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;

@@ -65,6 +51,18 @@ import org.apache.lucene.util.LongsRef;
import org.apache.lucene.util.packed.BlockPackedReaderIterator;
import org.apache.lucene.util.packed.PackedInts;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.CODEC_SFX_DAT;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.CODEC_SFX_IDX;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.FLAGS_BITS;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.OFFSETS;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.PACKED_BLOCK_SIZE;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.PAYLOADS;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.POSITIONS;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VECTORS_EXTENSION;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CHUNK_STATS;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_CURRENT;
import static org.apache.lucene.codecs.compressing.CompressingTermVectorsWriter.VERSION_START;
/**
* {@link TermVectorsReader} for {@link CompressingTermVectorsFormat}.

@@ -937,30 +935,27 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
}
@Override
public final DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
final TVDocsEnum docsEnum;
if (reuse != null && reuse instanceof TVDocsEnum) {
docsEnum = (TVDocsEnum) reuse;
public final PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
if ((flags & PostingsEnum.FLAG_POSITIONS) >= PostingsEnum.FLAG_POSITIONS) {
if (positions == null && startOffsets == null)
return null;
}
final TVPostingsEnum docsEnum;
if (reuse != null && reuse instanceof TVPostingsEnum) {
docsEnum = (TVPostingsEnum) reuse;
} else {
docsEnum = new TVDocsEnum();
docsEnum = new TVPostingsEnum();
}
docsEnum.reset(liveDocs, termFreqs[ord], positionIndex[ord], positions, startOffsets, lengths, payloads, payloadIndex);
return docsEnum;
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
if (positions == null && startOffsets == null) {
return null;
}
// TODO: slightly sheisty
return (DocsAndPositionsEnum) docs(liveDocs, reuse, flags);
}
}
private static class TVDocsEnum extends DocsAndPositionsEnum {
private static class TVPostingsEnum extends PostingsEnum {
private Bits liveDocs;
private int doc = -1;

@@ -974,7 +969,7 @@ public final class CompressingTermVectorsReader extends TermVectorsReader implem
private int basePayloadOffset;
private int i;
TVDocsEnum() {
TVPostingsEnum() {
payload = new BytesRef();
}

@@ -17,27 +17,7 @@ package org.apache.lucene.codecs.lucene50;
* limitations under the License.
*/
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.ALL_LIVE;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.ALL_MISSING;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BINARY_PREFIX_COMPRESSED;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BLOCK_INTERVAL_MASK;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BLOCK_INTERVAL_SHIFT;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.CONST_COMPRESSED;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.DELTA_COMPRESSED;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.GCD_COMPRESSED;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.INTERVAL_COUNT;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.INTERVAL_MASK;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.INTERVAL_SHIFT;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.MONOTONIC_COMPRESSED;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.REVERSE_INTERVAL_MASK;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.REVERSE_INTERVAL_SHIFT;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.SORTED_SINGLE_VALUED;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.SORTED_WITH_ADDRESSES;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.TABLE_COMPRESSED;
import java.io.Closeable; // javadocs
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;

@@ -52,8 +32,7 @@ import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;

@@ -78,6 +57,26 @@ import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.DirectReader;
import org.apache.lucene.util.packed.MonotonicBlockPackedReader;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.ALL_LIVE;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.ALL_MISSING;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BINARY_PREFIX_COMPRESSED;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BLOCK_INTERVAL_MASK;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.BLOCK_INTERVAL_SHIFT;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.CONST_COMPRESSED;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.DELTA_COMPRESSED;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.GCD_COMPRESSED;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.INTERVAL_COUNT;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.INTERVAL_MASK;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.INTERVAL_SHIFT;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.MONOTONIC_COMPRESSED;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.REVERSE_INTERVAL_MASK;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.REVERSE_INTERVAL_SHIFT;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.SORTED_SINGLE_VALUED;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.SORTED_WITH_ADDRESSES;
import static org.apache.lucene.codecs.lucene50.Lucene50DocValuesConsumer.TABLE_COMPRESSED;
/** reader for {@link Lucene50DocValuesFormat} */
class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
private final Map<String,NumericEntry> numerics = new HashMap<>();

@@ -1141,14 +1140,10 @@ class Lucene50DocValuesProducer extends DocValuesProducer implements Closeable {
}
@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
throw new UnsupportedOperationException();
}
}
}
}

@@ -30,7 +30,7 @@ import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;

@@ -141,7 +141,7 @@ import org.apache.lucene.util.packed.PackedInts;
* determined by the largest integer. Smaller block size result in smaller variance among width
* of integers hence smaller indexes. Larger block size result in more efficient bulk i/o hence
* better acceleration. This value should always be a multiple of 64, currently fixed as 128 as
* a tradeoff. It is also the skip interval used to accelerate {@link DocsEnum#advance(int)}.
* a tradeoff. It is also the skip interval used to accelerate {@link org.apache.lucene.index.PostingsEnum#advance(int)}.
* <li>DocFPDelta determines the position of this term's TermFreqs within the .doc file.
* In particular, it is the difference of file offset between this term's
* data and previous term's data (or zero, for the first term in the block).On disk it is
@ -24,11 +24,11 @@ import org.apache.lucene.codecs.BlockTermState;
|
|||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.PostingsReaderBase;
|
||||
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat.IntBlockTermState;
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
@ -193,39 +193,38 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public DocsEnum docs(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
|
||||
BlockDocsEnum docsEnum;
|
||||
if (reuse instanceof BlockDocsEnum) {
|
||||
docsEnum = (BlockDocsEnum) reuse;
|
||||
if (!docsEnum.canReuse(docIn, fieldInfo)) {
|
||||
public PostingsEnum postings(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
|
||||
|
||||
if ((flags & PostingsEnum.FLAG_POSITIONS) < PostingsEnum.FLAG_POSITIONS) {
|
||||
BlockDocsEnum docsEnum;
|
||||
if (reuse instanceof BlockDocsEnum) {
|
||||
docsEnum = (BlockDocsEnum) reuse;
|
||||
if (!docsEnum.canReuse(docIn, fieldInfo)) {
|
||||
docsEnum = new BlockDocsEnum(fieldInfo);
|
||||
}
|
||||
} else {
|
||||
docsEnum = new BlockDocsEnum(fieldInfo);
|
||||
}
|
||||
} else {
|
||||
docsEnum = new BlockDocsEnum(fieldInfo);
|
||||
return docsEnum.reset(liveDocs, (IntBlockTermState) termState, flags);
|
||||
}
|
||||
return docsEnum.reset(liveDocs, (IntBlockTermState) termState, flags);
|
||||
}
|
||||
|
||||
// TODO: specialize to liveDocs vs not
|
||||
|
||||
@Override
|
||||
public DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState termState, Bits liveDocs,
|
||||
DocsAndPositionsEnum reuse, int flags)
|
||||
throws IOException {
|
||||
|
||||
boolean indexHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||
boolean indexHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||
boolean indexHasPayloads = fieldInfo.hasPayloads();
|
||||
|
||||
if ((!indexHasOffsets || (flags & DocsAndPositionsEnum.FLAG_OFFSETS) == 0) &&
|
||||
(!indexHasPayloads || (flags & DocsAndPositionsEnum.FLAG_PAYLOADS) == 0)) {
|
||||
BlockDocsAndPositionsEnum docsAndPositionsEnum;
|
||||
if (reuse instanceof BlockDocsAndPositionsEnum) {
|
||||
docsAndPositionsEnum = (BlockDocsAndPositionsEnum) reuse;
|
||||
if (!indexHasPositions)
|
||||
return null;
|
||||
|
||||
if ((!indexHasOffsets || (flags & PostingsEnum.FLAG_OFFSETS) == 0) &&
|
||||
(!indexHasPayloads || (flags & PostingsEnum.FLAG_PAYLOADS) == 0)) {
BlockPostingsEnum docsAndPositionsEnum;
if (reuse instanceof BlockPostingsEnum) {
docsAndPositionsEnum = (BlockPostingsEnum) reuse;
if (!docsAndPositionsEnum.canReuse(docIn, fieldInfo)) {
docsAndPositionsEnum = new BlockDocsAndPositionsEnum(fieldInfo);
docsAndPositionsEnum = new BlockPostingsEnum(fieldInfo);
}
} else {
docsAndPositionsEnum = new BlockDocsAndPositionsEnum(fieldInfo);
docsAndPositionsEnum = new BlockPostingsEnum(fieldInfo);
}
return docsAndPositionsEnum.reset(liveDocs, (IntBlockTermState) termState);
} else {

@@ -302,7 +301,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
indexHasPayloads == fieldInfo.hasPayloads();
}

public DocsEnum reset(Bits liveDocs, IntBlockTermState termState, int flags) throws IOException {
public PostingsEnum reset(Bits liveDocs, IntBlockTermState termState, int flags) throws IOException {
this.liveDocs = liveDocs;

docFreq = termState.docFreq;

@@ -319,7 +318,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
}

doc = -1;
this.needsFreq = (flags & DocsEnum.FLAG_FREQS) != 0;
this.needsFreq = (flags & PostingsEnum.FLAG_FREQS) != 0;
if (indexHasFreq == false || needsFreq == false) {
Arrays.fill(freqBuffer, 1);
}

@@ -336,6 +335,12 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
return freq;
}

@Override
public int nextPosition() throws IOException {
assert false; // shouldn't be calling nextPosition() on this
return -1;
}

@Override
public int docID() {
return doc;

@@ -472,7 +477,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
}


final class BlockDocsAndPositionsEnum extends DocsAndPositionsEnum {
final class BlockPostingsEnum extends PostingsEnum {

private final byte[] encoded;


@@ -535,7 +540,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
private Bits liveDocs;
private int singletonDocID; // docid when there is a single pulsed posting, otherwise -1

public BlockDocsAndPositionsEnum(FieldInfo fieldInfo) throws IOException {
public BlockPostingsEnum(FieldInfo fieldInfo) throws IOException {
this.startDocIn = Lucene50PostingsReader.this.docIn;
this.docIn = null;
this.posIn = Lucene50PostingsReader.this.posIn.clone();

@@ -550,7 +555,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
indexHasPayloads == fieldInfo.hasPayloads();
}

public DocsAndPositionsEnum reset(Bits liveDocs, IntBlockTermState termState) throws IOException {
public PostingsEnum reset(Bits liveDocs, IntBlockTermState termState) throws IOException {
this.liveDocs = liveDocs;

docFreq = termState.docFreq;

@@ -769,6 +774,9 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {

@Override
public int nextPosition() throws IOException {

assert posPendingCount > 0;

if (posPendingFP != -1) {
posIn.seek(posPendingFP);
posPendingFP = -1;

@@ -813,7 +821,7 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
}

// Also handles payloads + offsets
final class EverythingEnum extends DocsAndPositionsEnum {
final class EverythingEnum extends PostingsEnum {

private final byte[] encoded;


@@ -960,8 +968,8 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {
lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
}

this.needsOffsets = (flags & DocsAndPositionsEnum.FLAG_OFFSETS) != 0;
this.needsPayloads = (flags & DocsAndPositionsEnum.FLAG_PAYLOADS) != 0;
this.needsOffsets = (flags & PostingsEnum.FLAG_OFFSETS) != 0;
this.needsPayloads = (flags & PostingsEnum.FLAG_PAYLOADS) != 0;

doc = -1;
accum = 0;

@@ -1228,6 +1236,8 @@ public final class Lucene50PostingsReader extends PostingsReaderBase {

@Override
public int nextPosition() throws IOException {
assert posPendingCount > 0;

if (posPendingFP != -1) {
posIn.seek(posPendingFP);
posPendingFP = -1;

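The codec now funnels every request through a single postings() entry point; which concrete enum comes back (doc-only, positions, or everything) is decided by the flags word. A minimal usage sketch, assuming a LeafReader with an indexed "body" field (the reader, field, and term are hypothetical; only API names present in this commit are used):

    import java.io.IOException;

    import org.apache.lucene.index.LeafReader;
    import org.apache.lucene.index.PostingsEnum;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.BytesRef;

    class PostingsFlagsSketch {
      // Walks one term's postings twice: once as cheaply as possible,
      // once with positions/offsets/payloads enabled.
      static void demo(LeafReader reader) throws IOException {
        Terms terms = reader.terms("body"); // hypothetical field
        if (terms == null) {
          return;
        }
        TermsEnum te = terms.iterator(null);
        if (te.seekExact(new BytesRef("lucene"))) {
          // Doc IDs only; the codec may hand back its leanest enum:
          PostingsEnum docs = te.postings(null, null, PostingsEnum.FLAG_NONE);
          while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            // freq() is undefined under FLAG_NONE; don't call it here
          }
          // Same method with richer flags replaces docsAndPositions():
          PostingsEnum everything = te.postings(null, null, PostingsEnum.FLAG_ALL);
          while (everything.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            for (int i = 0; i < everything.freq(); i++) {
              int pos = everything.nextPosition(); // positions now live on the base class
            }
          }
        }
      }
    }
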
@@ -374,7 +374,7 @@ class BufferedUpdatesStream implements Accountable {
final int startDelCount;

TermsEnum termsEnum;
DocsEnum docsEnum;
PostingsEnum postingsEnum;
BytesRef term;
boolean any;


@@ -562,12 +562,12 @@ class BufferedUpdatesStream implements Accountable {
if (state.delGen < delGen) {

// we don't need term frequencies for this
state.docsEnum = state.termsEnum.docs(state.rld.getLiveDocs(), state.docsEnum, DocsEnum.FLAG_NONE);
state.postingsEnum = state.termsEnum.postings(state.rld.getLiveDocs(), state.postingsEnum, PostingsEnum.FLAG_NONE);

assert state.docsEnum != null;
assert state.postingsEnum != null;

while (true) {
final int docID = state.docsEnum.nextDoc();
final int docID = state.postingsEnum.nextDoc();
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;
}

@@ -623,7 +623,7 @@ class BufferedUpdatesStream implements Accountable {

String currentField = null;
TermsEnum termsEnum = null;
DocsEnum docsEnum = null;
PostingsEnum postingsEnum = null;

for (DocValuesUpdate update : updates) {
Term term = update.term;

@@ -658,14 +658,14 @@ class BufferedUpdatesStream implements Accountable {

if (termsEnum.seekExact(term.bytes())) {
// we don't need term frequencies for this
docsEnum = termsEnum.docs(segState.rld.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
postingsEnum = termsEnum.postings(segState.rld.getLiveDocs(), postingsEnum, PostingsEnum.FLAG_NONE);

DocValuesFieldUpdates dvUpdates = dvUpdatesContainer.getUpdates(update.field, update.type);
if (dvUpdates == null) {
dvUpdates = dvUpdatesContainer.newUpdates(update.field, update.type, segState.reader.maxDoc());
}
int doc;
while ((doc = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (doc >= limit) {
break; // no more docs that can be updated for this term
}

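Applying buffered deletes and doc-values updates is pure docID iteration, so both paths above ask for PostingsEnum.FLAG_NONE. A hedged sketch of that pattern in isolation (segment state and the actual delete bookkeeping are elided):

    import java.io.IOException;

    import org.apache.lucene.index.PostingsEnum;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.Bits;
    import org.apache.lucene.util.BytesRef;

    class DeleteByTermSketch {
      // Resolves one delete term to the doc IDs it still matches.
      static void applyDelete(TermsEnum termsEnum, Bits liveDocs, BytesRef deleteTerm) throws IOException {
        if (termsEnum.seekExact(deleteTerm)) {
          // No freqs or positions needed, so FLAG_NONE keeps this cheap:
          PostingsEnum postings = termsEnum.postings(liveDocs, null, PostingsEnum.FLAG_NONE);
          int doc;
          while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            // mark doc deleted in the segment's liveDocs (elided)
          }
        }
      }
    }
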
@@ -882,9 +882,9 @@ public class CheckIndex implements Closeable {
final Status.TermIndexStatus status = new Status.TermIndexStatus();
int computedFieldCount = 0;

DocsEnum docs = null;
DocsEnum docsAndFreqs = null;
DocsAndPositionsEnum postings = null;
PostingsEnum docs = null;
PostingsEnum docsAndFreqs = null;
PostingsEnum postings = null;

String lastField = null;
for (String field : fields) {

@@ -1026,8 +1026,8 @@ public class CheckIndex implements Closeable {
}
sumDocFreq += docFreq;

docs = termsEnum.docs(liveDocs, docs);
postings = termsEnum.docsAndPositions(liveDocs, postings);
docs = termsEnum.postings(liveDocs, docs);
postings = termsEnum.postings(liveDocs, postings, PostingsEnum.FLAG_ALL);

if (hasFreqs == false) {
if (termsEnum.totalTermFreq() != -1) {

@@ -1051,7 +1051,7 @@ public class CheckIndex implements Closeable {
}
}

final DocsEnum docs2;
final PostingsEnum docs2;
if (postings != null) {
docs2 = postings;
} else {

@@ -1152,7 +1152,7 @@ public class CheckIndex implements Closeable {
// Re-count if there are deleted docs:
if (liveDocs != null) {
if (hasFreqs) {
final DocsEnum docsNoDel = termsEnum.docs(null, docsAndFreqs);
final PostingsEnum docsNoDel = termsEnum.postings(null, docsAndFreqs);
docCount = 0;
totalTermFreq = 0;
while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {

@@ -1161,7 +1161,7 @@ public class CheckIndex implements Closeable {
totalTermFreq += docsNoDel.freq();
}
} else {
final DocsEnum docsNoDel = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
final PostingsEnum docsNoDel = termsEnum.postings(null, docs, PostingsEnum.FLAG_NONE);
docCount = 0;
totalTermFreq = -1;
while(docsNoDel.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {

@@ -1188,7 +1188,7 @@ public class CheckIndex implements Closeable {
if (hasPositions) {
for(int idx=0;idx<7;idx++) {
final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
postings = termsEnum.docsAndPositions(liveDocs, postings);
postings = termsEnum.postings(liveDocs, postings, PostingsEnum.FLAG_ALL);
final int docID = postings.advance(skipDocID);
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;

@@ -1247,7 +1247,7 @@ public class CheckIndex implements Closeable {
} else {
for(int idx=0;idx<7;idx++) {
final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
docs = termsEnum.docs(liveDocs, docs, DocsEnum.FLAG_NONE);
docs = termsEnum.postings(liveDocs, docs, PostingsEnum.FLAG_NONE);
final int docID = docs.advance(skipDocID);
if (docID == DocIdSetIterator.NO_MORE_DOCS) {
break;

@@ -1315,7 +1315,7 @@ public class CheckIndex implements Closeable {
}

int expectedDocFreq = termsEnum.docFreq();
DocsEnum d = termsEnum.docs(null, null, DocsEnum.FLAG_NONE);
PostingsEnum d = termsEnum.postings(null, null, PostingsEnum.FLAG_NONE);
int docFreq = 0;
while (d.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
docFreq++;

@@ -1356,7 +1356,7 @@ public class CheckIndex implements Closeable {
throw new RuntimeException("seek to existing term " + seekTerms[i] + " failed");
}

docs = termsEnum.docs(liveDocs, docs, DocsEnum.FLAG_NONE);
docs = termsEnum.postings(liveDocs, docs, PostingsEnum.FLAG_NONE);
if (docs == null) {
throw new RuntimeException("null DocsEnum from to existing term " + seekTerms[i]);
}

@@ -1374,7 +1374,7 @@ public class CheckIndex implements Closeable {
}

totDocFreq += termsEnum.docFreq();
docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
docs = termsEnum.postings(null, docs, PostingsEnum.FLAG_NONE);
if (docs == null) {
throw new RuntimeException("null DocsEnum from to existing term " + seekTerms[i]);
}

@@ -1806,12 +1806,12 @@ public class CheckIndex implements Closeable {
infoStream.print(" test: term vectors........");
}

DocsEnum docs = null;
DocsAndPositionsEnum postings = null;
PostingsEnum docs = null;
PostingsEnum postings = null;

// Only used if crossCheckTermVectors is true:
DocsEnum postingsDocs = null;
DocsAndPositionsEnum postingsPostings = null;
PostingsEnum postingsDocs = null;
PostingsEnum postingsPostings = null;

final Bits liveDocs = reader.getLiveDocs();


@@ -1878,16 +1878,16 @@ public class CheckIndex implements Closeable {
while ((term = termsEnum.next()) != null) {

if (hasProx) {
postings = termsEnum.docsAndPositions(null, postings);
postings = termsEnum.postings(null, postings, PostingsEnum.FLAG_ALL);
assert postings != null;
docs = null;
} else {
docs = termsEnum.docs(null, docs);
docs = termsEnum.postings(null, docs);
assert docs != null;
postings = null;
}

final DocsEnum docs2;
final PostingsEnum docs2;
if (hasProx) {
assert postings != null;
docs2 = postings;

@@ -1896,14 +1896,14 @@ public class CheckIndex implements Closeable {
docs2 = docs;
}

final DocsEnum postingsDocs2;
final PostingsEnum postingsDocs2;
if (!postingsTermsEnum.seekExact(term)) {
throw new RuntimeException("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
}
postingsPostings = postingsTermsEnum.docsAndPositions(null, postingsPostings);
postingsPostings = postingsTermsEnum.postings(null, postingsPostings, PostingsEnum.FLAG_ALL);
if (postingsPostings == null) {
// Term vectors were indexed w/ pos but postings were not
postingsDocs = postingsTermsEnum.docs(null, postingsDocs);
postingsDocs = postingsTermsEnum.postings(null, postingsDocs);
if (postingsDocs == null) {
throw new RuntimeException("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j);
}

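CheckIndex exercises both extremes of the same entry point: FLAG_NONE when it only needs to count documents, FLAG_ALL when positions must be verified. A hedged sketch of the docFreq re-count idiom visible in the hunk above:

    import java.io.IOException;

    import org.apache.lucene.index.PostingsEnum;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.search.DocIdSetIterator;

    class DocFreqCheckSketch {
      // Recomputes docFreq by brute-force iteration and compares it
      // with the value the term dictionary reports.
      static void checkDocFreq(TermsEnum termsEnum) throws IOException {
        int expectedDocFreq = termsEnum.docFreq();
        PostingsEnum d = termsEnum.postings(null, null, PostingsEnum.FLAG_NONE);
        int docFreq = 0;
        while (d.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          docFreq++;
        }
        if (docFreq != expectedDocFreq) {
          throw new RuntimeException("docFreq mismatch: dictionary says " + expectedDocFreq
              + " but postings have " + docFreq);
        }
      }
    }
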
@@ -1,62 +0,0 @@
package org.apache.lucene.index;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;

import org.apache.lucene.util.Bits; // javadocs
import org.apache.lucene.util.BytesRef;

/** Also iterates through positions. */
public abstract class DocsAndPositionsEnum extends DocsEnum {

/** Flag to pass to {@link TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)}
* if you require offsets in the returned enum. */
public static final int FLAG_OFFSETS = 0x1;

/** Flag to pass to {@link TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)}
* if you require payloads in the returned enum. */
public static final int FLAG_PAYLOADS = 0x2;

/** Sole constructor. (For invocation by subclass
* constructors, typically implicit.) */
protected DocsAndPositionsEnum() {
}

/** Returns the next position. You should only call this
* up to {@link DocsEnum#freq()} times else
* the behavior is not defined. If positions were not
* indexed this will return -1; this only happens if
* offsets were indexed and you passed needsOffset=true
* when pulling the enum. */
public abstract int nextPosition() throws IOException;

/** Returns start offset for the current position, or -1
* if offsets were not indexed. */
public abstract int startOffset() throws IOException;

/** Returns end offset for the current position, or -1 if
* offsets were not indexed. */
public abstract int endOffset() throws IOException;

/** Returns the payload at this position, or null if no
* payload was indexed. You should not modify anything
* (neither members of the returned BytesRef nor bytes
* in the byte[]). */
public abstract BytesRef getPayload() throws IOException;
}

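With DocsAndPositionsEnum deleted outright, 4.x call sites migrate mechanically. A hedged before/after sketch (the enclosing TermsEnum and liveDocs are assumed to exist):

    import java.io.IOException;

    import org.apache.lucene.index.PostingsEnum;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.util.Bits;

    class MigrationSketch {
      static void migrate(TermsEnum termsEnum, Bits liveDocs) throws IOException {
        // before (4.x):
        //   DocsEnum de = termsEnum.docs(liveDocs, null, DocsEnum.FLAG_FREQS);
        //   DocsAndPositionsEnum dpe = termsEnum.docsAndPositions(liveDocs, null,
        //       DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS);
        // after (this commit):
        PostingsEnum de = termsEnum.postings(liveDocs, null, PostingsEnum.FLAG_FREQS);
        PostingsEnum dpe = termsEnum.postings(liveDocs, null,
            PostingsEnum.FLAG_OFFSETS | PostingsEnum.FLAG_PAYLOADS);
      }
    }
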
@@ -19,49 +19,52 @@ package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits; // javadocs
import org.apache.lucene.util.BytesRef;

/** Iterates through the documents and term freqs.
* NOTE: you must first call {@link #nextDoc} before using
* any of the per-doc methods. */
public abstract class DocsEnum extends DocIdSetIterator {

/**
* Flag to pass to {@link TermsEnum#docs(Bits,DocsEnum,int)} if you don't
* require term frequencies in the returned enum. When passed to
* {@link TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)} means
* that no offsets and payloads will be returned.
*/
public static final int FLAG_NONE = 0x0;
/**
* Convenience class returning empty values for positions, offsets and payloads
*/
public abstract class DocsEnum extends PostingsEnum {

/** Flag to pass to {@link TermsEnum#docs(Bits,DocsEnum,int)}
* if you require term frequencies in the returned enum. */
public static final int FLAG_FREQS = 0x1;

private AttributeSource atts = null;

/** Sole constructor. (For invocation by subclass
/** Sole constructor. (For invocation by subclass
* constructors, typically implicit.) */
protected DocsEnum() {
super();
}

/**
* Returns term frequency in the current document, or 1 if the field was
* indexed with {@link IndexOptions#DOCS}. Do not call this before
* {@link #nextDoc} is first called, nor after {@link #nextDoc} returns
* {@link DocIdSetIterator#NO_MORE_DOCS}.
*
* <p>
* <b>NOTE:</b> if the {@link DocsEnum} was obtain with {@link #FLAG_NONE},
* the result of this method is undefined.
* @return -1, indicating no positions are available
* @throws IOException if a low-level IO exception occurred
*/
public abstract int freq() throws IOException;

/** Returns the related attributes. */
public AttributeSource attributes() {
if (atts == null) atts = new AttributeSource();
return atts;
@Override
public int nextPosition() throws IOException {
return -1;
}

/**
* @return -1, indicating no offsets are available
* @throws IOException if a low-level IO exception occurred
*/
@Override
public int startOffset() throws IOException {
return -1;
}

/**
* @return -1, indicating no offsets are available
* @throws IOException if a low-level IO exception occurred
*/
@Override
public int endOffset() throws IOException {
return -1;
}

/**
* @return null, indicating no payloads are available
* @throws IOException if a low-level IO exception occurred
*/
@Override
public BytesRef getPayload() throws IOException {
return null;
}
}

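DocsEnum survives only as the convenience base shown above: a doc-and-freq iterator that inherits -1/null answers for positions, offsets, and payloads. A hedged sketch of what a minimal subclass now has to implement (the class itself is hypothetical; position methods deliberately rely on the inherited defaults):

    import java.io.IOException;

    import org.apache.lucene.index.DocsEnum;

    // Matches exactly one docID; everything position-related falls back to
    // the defaults DocsEnum provides (nextPosition() == -1, getPayload() == null).
    final class SingleDocEnum extends DocsEnum {
      private final int singleDoc;
      private int doc = -1;

      SingleDocEnum(int singleDoc) {
        this.singleDoc = singleDoc;
      }

      @Override public int freq() throws IOException { return 1; }
      @Override public int docID() { return doc; }

      @Override public int nextDoc() throws IOException {
        return doc = (doc == -1 ? singleDoc : NO_MORE_DOCS);
      }

      @Override public int advance(int target) throws IOException {
        return doc = (doc == -1 && target <= singleDoc ? singleDoc : NO_MORE_DOCS);
      }

      @Override public long cost() { return 1; }
    }
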
@@ -215,26 +215,22 @@ public class FilterLeafReader extends LeafReader {
}

@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
return in.docs(liveDocs, reuse, flags);
public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
return in.postings(liveDocs, reuse, flags);
}

@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
return in.docsAndPositions(liveDocs, reuse, flags);
}
}

/** Base class for filtering {@link DocsEnum} implementations. */
public static class FilterDocsEnum extends DocsEnum {
/** Base class for filtering {@link PostingsEnum} implementations. */
public static class FilterDocsEnum extends PostingsEnum {
/** The underlying DocsEnum instance. */
protected final DocsEnum in;
protected final PostingsEnum in;

/**
* Create a new FilterDocsEnum
* @param in the underlying DocsEnum instance.
*/
public FilterDocsEnum(DocsEnum in) {
public FilterDocsEnum(PostingsEnum in) {
if (in == null) {
throw new NullPointerException("incoming DocsEnum cannot be null");
}

@@ -266,53 +262,6 @@ public class FilterLeafReader extends LeafReader {
return in.advance(target);
}

@Override
public long cost() {
return in.cost();
}
}

/** Base class for filtering {@link DocsAndPositionsEnum} implementations. */
public static class FilterDocsAndPositionsEnum extends DocsAndPositionsEnum {
/** The underlying DocsAndPositionsEnum instance. */
protected final DocsAndPositionsEnum in;

/**
* Create a new FilterDocsAndPositionsEnum
* @param in the underlying DocsAndPositionsEnum instance.
*/
public FilterDocsAndPositionsEnum(DocsAndPositionsEnum in) {
if (in == null) {
throw new NullPointerException("incoming DocsAndPositionsEnum cannot be null");
}
this.in = in;
}

@Override
public AttributeSource attributes() {
return in.attributes();
}

@Override
public int docID() {
return in.docID();
}

@Override
public int freq() throws IOException {
return in.freq();
}

@Override
public int nextDoc() throws IOException {
return in.nextDoc();
}

@Override
public int advance(int target) throws IOException {
return in.advance(target);
}

@Override
public int nextPosition() throws IOException {
return in.nextPosition();

@@ -332,7 +281,7 @@ public class FilterLeafReader extends LeafReader {
public BytesRef getPayload() throws IOException {
return in.getPayload();
}


@Override
public long cost() {
return in.cost();

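FilterLeafReader's wrappers collapse the same way: one FilterDocsEnum over PostingsEnum instead of two parallel filter classes. A hedged sketch of a delegating subclass; the freq-clamping behavior is invented purely for illustration and assumes FilterDocsEnum delegates the remaining PostingsEnum methods as shown above:

    import java.io.IOException;

    import org.apache.lucene.index.FilterLeafReader;
    import org.apache.lucene.index.PostingsEnum;

    // Wraps any PostingsEnum and caps the reported term frequency;
    // all other calls fall through to the delegate.
    final class CappedFreqEnum extends FilterLeafReader.FilterDocsEnum {
      private final int maxFreq;

      CappedFreqEnum(PostingsEnum in, int maxFreq) {
        super(in);
        this.maxFreq = maxFreq;
      }

      @Override
      public int freq() throws IOException {
        return Math.min(in.freq(), maxFreq);
      }
    }
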
@@ -179,13 +179,8 @@ public abstract class FilteredTermsEnum extends TermsEnum {
}

@Override
public DocsEnum docs(Bits bits, DocsEnum reuse, int flags) throws IOException {
return tenum.docs(bits, reuse, flags);
}

@Override
public DocsAndPositionsEnum docsAndPositions(Bits bits, DocsAndPositionsEnum reuse, int flags) throws IOException {
return tenum.docsAndPositions(bits, reuse, flags);
public PostingsEnum postings(Bits bits, PostingsEnum reuse, int flags) throws IOException {
return tenum.postings(bits, reuse, flags);
}

/** This enum does not support seeking!

@@ -24,7 +24,7 @@ import java.util.List;
import java.util.Map;

import org.apache.lucene.index.FreqProxTermsWriterPerField.FreqProxPostingsArray;
import org.apache.lucene.util.AttributeSource; // javadocs
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

@@ -230,14 +230,41 @@ class FreqProxFields extends Fields {
}

@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) {
if (liveDocs != null) {
throw new IllegalArgumentException("liveDocs must be null");
}

if ((flags & PostingsEnum.FLAG_POSITIONS) >= PostingsEnum.FLAG_POSITIONS) {
FreqProxPostingsEnum posEnum;

if (!terms.hasProx) {
// Caller wants positions but we didn't index them;
// don't lie:
throw new IllegalArgumentException("did not index positions");
}

if (!terms.hasOffsets && (flags & PostingsEnum.FLAG_OFFSETS) == PostingsEnum.FLAG_OFFSETS) {
// Caller wants offsets but we didn't index them;
// don't lie:
throw new IllegalArgumentException("did not index offsets");
}

if (reuse instanceof FreqProxPostingsEnum) {
posEnum = (FreqProxPostingsEnum) reuse;
if (posEnum.postingsArray != postingsArray) {
posEnum = new FreqProxPostingsEnum(terms, postingsArray);
}
} else {
posEnum = new FreqProxPostingsEnum(terms, postingsArray);
}
posEnum.reset(sortedTermIDs[ord]);
return posEnum;
}

FreqProxDocsEnum docsEnum;

if (!terms.hasFreq && (flags & DocsEnum.FLAG_FREQS) != 0) {
if (!terms.hasFreq && (flags & PostingsEnum.FLAG_FREQS) != 0) {
// Caller wants freqs but we didn't index them;
// don't lie:
throw new IllegalArgumentException("did not index freq");

@@ -255,37 +282,6 @@ class FreqProxFields extends Fields {
return docsEnum;
}

@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
if (liveDocs != null) {
throw new IllegalArgumentException("liveDocs must be null");
}
FreqProxDocsAndPositionsEnum posEnum;

if (!terms.hasProx) {
// Caller wants positions but we didn't index them;
// don't lie:
throw new IllegalArgumentException("did not index positions");
}

if (!terms.hasOffsets && (flags & DocsAndPositionsEnum.FLAG_OFFSETS) != 0) {
// Caller wants offsets but we didn't index them;
// don't lie:
throw new IllegalArgumentException("did not index offsets");
}

if (reuse instanceof FreqProxDocsAndPositionsEnum) {
posEnum = (FreqProxDocsAndPositionsEnum) reuse;
if (posEnum.postingsArray != postingsArray) {
posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray);
}
} else {
posEnum = new FreqProxDocsAndPositionsEnum(terms, postingsArray);
}
posEnum.reset(sortedTermIDs[ord]);
return posEnum;
}

/**
* Expert: Returns the TermsEnums internal state to position the TermsEnum
* without re-seeking the term dictionary.

@@ -347,6 +343,12 @@ class FreqProxFields extends Fields {
}
}

@Override
public int nextPosition() throws IOException {
assert false : "Shouldn't be calling nextPositions on DocsEnum";
return -1;
}

@Override
public int nextDoc() throws IOException {
if (reader.eof()) {

@@ -389,7 +391,7 @@ class FreqProxFields extends Fields {
}
}

private static class FreqProxDocsAndPositionsEnum extends DocsAndPositionsEnum {
private static class FreqProxPostingsEnum extends PostingsEnum {

final FreqProxTermsWriterPerField terms;
final FreqProxPostingsArray postingsArray;

@@ -407,7 +409,7 @@ class FreqProxFields extends Fields {
boolean hasPayload;
BytesRefBuilder payload = new BytesRefBuilder();

public FreqProxDocsAndPositionsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray) {
public FreqProxPostingsEnum(FreqProxTermsWriterPerField terms, FreqProxPostingsArray postingsArray) {
this.terms = terms;
this.postingsArray = postingsArray;
this.readOffsets = terms.hasOffsets;

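The containment test above works because the new flag values nest bitwise: FLAG_POSITIONS is a superset of FLAG_FREQS, and so on (the constants are listed in the new PostingsEnum at the end of this diff). A hedged worked example of the arithmetic:

    import org.apache.lucene.index.PostingsEnum;

    class FlagMathSketch {
      public static void main(String[] args) {
        int flags = PostingsEnum.FLAG_OFFSETS;                 // 0x7 = 0b0111
        // 0b0111 & 0b0011 == 0b0011 (== FLAG_POSITIONS) -> positions requested
        boolean wantsPositions =
            (flags & PostingsEnum.FLAG_POSITIONS) >= PostingsEnum.FLAG_POSITIONS;
        // 0b0111 & 0b1011 == 0b0011 (< FLAG_PAYLOADS = 0xB) -> payloads not requested
        boolean wantsPayloads =
            (flags & PostingsEnum.FLAG_PAYLOADS) >= PostingsEnum.FLAG_PAYLOADS;
        System.out.println(wantsPositions + " " + wantsPayloads); // true false
      }
    }
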
@@ -43,7 +43,7 @@ final class FreqProxTermsWriter extends TermsHash {
Collections.sort(deleteTerms);
String lastField = null;
TermsEnum termsEnum = null;
DocsEnum docsEnum = null;
PostingsEnum postingsEnum = null;
for(Term deleteTerm : deleteTerms) {
if (deleteTerm.field().equals(lastField) == false) {
lastField = deleteTerm.field();

@@ -56,11 +56,11 @@ final class FreqProxTermsWriter extends TermsHash {
}

if (termsEnum != null && termsEnum.seekExact(deleteTerm.bytes())) {
docsEnum = termsEnum.docs(null, docsEnum, 0);
postingsEnum = termsEnum.postings(null, postingsEnum, 0);
int delDocLimit = segDeletes.get(deleteTerm);
assert delDocLimit < DocsEnum.NO_MORE_DOCS;
assert delDocLimit < PostingsEnum.NO_MORE_DOCS;
while (true) {
int doc = docsEnum.nextDoc();
int doc = postingsEnum.nextDoc();
if (doc < delDocLimit) {
if (state.liveDocs == null) {
state.liveDocs = state.segmentInfo.getCodec().liveDocsFormat().newLiveDocs(state.segmentInfo.getDocCount());

@@ -17,11 +17,11 @@ package org.apache.lucene.index;
* limitations under the License.
*/

import java.io.IOException;

import org.apache.lucene.index.IndexReader.ReaderClosedListener;
import org.apache.lucene.util.Bits;

import java.io.IOException;

/** {@code LeafReader} is an abstract class, providing an interface for accessing an
index. Search of an index is done entirely through this abstract interface,
so that any subclass which implements it is searchable. IndexReaders implemented

@@ -205,38 +205,25 @@ public abstract class LeafReader extends IndexReader {
return fields().terms(field);
}

/** Returns {@link DocsEnum} for the specified term.
/** Returns {@link PostingsEnum} for the specified term.
* This will return null if either the field or
* term does not exist.
* @see TermsEnum#docs(Bits, DocsEnum) */
public final DocsEnum termDocsEnum(Term term) throws IOException {
* @see TermsEnum#postings(Bits, PostingsEnum) */
public final PostingsEnum termDocsEnum(Term term, int flags) throws IOException {
assert term.field() != null;
assert term.bytes() != null;
final Terms terms = terms(term.field());
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term.bytes())) {
return termsEnum.docs(getLiveDocs(), null);
return termsEnum.postings(getLiveDocs(), null, flags);
}
}
return null;
}

/** Returns {@link DocsAndPositionsEnum} for the specified
* term. This will return null if the
* field or term does not exist or positions weren't indexed.
* @see TermsEnum#docsAndPositions(Bits, DocsAndPositionsEnum) */
public final DocsAndPositionsEnum termPositionsEnum(Term term) throws IOException {
assert term.field() != null;
assert term.bytes() != null;
final Terms terms = terms(term.field());
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term.bytes())) {
return termsEnum.docsAndPositions(getLiveDocs(), null);
}
}
return null;
public final PostingsEnum termDocsEnum(Term term) throws IOException {
return termDocsEnum(term, PostingsEnum.FLAG_FREQS);
}

/** Returns {@link NumericDocValues} for this field, or

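LeafReader keeps termDocsEnum() as sugar: the no-flags overload defaults to FLAG_FREQS, and the removed termPositionsEnum() is subsumed by passing position flags. A hedged usage sketch (field names and values are hypothetical):

    import java.io.IOException;

    import org.apache.lucene.index.LeafReader;
    import org.apache.lucene.index.PostingsEnum;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.DocIdSetIterator;

    class TermDocsEnumSketch {
      static void demo(LeafReader leaf) throws IOException {
        // docs + freqs (the one-argument overload's default):
        PostingsEnum docs = leaf.termDocsEnum(new Term("id", "42"));
        // docs + positions, replacing the removed termPositionsEnum(Term):
        PostingsEnum positions =
            leaf.termDocsEnum(new Term("body", "lucene"), PostingsEnum.FLAG_POSITIONS);
        if (positions != null) {
          while (positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            // call positions.nextPosition() up to positions.freq() times
          }
        }
      }
    }
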
@@ -103,10 +103,23 @@ public class MappedMultiFields extends FilterFields {
}

@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
if (liveDocs != null) {
throw new IllegalArgumentException("liveDocs must be null");
}

MappingMultiPostingsEnum mappingDocsAndPositionsEnum;
if (reuse instanceof MappingMultiPostingsEnum) {
mappingDocsAndPositionsEnum = (MappingMultiPostingsEnum) reuse;
} else {
mappingDocsAndPositionsEnum = new MappingMultiPostingsEnum(mergeState);
}

MultiPostingsEnum docsAndPositionsEnum = (MultiPostingsEnum) in.postings(liveDocs, mappingDocsAndPositionsEnum.multiDocsAndPositionsEnum, flags);
mappingDocsAndPositionsEnum.reset(docsAndPositionsEnum);
return mappingDocsAndPositionsEnum;

/*
MappingMultiDocsEnum mappingDocsEnum;
if (reuse instanceof MappingMultiDocsEnum) {
mappingDocsEnum = (MappingMultiDocsEnum) reuse;

@@ -116,24 +129,7 @@ public class MappedMultiFields extends FilterFields {

MultiDocsEnum docsEnum = (MultiDocsEnum) in.docs(liveDocs, mappingDocsEnum.multiDocsEnum, flags);
mappingDocsEnum.reset(docsEnum);
return mappingDocsEnum;
}

@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
if (liveDocs != null) {
throw new IllegalArgumentException("liveDocs must be null");
}
MappingMultiDocsAndPositionsEnum mappingDocsAndPositionsEnum;
if (reuse instanceof MappingMultiDocsAndPositionsEnum) {
mappingDocsAndPositionsEnum = (MappingMultiDocsAndPositionsEnum) reuse;
} else {
mappingDocsAndPositionsEnum = new MappingMultiDocsAndPositionsEnum(mergeState);
}

MultiDocsAndPositionsEnum docsAndPositionsEnum = (MultiDocsAndPositionsEnum) in.docsAndPositions(liveDocs, mappingDocsAndPositionsEnum.multiDocsAndPositionsEnum, flags);
mappingDocsAndPositionsEnum.reset(docsAndPositionsEnum);
return mappingDocsAndPositionsEnum;
return mappingDocsEnum;*/
}
}
}

@@ -1,121 +0,0 @@
package org.apache.lucene.index;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import org.apache.lucene.index.MultiDocsEnum.EnumWithSlice;

import java.io.IOException;

/**
* Exposes flex API, merged from flex API of sub-segments,
* remapping docIDs (this is used for segment merging).
*
* @lucene.experimental
*/

final class MappingMultiDocsEnum extends DocsEnum {
private MultiDocsEnum.EnumWithSlice[] subs;
int numSubs;
int upto;
MergeState.DocMap currentMap;
DocsEnum current;
int currentBase;
int doc = -1;
private final MergeState mergeState;
MultiDocsEnum multiDocsEnum;

/** Sole constructor. */
public MappingMultiDocsEnum(MergeState mergeState) {
this.mergeState = mergeState;
}

MappingMultiDocsEnum reset(MultiDocsEnum docsEnum) {
this.numSubs = docsEnum.getNumSubs();
this.subs = docsEnum.getSubs();
this.multiDocsEnum = docsEnum;
upto = -1;
current = null;
return this;
}

/** How many sub-readers we are merging.
* @see #getSubs */
public int getNumSubs() {
return numSubs;
}

/** Returns sub-readers we are merging. */
public EnumWithSlice[] getSubs() {
return subs;
}

@Override
public int freq() throws IOException {
return current.freq();
}

@Override
public int docID() {
return doc;
}

@Override
public int advance(int target) {
throw new UnsupportedOperationException();
}

@Override
public int nextDoc() throws IOException {
while(true) {
if (current == null) {
if (upto == numSubs-1) {
return this.doc = NO_MORE_DOCS;
} else {
upto++;
final int reader = subs[upto].slice.readerIndex;
current = subs[upto].docsEnum;
currentBase = mergeState.docBase[reader];
currentMap = mergeState.docMaps[reader];
assert currentMap.maxDoc() == subs[upto].slice.length: "readerIndex=" + reader + " subs.len=" + subs.length + " len1=" + currentMap.maxDoc() + " vs " + subs[upto].slice.length;
}
}

int doc = current.nextDoc();
if (doc != NO_MORE_DOCS) {
// compact deletions
doc = currentMap.get(doc);
if (doc == -1) {
continue;
}
return this.doc = currentBase + doc;
} else {
current = null;
}
}
}

@Override
public long cost() {
long cost = 0;
for (EnumWithSlice enumWithSlice : subs) {
cost += enumWithSlice.docsEnum.cost();
}
return cost;
}
}

@@ -17,11 +17,11 @@ package org.apache.lucene.index;
* limitations under the License.
*/

import org.apache.lucene.index.MultiDocsAndPositionsEnum.EnumWithSlice;
import org.apache.lucene.util.BytesRef;

import java.io.IOException;

import org.apache.lucene.index.MultiPostingsEnum.EnumWithSlice;
import org.apache.lucene.util.BytesRef;

/**
* Exposes flex API, merged from flex API of sub-segments,
* remapping docIDs (this is used for segment merging).

@@ -29,23 +29,23 @@ import java.io.IOException;
* @lucene.experimental
*/

final class MappingMultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
private MultiDocsAndPositionsEnum.EnumWithSlice[] subs;
final class MappingMultiPostingsEnum extends PostingsEnum {
private MultiPostingsEnum.EnumWithSlice[] subs;
int numSubs;
int upto;
MergeState.DocMap currentMap;
DocsAndPositionsEnum current;
PostingsEnum current;
int currentBase;
int doc = -1;
private MergeState mergeState;
MultiDocsAndPositionsEnum multiDocsAndPositionsEnum;
MultiPostingsEnum multiDocsAndPositionsEnum;

/** Sole constructor. */
public MappingMultiDocsAndPositionsEnum(MergeState mergeState) {
public MappingMultiPostingsEnum(MergeState mergeState) {
this.mergeState = mergeState;
}

MappingMultiDocsAndPositionsEnum reset(MultiDocsAndPositionsEnum postingsEnum) {
MappingMultiPostingsEnum reset(MultiPostingsEnum postingsEnum) {
this.numSubs = postingsEnum.getNumSubs();
this.subs = postingsEnum.getSubs();
upto = -1;

@@ -89,7 +89,7 @@ final class MappingMultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
} else {
upto++;
final int reader = subs[upto].slice.readerIndex;
current = subs[upto].docsAndPositionsEnum;
current = subs[upto].postingsEnum;
currentBase = mergeState.docBase[reader];
currentMap = mergeState.docMaps[reader];
}

@@ -133,7 +133,7 @@ final class MappingMultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
public long cost() {
long cost = 0;
for (EnumWithSlice enumWithSlice : subs) {
cost += enumWithSlice.docsAndPositionsEnum.cost();
cost += enumWithSlice.postingsEnum.cost();
}
return cost;
}

@@ -1,176 +0,0 @@
package org.apache.lucene.index;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/


import java.io.IOException;
import java.util.Arrays;

/**
* Exposes {@link DocsEnum}, merged from {@link DocsEnum}
* API of sub-segments.
*
* @lucene.experimental
*/

public final class MultiDocsEnum extends DocsEnum {
private final MultiTermsEnum parent;
final DocsEnum[] subDocsEnum;
private final EnumWithSlice[] subs;
int numSubs;
int upto;
DocsEnum current;
int currentBase;
int doc = -1;

/** Sole constructor
* @param parent The {@link MultiTermsEnum} that created us.
* @param subReaderCount How many sub-readers are being merged. */
public MultiDocsEnum(MultiTermsEnum parent, int subReaderCount) {
this.parent = parent;
subDocsEnum = new DocsEnum[subReaderCount];
this.subs = new EnumWithSlice[subReaderCount];
for (int i = 0; i < subs.length; i++) {
subs[i] = new EnumWithSlice();
}
}

MultiDocsEnum reset(final EnumWithSlice[] subs, final int numSubs) {
this.numSubs = numSubs;

for(int i=0;i<numSubs;i++) {
this.subs[i].docsEnum = subs[i].docsEnum;
this.subs[i].slice = subs[i].slice;
}
upto = -1;
doc = -1;
current = null;
return this;
}

/** Returns {@code true} if this instance can be reused by
* the provided {@link MultiTermsEnum}. */
public boolean canReuse(MultiTermsEnum parent) {
return this.parent == parent;
}

/** How many sub-readers we are merging.
* @see #getSubs */
public int getNumSubs() {
return numSubs;
}

/** Returns sub-readers we are merging. */
public EnumWithSlice[] getSubs() {
return subs;
}

@Override
public int freq() throws IOException {
return current.freq();
}

@Override
public int docID() {
return doc;
}

@Override
public int advance(int target) throws IOException {
assert target > doc;
while(true) {
if (current != null) {
final int doc;
if (target < currentBase) {
// target was in the previous slice but there was no matching doc after it
doc = current.nextDoc();
} else {
doc = current.advance(target-currentBase);
}
if (doc == NO_MORE_DOCS) {
current = null;
} else {
return this.doc = doc + currentBase;
}
} else if (upto == numSubs-1) {
return this.doc = NO_MORE_DOCS;
} else {
upto++;
current = subs[upto].docsEnum;
currentBase = subs[upto].slice.start;
}
}
}

@Override
public int nextDoc() throws IOException {
while(true) {
if (current == null) {
if (upto == numSubs-1) {
return this.doc = NO_MORE_DOCS;
} else {
upto++;
current = subs[upto].docsEnum;
currentBase = subs[upto].slice.start;
}
}

final int doc = current.nextDoc();
if (doc != NO_MORE_DOCS) {
return this.doc = currentBase + doc;
} else {
current = null;
}
}
}

@Override
public long cost() {
long cost = 0;
for (int i = 0; i < numSubs; i++) {
cost += subs[i].docsEnum.cost();
}
return cost;
}

// TODO: implement bulk read more efficiently than super
/** Holds a {@link DocsEnum} along with the
* corresponding {@link ReaderSlice}. */
public final static class EnumWithSlice {
EnumWithSlice() {
}

/** {@link DocsEnum} of this sub-reader. */
public DocsEnum docsEnum;

/** {@link ReaderSlice} describing how this sub-reader
* fits into the composite reader. */
public ReaderSlice slice;

@Override
public String toString() {
return slice.toString()+":"+docsEnum;
}
}

@Override
public String toString() {
return "MultiDocsEnum(" + Arrays.toString(getSubs()) + ")";
}
}

@@ -120,55 +120,55 @@ public final class MultiFields extends Fields {
return getFields(r).terms(field);
}

/** Returns {@link DocsEnum} for the specified field and
/** Returns {@link PostingsEnum} for the specified field and
* term. This will return null if the field or term does
* not exist. */
public static DocsEnum getTermDocsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term) throws IOException {
return getTermDocsEnum(r, liveDocs, field, term, DocsEnum.FLAG_FREQS);
public static PostingsEnum getTermDocsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term) throws IOException {
return getTermDocsEnum(r, liveDocs, field, term, PostingsEnum.FLAG_FREQS);
}

/** Returns {@link DocsEnum} for the specified field and
/** Returns {@link PostingsEnum} for the specified field and
* term, with control over whether freqs are required.
* Some codecs may be able to optimize their
* implementation when freqs are not required. This will
* return null if the field or term does not exist. See {@link
* TermsEnum#docs(Bits,DocsEnum,int)}.*/
public static DocsEnum getTermDocsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, int flags) throws IOException {
* TermsEnum#postings(Bits, PostingsEnum,int)}.*/
public static PostingsEnum getTermDocsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, int flags) throws IOException {
assert field != null;
assert term != null;
final Terms terms = getTerms(r, field);
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term)) {
return termsEnum.docs(liveDocs, null, flags);
return termsEnum.postings(liveDocs, null, flags);
}
}
return null;
}

/** Returns {@link DocsAndPositionsEnum} for the specified
/** Returns {@link PostingsEnum} for the specified
* field and term. This will return null if the field or
* term does not exist or positions were not indexed.
* @see #getTermPositionsEnum(IndexReader, Bits, String, BytesRef, int) */
public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term) throws IOException {
return getTermPositionsEnum(r, liveDocs, field, term, DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS);
public static PostingsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term) throws IOException {
return getTermPositionsEnum(r, liveDocs, field, term, PostingsEnum.FLAG_OFFSETS | PostingsEnum.FLAG_PAYLOADS);
}

/** Returns {@link DocsAndPositionsEnum} for the specified
/** Returns {@link PostingsEnum} for the specified
* field and term, with control over whether offsets and payloads are
* required. Some codecs may be able to optimize
* their implementation when offsets and/or payloads are not
* required. This will return null if the field or term does not
* exist or positions were not indexed. See {@link
* TermsEnum#docsAndPositions(Bits,DocsAndPositionsEnum,int)}. */
public static DocsAndPositionsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, int flags) throws IOException {
* TermsEnum#postings(Bits, PostingsEnum,int)}. */
public static PostingsEnum getTermPositionsEnum(IndexReader r, Bits liveDocs, String field, BytesRef term, int flags) throws IOException {
assert field != null;
assert term != null;
final Terms terms = getTerms(r, field);
if (terms != null) {
final TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(term)) {
return termsEnum.docsAndPositions(liveDocs, null, flags);
return termsEnum.postings(liveDocs, null, flags);
}
}
return null;

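The MultiFields helpers keep their shapes but now return PostingsEnum everywhere. A hedged usage sketch over a composite reader (field and term are hypothetical; MultiFields.getLiveDocs is assumed from the pre-existing class):

    import java.io.IOException;

    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiFields;
    import org.apache.lucene.index.PostingsEnum;
    import org.apache.lucene.util.Bits;
    import org.apache.lucene.util.BytesRef;

    class MultiFieldsSketch {
      static void demo(IndexReader reader) throws IOException {
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        // docs + freqs across all segments:
        PostingsEnum docs =
            MultiFields.getTermDocsEnum(reader, liveDocs, "body", new BytesRef("lucene"));
        // docs + positions + offsets + payloads (null if positions weren't indexed):
        PostingsEnum positions =
            MultiFields.getTermPositionsEnum(reader, liveDocs, "body", new BytesRef("lucene"));
      }
    }
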
@@ -17,31 +17,34 @@ package org.apache.lucene.index;
* limitations under the License.
*/

import org.apache.lucene.util.BytesRef;

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.util.BytesRef;

/**
* Exposes flex API, merged from flex API of sub-segments.
* Exposes {@link PostingsEnum}, merged from {@link PostingsEnum}
* API of sub-segments.
*
* @lucene.experimental
*/

public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
public final class MultiPostingsEnum extends PostingsEnum {
private final MultiTermsEnum parent;
final DocsAndPositionsEnum[] subDocsAndPositionsEnum;
final PostingsEnum[] subPostingsEnums;
private final EnumWithSlice[] subs;
int numSubs;
int upto;
DocsAndPositionsEnum current;
PostingsEnum current;
int currentBase;
int doc = -1;

/** Sole constructor. */
public MultiDocsAndPositionsEnum(MultiTermsEnum parent, int subReaderCount) {
/** Sole constructor.
* @param parent The {@link MultiTermsEnum} that created us.
* @param subReaderCount How many sub-readers are being merged. */
public MultiPostingsEnum(MultiTermsEnum parent, int subReaderCount) {
this.parent = parent;
subDocsAndPositionsEnum = new DocsAndPositionsEnum[subReaderCount];
subPostingsEnums = new PostingsEnum[subReaderCount];
this.subs = new EnumWithSlice[subReaderCount];
for (int i = 0; i < subs.length; i++) {
subs[i] = new EnumWithSlice();

@@ -55,10 +58,10 @@ public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
}

/** Rre-use and reset this instance on the provided slices. */
public MultiDocsAndPositionsEnum reset(final EnumWithSlice[] subs, final int numSubs) {
public MultiPostingsEnum reset(final EnumWithSlice[] subs, final int numSubs) {
this.numSubs = numSubs;
for(int i=0;i<numSubs;i++) {
this.subs[i].docsAndPositionsEnum = subs[i].docsAndPositionsEnum;
this.subs[i].postingsEnum = subs[i].postingsEnum;
this.subs[i].slice = subs[i].slice;
}
upto = -1;

@@ -110,7 +113,7 @@ public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
return this.doc = NO_MORE_DOCS;
} else {
upto++;
current = subs[upto].docsAndPositionsEnum;
current = subs[upto].postingsEnum;
currentBase = subs[upto].slice.start;
}
}

@@ -124,7 +127,7 @@ public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
return this.doc = NO_MORE_DOCS;
} else {
upto++;
current = subs[upto].docsAndPositionsEnum;
current = subs[upto].postingsEnum;
currentBase = subs[upto].slice.start;
}
}

@@ -159,14 +162,14 @@ public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
}

// TODO: implement bulk read more efficiently than super
/** Holds a {@link DocsAndPositionsEnum} along with the
/** Holds a {@link PostingsEnum} along with the
* corresponding {@link ReaderSlice}. */
public final static class EnumWithSlice {
EnumWithSlice() {
}

/** {@link DocsAndPositionsEnum} for this sub-reader. */
public DocsAndPositionsEnum docsAndPositionsEnum;
/** {@link PostingsEnum} for this sub-reader. */
public PostingsEnum postingsEnum;

/** {@link ReaderSlice} describing how this sub-reader
* fits into the composite reader. */

@@ -174,7 +177,7 @@ public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum {

@Override
public String toString() {
return slice.toString()+":"+docsAndPositionsEnum;
return slice.toString()+":"+ postingsEnum;
}
}

@@ -182,7 +185,7 @@ public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
public long cost() {
long cost = 0;
for (int i = 0; i < numSubs; i++) {
cost += subs[i].docsAndPositionsEnum.cost();
cost += subs[i].postingsEnum.cost();
}
return cost;
}

@@ -17,14 +17,14 @@ package org.apache.lucene.index;
* limitations under the License.
*/

import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Bits;

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.PriorityQueue;

/**
* Exposes {@link TermsEnum} API, merged from {@link TermsEnum} API of sub-segments.
* This does a merge sort, by term text, of the sub-readers.

@@ -37,8 +37,7 @@ public final class MultiTermsEnum extends TermsEnum {
private final TermsEnumWithSlice[] subs; // all of our subs (one per sub-reader)
private final TermsEnumWithSlice[] currentSubs; // current subs that have at least one term for this field
private final TermsEnumWithSlice[] top;
private final MultiDocsEnum.EnumWithSlice[] subDocs;
private final MultiDocsAndPositionsEnum.EnumWithSlice[] subDocsAndPositions;
private final MultiPostingsEnum.EnumWithSlice[] subDocs;

private BytesRef lastSeek;
private boolean lastSeekExact;

@@ -77,14 +76,11 @@ public final class MultiTermsEnum extends TermsEnum {
queue = new TermMergeQueue(slices.length);
top = new TermsEnumWithSlice[slices.length];
subs = new TermsEnumWithSlice[slices.length];
subDocs = new MultiDocsEnum.EnumWithSlice[slices.length];
subDocsAndPositions = new MultiDocsAndPositionsEnum.EnumWithSlice[slices.length];
subDocs = new MultiPostingsEnum.EnumWithSlice[slices.length];
for(int i=0;i<slices.length;i++) {
subs[i] = new TermsEnumWithSlice(i, slices[i]);
subDocs[i] = new MultiDocsEnum.EnumWithSlice();
subDocs[i] = new MultiPostingsEnum.EnumWithSlice();
subDocs[i].slice = slices[i];
subDocsAndPositions[i] = new MultiDocsAndPositionsEnum.EnumWithSlice();
subDocsAndPositions[i].slice = slices[i];
}
currentSubs = new TermsEnumWithSlice[slices.length];
}

@@ -331,17 +327,18 @@ public final class MultiTermsEnum extends TermsEnum {
}

@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
MultiDocsEnum docsEnum;
public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
MultiPostingsEnum docsEnum;

// Can only reuse if incoming enum is also a MultiDocsEnum
if (reuse != null && reuse instanceof MultiDocsEnum) {
docsEnum = (MultiDocsEnum) reuse;
if (reuse != null && reuse instanceof MultiPostingsEnum) {
docsEnum = (MultiPostingsEnum) reuse;
// ... and was previously created w/ this MultiTermsEnum:
if (!docsEnum.canReuse(this)) {
docsEnum = new MultiDocsEnum(this, subs.length);
docsEnum = new MultiPostingsEnum(this, subs.length);
}
} else {
docsEnum = new MultiDocsEnum(this, subs.length);
docsEnum = new MultiPostingsEnum(this, subs.length);
}

final MultiBits multiLiveDocs;

@@ -380,16 +377,16 @@ public final class MultiTermsEnum extends TermsEnum {
b = null;
}

assert entry.index < docsEnum.subDocsEnum.length: entry.index + " vs " + docsEnum.subDocsEnum.length + "; " + subs.length;
final DocsEnum subDocsEnum = entry.terms.docs(b, docsEnum.subDocsEnum[entry.index], flags);
if (subDocsEnum != null) {
docsEnum.subDocsEnum[entry.index] = subDocsEnum;
subDocs[upto].docsEnum = subDocsEnum;
assert entry.index < docsEnum.subPostingsEnums.length: entry.index + " vs " + docsEnum.subPostingsEnums.length + "; " + subs.length;
final PostingsEnum subPostingsEnum = entry.terms.postings(b, docsEnum.subPostingsEnums[entry.index], flags);
if (subPostingsEnum != null) {
docsEnum.subPostingsEnums[entry.index] = subPostingsEnum;
subDocs[upto].postingsEnum = subPostingsEnum;
subDocs[upto].slice = entry.subSlice;
upto++;
} else {
// should this be an error?
assert false : "One of our subs cannot provide a docsenum";
return null; // We can't support what is being asked for
}
}


@@ -400,82 +397,6 @@ public final class MultiTermsEnum extends TermsEnum {
}
}

@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
MultiDocsAndPositionsEnum docsAndPositionsEnum;
// Can only reuse if incoming enum is also a MultiDocsAndPositionsEnum
if (reuse != null && reuse instanceof MultiDocsAndPositionsEnum) {
docsAndPositionsEnum = (MultiDocsAndPositionsEnum) reuse;
// ... and was previously created w/ this MultiTermsEnum:
if (!docsAndPositionsEnum.canReuse(this)) {
docsAndPositionsEnum = new MultiDocsAndPositionsEnum(this, subs.length);
}
} else {
docsAndPositionsEnum = new MultiDocsAndPositionsEnum(this, subs.length);
}

final MultiBits multiLiveDocs;
if (liveDocs instanceof MultiBits) {
multiLiveDocs = (MultiBits) liveDocs;
} else {
multiLiveDocs = null;
}

int upto = 0;

for(int i=0;i<numTop;i++) {

final TermsEnumWithSlice entry = top[i];

final Bits b;

if (multiLiveDocs != null) {
// Optimize for common case: requested skip docs is a
// congruent sub-slice of MultiBits: in this case, we
// just pull the liveDocs from the sub reader, rather
// than making the inefficient
// Slice(Multi(sub-readers)):
final MultiBits.SubResult sub = multiLiveDocs.getMatchingSub(top[i].subSlice);
if (sub.matches) {
b = sub.result;
} else {
// custom case: requested skip docs is foreign:
// must slice it on every access (very
// inefficient)
b = new BitsSlice(liveDocs, top[i].subSlice);
}
} else if (liveDocs != null) {
b = new BitsSlice(liveDocs, top[i].subSlice);
} else {
// no deletions
b = null;
}

assert entry.index < docsAndPositionsEnum.subDocsAndPositionsEnum.length: entry.index + " vs " + docsAndPositionsEnum.subDocsAndPositionsEnum.length + "; " + subs.length;
final DocsAndPositionsEnum subPostings = entry.terms.docsAndPositions(b, docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index], flags);

if (subPostings != null) {
docsAndPositionsEnum.subDocsAndPositionsEnum[entry.index] = subPostings;
subDocsAndPositions[upto].docsAndPositionsEnum = subPostings;
subDocsAndPositions[upto].slice = entry.subSlice;
upto++;
} else {
if (entry.terms.docs(b, null, DocsEnum.FLAG_NONE) != null) {
// At least one of our subs does not store
// offsets or positions -- we can't correctly
// produce a MultiDocsAndPositions enum
return null;
}
}
}

if (upto == 0) {
return null;
} else {
return docsAndPositionsEnum.reset(subDocsAndPositions, upto);
}
}

final static class TermsEnumWithSlice {
private final ReaderSlice subSlice;
TermsEnum terms;

@@ -0,0 +1,114 @@
package org.apache.lucene.index;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;

import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

/** Iterates through the postings.
* NOTE: you must first call {@link #nextDoc} before using
* any of the per-doc methods. */
public abstract class PostingsEnum extends DocIdSetIterator {

/**
* Flag to pass to {@link TermsEnum#postings(Bits, PostingsEnum, int)} if you don't
* require per-document postings in the returned enum.
*/
public static final int FLAG_NONE = 0x0;

/** Flag to pass to {@link TermsEnum#postings(Bits, PostingsEnum, int)}
* if you require term frequencies in the returned enum. */
public static final int FLAG_FREQS = 0x1;

/** Flag to pass to {@link TermsEnum#postings(Bits, PostingsEnum, int)}
* if you require term positions in the returned enum. */
public static final int FLAG_POSITIONS = 0x3;

/** Flag to pass to {@link TermsEnum#postings(Bits, PostingsEnum, int)}
* if you require offsets in the returned enum. */
public static final int FLAG_OFFSETS = 0x7;

/** Flag to pass to {@link TermsEnum#postings(Bits, PostingsEnum, int)}
* if you require payloads in the returned enum. */
public static final int FLAG_PAYLOADS = 0xB;

/**
* Flag to pass to {@link TermsEnum#postings(Bits, PostingsEnum, int)}
* to get positions, payloads and offsets in the returned enum
*/
public static final int FLAG_ALL = FLAG_OFFSETS | FLAG_PAYLOADS;

/**
* Returns true if the passed in flags require positions to be indexed
* @param flags the postings flags
* @return true if the passed in flags require positions to be indexed
*/
public static boolean requiresPositions(int flags) {
return ((flags & FLAG_POSITIONS) >= FLAG_POSITIONS);
}

private AttributeSource atts = null;

/** Sole constructor. (For invocation by subclass
* constructors, typically implicit.) */
protected PostingsEnum() {
}

/**
* Returns term frequency in the current document, or 1 if the field was
* indexed with {@link IndexOptions#DOCS}. Do not call this before
* {@link #nextDoc} is first called, nor after {@link #nextDoc} returns
* {@link DocIdSetIterator#NO_MORE_DOCS}.
*
* <p>
* <b>NOTE:</b> if the {@link PostingsEnum} was obtained with {@link #FLAG_NONE},
* the result of this method is undefined.
*/
public abstract int freq() throws IOException;

/** Returns the related attributes. */
public AttributeSource attributes() {
if (atts == null) atts = new AttributeSource();
return atts;
}

/**
* Returns the next position. If there are no more
* positions, or the iterator does not support positions,
* this will return {@code NO_MORE_POSITIONS} */
public abstract int nextPosition() throws IOException;

/** Returns start offset for the current position, or -1
* if offsets were not indexed. */
public abstract int startOffset() throws IOException;

/** Returns end offset for the current position, or -1 if
* offsets were not indexed. */
public abstract int endOffset() throws IOException;

/** Returns the payload at this position, or null if no
* payload was indexed. You should not modify anything
* (neither members of the returned BytesRef nor bytes
* in the byte[]). */
public abstract BytesRef getPayload() throws IOException;

}

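For orientation, a minimal usage sketch of the new class; it assumes an open IndexReader named reader and a field "body" indexed with positions, and the identifiers are illustrative rather than part of this patch:

    // Hypothetical caller (not part of this patch): walk one term's postings,
    // requesting only the level of detail needed via the flags above.
    Terms terms = MultiFields.getTerms(reader, "body");
    TermsEnum te = terms.iterator(null);
    if (te.seekExact(new BytesRef("lucene"))) {
      PostingsEnum postings = te.postings(null, null, PostingsEnum.FLAG_POSITIONS);
      while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        int freq = postings.freq();
        for (int i = 0; i < freq; i++) {
          int position = postings.nextPosition();
        }
      }
    }
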
@@ -106,12 +106,7 @@ class SortedDocValuesTermsEnum extends TermsEnum {
}

@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
throw new UnsupportedOperationException();
}

@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
throw new UnsupportedOperationException();
}


@@ -17,12 +17,12 @@ package org.apache.lucene.index;
* limitations under the License.
*/

import java.io.IOException;

import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;

import java.io.IOException;

/** Implements a {@link TermsEnum} wrapping a provided
* {@link SortedSetDocValues}. */

@@ -106,12 +106,7 @@ class SortedSetDocValuesTermsEnum extends TermsEnum {
}

@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
throw new UnsupportedOperationException();
}

@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
throw new UnsupportedOperationException();
}


@@ -17,11 +17,11 @@ package org.apache.lucene.index;
* limitations under the License.
*/

import org.apache.lucene.util.BytesRef;

import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.util.BytesRef;

/**
* Maintains a {@link IndexReader} {@link TermState} view over
* {@link IndexReader} instances containing a single term. The

@@ -111,7 +111,7 @@ public abstract class Terms {
public abstract int getDocCount() throws IOException;

/** Returns true if documents in this field store
* per-document term frequency ({@link DocsEnum#freq}). */
* per-document term frequency ({@link PostingsEnum#freq}). */
public abstract boolean hasFreqs();

/** Returns true if documents in this field store offsets. */

@@ -27,9 +27,9 @@ import org.apache.lucene.util.BytesRefIterator;
/** Iterator to seek ({@link #seekCeil(BytesRef)}, {@link
* #seekExact(BytesRef)}) or step through ({@link
* #next} terms to obtain frequency information ({@link
* #docFreq}), {@link DocsEnum} or {@link
* DocsAndPositionsEnum} for the current term ({@link
* #docs}.
* #docFreq}) or {@link PostingsEnum}
* for the current term ({@link
* #postings}).
*
* <p>Term enumerations are always ordered by
* BytesRef.compareTo, which is Unicode sort

@@ -138,57 +138,30 @@ public abstract class TermsEnum implements BytesRefIterator {
* deleted documents into account. */
public abstract long totalTermFreq() throws IOException;

/** Get {@link DocsEnum} for the current term. Do not
/** Get {@link PostingsEnum} for the current term. Do not
* call this when the enum is unpositioned. This method
* will not return null.
*
* @param liveDocs unset bits are documents that should not
* be returned
* @param reuse pass a prior DocsEnum for possible reuse */
public final DocsEnum docs(Bits liveDocs, DocsEnum reuse) throws IOException {
return docs(liveDocs, reuse, DocsEnum.FLAG_FREQS);
* @param reuse pass a prior PostingsEnum for possible reuse */
public final PostingsEnum postings(Bits liveDocs, PostingsEnum reuse) throws IOException {
return postings(liveDocs, reuse, PostingsEnum.FLAG_FREQS);
}

/** Get {@link DocsEnum} for the current term, with
* control over whether freqs are required. Do not
* call this when the enum is unpositioned. This method
* will not return null.
/** Get {@link PostingsEnum} for the current term, with
* control over whether freqs, positions, offsets or payloads
* are required. Do not call this when the enum is
* unpositioned. This method may return null if the postings
* information required is not available from the index
*
* @param liveDocs unset bits are documents that should not
* be returned
* @param reuse pass a prior DocsEnum for possible reuse
* @param reuse pass a prior PostingsEnum for possible reuse
* @param flags specifies which optional per-document values
* you require; see {@link DocsEnum#FLAG_FREQS}
* @see #docs(Bits, DocsEnum, int) */
public abstract DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException;

/** Get {@link DocsAndPositionsEnum} for the current term.
* Do not call this when the enum is unpositioned. This
* method will return null if positions were not
* indexed.
*
* @param liveDocs unset bits are documents that should not
* be returned
* @param reuse pass a prior DocsAndPositionsEnum for possible reuse
* @see #docsAndPositions(Bits, DocsAndPositionsEnum, int) */
public final DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse) throws IOException {
return docsAndPositions(liveDocs, reuse, DocsAndPositionsEnum.FLAG_OFFSETS | DocsAndPositionsEnum.FLAG_PAYLOADS);
}

/** Get {@link DocsAndPositionsEnum} for the current term,
* with control over whether offsets and payloads are
* required. Some codecs may be able to optimize their
* implementation when offsets and/or payloads are not required.
* Do not call this when the enum is unpositioned. This
* will return null if positions were not indexed.

* @param liveDocs unset bits are documents that should not
* be returned
* @param reuse pass a prior DocsAndPositionsEnum for possible reuse
* @param flags specifies which optional per-position values you
* require; see {@link DocsAndPositionsEnum#FLAG_OFFSETS} and
* {@link DocsAndPositionsEnum#FLAG_PAYLOADS}. */
public abstract DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException;
* you require; see {@link PostingsEnum#FLAG_FREQS}
* @see #postings(Bits, PostingsEnum, int) */
public abstract PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException;
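A hedged sketch of the reuse idiom this single entry point supports (termsEnum and liveDocs are assumed to be in scope; illustrative only):

    // Reuse one PostingsEnum across terms; postings() may return null when the
    // index cannot supply the requested level (e.g. positions not indexed).
    PostingsEnum reuse = null;
    while (termsEnum.next() != null) {
      reuse = termsEnum.postings(liveDocs, reuse, PostingsEnum.FLAG_POSITIONS);
      if (reuse == null) {
        break; // positions were not indexed for this field
      }
      while (reuse.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        // consume doc, freq and positions here
      }
    }
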

/**
* Expert: Returns the TermsEnums internal state to position the TermsEnum

@@ -245,12 +218,7 @@ public abstract class TermsEnum implements BytesRefIterator {
}

@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) {
throw new IllegalStateException("this method should never be called");
}

@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) {
public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) {
throw new IllegalStateException("this method should never be called");
}


@@ -273,5 +241,6 @@ public abstract class TermsEnum implements BytesRefIterator {
public void seekExact(BytesRef term, TermState state) {
throw new IllegalStateException("this method should never be called");
}

};
}

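The net effect on callers is a mechanical migration; a hedged before/after sketch (variable names are illustrative):

    // Before: two methods, two enum types.
    DocsEnum docs = termsEnum.docs(liveDocs, null, DocsEnum.FLAG_FREQS);
    DocsAndPositionsEnum positions = termsEnum.docsAndPositions(liveDocs, null);

    // After: one method, detail level chosen by flags.
    PostingsEnum docsOnly = termsEnum.postings(liveDocs, null, PostingsEnum.FLAG_FREQS);
    PostingsEnum withPositions = termsEnum.postings(liveDocs, null, PostingsEnum.FLAG_POSITIONS);
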
@@ -17,15 +17,16 @@ package org.apache.lucene.search;
* limitations under the License.
*/

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;

/**
* Caches all docs, and optionally also scores, coming from
* a search, and is then able to replay them to another

@@ -73,11 +74,32 @@ public abstract class CachingCollector extends FilterCollector {
@Override
public final int freq() { throw new UnsupportedOperationException(); }

@Override
public int nextPosition() throws IOException {
throw new UnsupportedOperationException();
}

@Override
public int startOffset() throws IOException {
throw new UnsupportedOperationException();
}

@Override
public int endOffset() throws IOException {
throw new UnsupportedOperationException();
}

@Override
public BytesRef getPayload() throws IOException {
throw new UnsupportedOperationException();
}

@Override
public final int nextDoc() { throw new UnsupportedOperationException(); }

@Override
public long cost() { return 1; }

}

private static class NoScoreCachingCollector extends CachingCollector {

@@ -72,7 +72,7 @@ public interface Collector {
* next atomic reader context
*/
LeafCollector getLeafCollector(LeafReaderContext context) throws IOException;


/**
* Indicates if document scores are needed by this collector.
*

@@ -23,9 +23,11 @@ import java.util.Collection;
import java.util.Comparator;

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;

/** Scorer for conjunctions, sets of queries, all of which are required. */
class ConjunctionScorer extends Scorer {

protected int lastDoc = -1;
protected final DocsAndFreqs[] docsAndFreqs;
private final DocsAndFreqs lead;

@@ -34,7 +36,7 @@ class ConjunctionScorer extends Scorer {
ConjunctionScorer(Weight weight, Scorer[] scorers) {
this(weight, scorers, 1f);
}


ConjunctionScorer(Weight weight, Scorer[] scorers, float coord) {
super(weight);
this.coord = coord;

@@ -109,12 +111,32 @@ class ConjunctionScorer extends Scorer {
}
return sum * coord;
}


@Override
public int freq() {
return docsAndFreqs.length;
}

@Override
public int nextPosition() throws IOException {
return -1;
}

@Override
public int startOffset() throws IOException {
return -1;
}

@Override
public int endOffset() throws IOException {
return -1;
}

@Override
public BytesRef getPayload() throws IOException {
return null;
}

@Override
public long cost() {
return lead.scorer.cost();

@@ -133,7 +155,7 @@ class ConjunctionScorer extends Scorer {
final long cost;
final Scorer scorer;
int doc = -1;


DocsAndFreqs(Scorer scorer) {
this.scorer = scorer;
this.cost = scorer.cost();

@@ -17,17 +17,18 @@ package org.apache.lucene.search;
* limitations under the License.
*/

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Set;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;

/**
* A query that wraps another query or a filter and simply returns a constant score equal to the
* query boost for every document that matches the filter or query.

@@ -135,7 +136,6 @@ public class ConstantScoreQuery extends Query {

@Override
public BulkScorer bulkScorer(LeafReaderContext context, Bits acceptDocs, boolean needsScores) throws IOException {
final DocIdSetIterator disi;
if (filter != null) {
assert query == null;
return super.bulkScorer(context, acceptDocs, needsScores);

@@ -151,23 +151,26 @@ public class ConstantScoreQuery extends Query {

@Override
public Scorer scorer(LeafReaderContext context, Bits acceptDocs, boolean needsScores) throws IOException {
final DocIdSetIterator disi;
if (filter != null) {
assert query == null;
final DocIdSet dis = filter.getDocIdSet(context, acceptDocs);
if (dis == null) {
return null;
}
disi = dis.iterator();
final DocIdSetIterator disi = dis.iterator();
if (disi == null)
return null;
return new ConstantDocIdSetIteratorScorer(disi, this, queryWeight);
} else {
assert query != null && innerWeight != null;
disi = innerWeight.scorer(context, acceptDocs, false);
Scorer scorer = innerWeight.scorer(context, acceptDocs, false);
if (scorer == null) {
return null;
}
return new ConstantScoreScorer(scorer, queryWeight);
}

if (disi == null) {
return null;
}
return new ConstantScorer(disi, this, queryWeight);

}

@Override

@@ -216,7 +219,7 @@ public class ConstantScoreQuery extends Query {
@Override
public void setScorer(Scorer scorer) throws IOException {
// we must wrap again here, but using the scorer passed in as parameter:
in.setScorer(new ConstantScorer(scorer, weight, theScore));
in.setScorer(new ConstantScoreScorer(scorer, theScore));
}
};
}

@@ -227,11 +230,40 @@ public class ConstantScoreQuery extends Query {
}
}

protected class ConstantScorer extends Scorer {
protected class ConstantScoreScorer extends FilterScorer {

private final float score;

public ConstantScoreScorer(Scorer wrapped, float score) {
super(wrapped);
this.score = score;
}

@Override
public int freq() throws IOException {
return 1;
}

@Override
public float score() throws IOException {
return score;
}

@Override
public Collection<ChildScorer> getChildren() {
if (query != null) {
return Collections.singletonList(new ChildScorer(in, "constant"));
} else {
return Collections.emptyList();
}
}
}

protected class ConstantDocIdSetIteratorScorer extends Scorer {
final DocIdSetIterator docIdSetIterator;
final float theScore;

public ConstantScorer(DocIdSetIterator docIdSetIterator, Weight w, float theScore) {
public ConstantDocIdSetIteratorScorer(DocIdSetIterator docIdSetIterator, Weight w, float theScore) {
super(w);
this.theScore = theScore;
this.docIdSetIterator = docIdSetIterator;

@@ -258,11 +290,31 @@ public class ConstantScoreQuery extends Query {
return 1;
}

@Override
public int nextPosition() throws IOException {
return -1;
}

@Override
public int startOffset() throws IOException {
return -1;
}

@Override
public int endOffset() throws IOException {
return -1;
}

@Override
public BytesRef getPayload() throws IOException {
return null;
}

@Override
public int advance(int target) throws IOException {
return docIdSetIterator.advance(target);
}


@Override
public long cost() {
return docIdSetIterator.cost();

@@ -22,15 +22,18 @@ import java.util.ArrayList;
import java.util.Collection;

import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
import org.apache.lucene.util.BytesRef;

/**
* Base class for Scorers that score disjunctions.
*/
abstract class DisjunctionScorer extends Scorer {

private final ScorerPriorityQueue subScorers;

/** The document number of the current match. */
protected int doc = -1;
protected int numScorers;
/** Number of matching scorers for the current match. */
private int freq = -1;
/** Linked list of scorers which are on the current doc */

@@ -56,6 +59,26 @@ abstract class DisjunctionScorer extends Scorer {
return children;
}

@Override
public int nextPosition() throws IOException {
return -1;
}

@Override
public int startOffset() throws IOException {
return -1;
}

@Override
public int endOffset() throws IOException {
return -1;
}

@Override
public BytesRef getPayload() throws IOException {
return null;
}

@Override
public final long cost() {
long sum = 0;

@@ -21,7 +21,6 @@ import java.io.IOException;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocsEnum; // javadoc @link
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.Bits;

@@ -63,7 +62,7 @@ import org.apache.lucene.util.FixedBitSet;
*
* In contrast, TermsFilter builds up an {@link FixedBitSet},
* keyed by docID, every time it's created, by enumerating
* through all matching docs using {@link DocsEnum} to seek
* through all matching docs using {@link org.apache.lucene.index.PostingsEnum} to seek
* and scan through each term's docID list. While there is
* no linear scan of all docIDs, besides the allocation of
* the underlying array in the {@link FixedBitSet}, this

@@ -22,6 +22,7 @@ import java.util.Arrays;

import org.apache.lucene.index.*;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.BytesRef;

final class ExactPhraseScorer extends Scorer {
private final int endMinus1;

@@ -35,21 +36,21 @@ final class ExactPhraseScorer extends Scorer {
private final long cost;

private final static class ChunkState {
final DocsAndPositionsEnum posEnum;
final PostingsEnum posEnum;
final int offset;
int posUpto;
int posLimit;
int pos;
int lastPos;

public ChunkState(DocsAndPositionsEnum posEnum, int offset) {
public ChunkState(PostingsEnum posEnum, int offset) {
this.posEnum = posEnum;
this.offset = offset;
}
}

private final ChunkState[] chunkStates;
private final DocsAndPositionsEnum lead;
private final PostingsEnum lead;

private int docID = -1;
private int freq;

@@ -81,7 +82,7 @@ final class ExactPhraseScorer extends Scorer {
// TODO: don't dup this logic from conjunctionscorer :)
advanceHead: for(;;) {
for (int i = 1; i < chunkStates.length; i++) {
final DocsAndPositionsEnum de = chunkStates[i].posEnum;
final PostingsEnum de = chunkStates[i].posEnum;
if (de.docID() < doc) {
int d = de.advance(doc);


@@ -126,6 +127,26 @@ final class ExactPhraseScorer extends Scorer {
return freq;
}

@Override
public int nextPosition() throws IOException {
return -1;
}

@Override
public int startOffset() throws IOException {
return -1;
}

@Override
public int endOffset() throws IOException {
return -1;
}

@Override
public BytesRef getPayload() throws IOException {
return null;
}

@Override
public int docID() {
return docID;

@@ -17,8 +17,11 @@ package org.apache.lucene.search;
* limitations under the License.
*/

import java.io.IOException;
import java.util.Collection;

import org.apache.lucene.util.BytesRef;

/** Used by {@link BulkScorer}s that need to pass a {@link
* Scorer} to {@link LeafCollector#setScorer}. */
final class FakeScorer extends Scorer {

@@ -45,6 +48,26 @@ final class FakeScorer extends Scorer {
return freq;
}

@Override
public int nextPosition() throws IOException {
throw new UnsupportedOperationException("FakeScorer doesn't support nextPosition()");
}

@Override
public int startOffset() throws IOException {
throw new UnsupportedOperationException("FakeScorer doesn't support startOffset()");
}

@Override
public int endOffset() throws IOException {
throw new UnsupportedOperationException("FakeScorer doesn't support endOffset()");
}

@Override
public BytesRef getPayload() throws IOException {
throw new UnsupportedOperationException("FakeScorer doesn't support getPayload()");
}

@Override
public int nextDoc() {
throw new UnsupportedOperationException("FakeScorer doesn't support nextDoc()");

@@ -18,9 +18,9 @@ package org.apache.lucene.search;
*/

import java.io.IOException;
import java.util.Collection;

import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;

/**
* A {@code FilterScorer} contains another {@code Scorer}, which it

@@ -32,13 +32,27 @@ import org.apache.lucene.util.AttributeSource;
* further override some of these methods and may also provide additional
* methods and fields.
*/
abstract class FilterScorer extends Scorer {
public abstract class FilterScorer extends Scorer {
protected final Scorer in;


/**
* Create a new FilterScorer
* @param in the {@link Scorer} to wrap
*/
public FilterScorer(Scorer in) {
super(in.weight);
this.in = in;
}

/**
* Create a new FilterScorer with a specific weight
* @param in the {@link Scorer} to wrap
* @param weight a {@link Weight}
*/
public FilterScorer(Scorer in, Weight weight) {
super(weight);
this.in = in;
}

@Override
public float score() throws IOException {

@@ -60,6 +74,11 @@ abstract class FilterScorer extends Scorer {
return in.nextDoc();
}

@Override
public int nextPosition() throws IOException {
return in.nextPosition();
}

@Override
public int advance(int target) throws IOException {
return in.advance(target);

@@ -70,6 +89,21 @@ abstract class FilterScorer extends Scorer {
return in.cost();
}

@Override
public int startOffset() throws IOException {
return in.startOffset();
}

@Override
public int endOffset() throws IOException {
return in.endOffset();
}

@Override
public BytesRef getPayload() throws IOException {
return in.getPayload();
}

@Override
public AttributeSource attributes() {
return in.attributes();

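With FilterScorer now public and forwarding the whole PostingsEnum surface, a wrapper only overrides what it changes; a sketch under that assumption (the class below is hypothetical, not in this patch):

    // Hypothetical subclass: only score() is overridden; nextPosition(),
    // startOffset(), endOffset() and getPayload() are inherited forwards.
    final class DampedScorer extends FilterScorer {
      DampedScorer(Scorer in) {
        super(in);
      }
      @Override
      public float score() throws IOException {
        return in.score() * 0.5f;
      }
    }
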
@@ -17,17 +17,17 @@ package org.apache.lucene.search;
* limitations under the License.
*/

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Set;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;


/**
* A query that applies a filter to the results of another query.

@@ -143,6 +143,7 @@ public class FilteredQuery extends Query {
}

return strategy.filteredBulkScorer(context, weight, filterDocIdSet, needsScores);

}
};
}

@@ -153,13 +154,13 @@ public class FilteredQuery extends Query {
* than document scoring or if the filter has a linear running time to compute
* the next matching doc like exact geo distances.
*/
private static final class QueryFirstScorer extends Scorer {
private static final class QueryFirstScorer extends FilterScorer {
private final Scorer scorer;
private int scorerDoc = -1;
private final Bits filterBits;

protected QueryFirstScorer(Weight weight, Bits filterBits, Scorer other) {
super(weight);
super(other, weight);
this.scorer = other;
this.filterBits = filterBits;
}

@@ -184,29 +185,16 @@ public class FilteredQuery extends Query {
return scorerDoc = doc;
}
}

@Override
public int docID() {
return scorerDoc;
}

@Override
public float score() throws IOException {
return scorer.score();
}

@Override
public int freq() throws IOException { return scorer.freq(); }


@Override
public Collection<ChildScorer> getChildren() {
return Collections.singleton(new ChildScorer(scorer, "FILTERED"));
}

@Override
public long cost() {
return scorer.cost();
}
}

private static class QueryFirstBulkScorer extends BulkScorer {

@@ -254,7 +242,7 @@ public class FilteredQuery extends Query {
* jumping past the target document. When both land on the same document, it's
* collected.
*/
private static final class LeapFrogScorer extends Scorer {
private static final class LeapFrogScorer extends FilterScorer {
private final DocIdSetIterator secondary;
private final DocIdSetIterator primary;
private final Scorer scorer;

@@ -262,7 +250,7 @@ public class FilteredQuery extends Query {
private int secondaryDoc = -1;

protected LeapFrogScorer(Weight weight, DocIdSetIterator primary, DocIdSetIterator secondary, Scorer scorer) {
super(weight);
super(scorer, weight);
this.primary = primary;
this.secondary = secondary;
this.scorer = scorer;

@@ -302,17 +290,7 @@ public class FilteredQuery extends Query {
public final int docID() {
return secondaryDoc;
}

@Override
public final float score() throws IOException {
return scorer.score();
}

@Override
public final int freq() throws IOException {
return scorer.freq();
}


@Override
public final Collection<ChildScorer> getChildren() {
return Collections.singleton(new ChildScorer(scorer, "FILTERED"));

@@ -489,6 +467,7 @@ public class FilteredQuery extends Query {
// ignore scoreDocsInOrder:
return new Weight.DefaultBulkScorer(scorer);
}

}

/**

@@ -595,8 +574,7 @@ public class FilteredQuery extends Query {
return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, weight, docIdSet, needsScores);
}
final Scorer scorer = weight.scorer(context, null, needsScores);
return scorer == null ? null : new QueryFirstScorer(weight,
filterAcceptDocs, scorer);
return scorer == null ? null : new QueryFirstScorer(weight, filterAcceptDocs, scorer);
}

@Override

@@ -22,8 +22,7 @@ import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermState;

@@ -266,14 +265,8 @@ public class FuzzyTermsEnum extends TermsEnum {
}

@Override
public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) throws IOException {
return actualEnum.docs(liveDocs, reuse, flags);
}

@Override
public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
DocsAndPositionsEnum reuse, int flags) throws IOException {
return actualEnum.docsAndPositions(liveDocs, reuse, flags);
public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) throws IOException {
return actualEnum.postings(liveDocs, reuse, flags);
}

@Override

@@ -17,14 +17,15 @@ package org.apache.lucene.search;
* limitations under the License.
*/

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.Bits;

import java.util.Set;
import java.io.IOException;
import java.util.Set;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;

/**
* A query that matches all documents.

@@ -72,6 +73,26 @@ public class MatchAllDocsQuery extends Query {
return 1;
}

@Override
public int nextPosition() throws IOException {
return -1;
}

@Override
public int startOffset() throws IOException {
return -1;
}

@Override
public int endOffset() throws IOException {
return -1;
}

@Override
public BytesRef getPayload() throws IOException {
return null;
}

@Override
public int advance(int target) throws IOException {
doc = target-1;

@@ -17,10 +17,6 @@ package org.apache.lucene.search;
* limitations under the License.
*/

import static org.apache.lucene.search.ScorerPriorityQueue.leftNode;
import static org.apache.lucene.search.ScorerPriorityQueue.parentNode;
import static org.apache.lucene.search.ScorerPriorityQueue.rightNode;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;

@@ -28,8 +24,13 @@ import java.util.Collections;
import java.util.List;

import org.apache.lucene.search.ScorerPriorityQueue.ScorerWrapper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.PriorityQueue;

import static org.apache.lucene.search.ScorerPriorityQueue.leftNode;
import static org.apache.lucene.search.ScorerPriorityQueue.parentNode;
import static org.apache.lucene.search.ScorerPriorityQueue.rightNode;

/**
* A {@link Scorer} for {@link BooleanQuery} when
* {@link BooleanQuery#setMinimumNumberShouldMatch(int) minShouldMatch} is

@@ -229,6 +230,26 @@ final class MinShouldMatchSumScorer extends Scorer {
}
}

@Override
public int nextPosition() throws IOException {
return -1;
}

@Override
public int startOffset() throws IOException {
return -1;
}

@Override
public int endOffset() throws IOException {
return -1;
}

@Override
public BytesRef getPayload() throws IOException {
return null;
}

/** Advance tail to the lead until there is a match. */
private int doNext() throws IOException {
while (freq < minShouldMatch) {

@@ -111,7 +111,6 @@ public class MultiCollector implements Collector {
return new MultiLeafCollector(leafCollectors);
}


private static class MultiLeafCollector implements LeafCollector {

private final LeafCollector[] collectors;

@@ -21,9 +21,8 @@ import java.io.IOException;
import java.util.*;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.Term;

@@ -197,11 +196,11 @@ public class MultiPhraseQuery extends Query {
for (int pos=0; pos<postingsFreqs.length; pos++) {
Term[] terms = termArrays.get(pos);

final DocsAndPositionsEnum postingsEnum;
final PostingsEnum postingsEnum;
int docFreq;

if (terms.length > 1) {
postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum);
postingsEnum = new UnionPostingsEnum(liveDocs, context, terms, termContexts, termsEnum);

// coarse -- this overcounts since a given doc can
// have more than one term:

@@ -229,11 +228,11 @@ public class MultiPhraseQuery extends Query {
return null;
}
termsEnum.seekExact(term.bytes(), termState);
postingsEnum = termsEnum.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);
postingsEnum = termsEnum.postings(liveDocs, null, PostingsEnum.FLAG_POSITIONS);

if (postingsEnum == null) {
// term does exist, but has no positions
assert termsEnum.docs(liveDocs, null, DocsEnum.FLAG_NONE) != null: "termstate found but no term exists in reader";
assert termsEnum.postings(liveDocs, null, PostingsEnum.FLAG_NONE) != null: "termstate found but no term exists in reader";
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
}


@@ -408,15 +407,15 @@ public class MultiPhraseQuery extends Query {
*/

// TODO: if ever we allow subclassing of the *PhraseScorer
class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
class UnionPostingsEnum extends PostingsEnum {

private static final class DocsQueue extends PriorityQueue<DocsAndPositionsEnum> {
DocsQueue(List<DocsAndPositionsEnum> docsEnums) throws IOException {
super(docsEnums.size());
private static final class DocsQueue extends PriorityQueue<PostingsEnum> {
DocsQueue(List<PostingsEnum> postingsEnums) throws IOException {
super(postingsEnums.size());

Iterator<DocsAndPositionsEnum> i = docsEnums.iterator();
Iterator<PostingsEnum> i = postingsEnums.iterator();
while (i.hasNext()) {
DocsAndPositionsEnum postings = i.next();
PostingsEnum postings = i.next();
if (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
add(postings);
}

@@ -424,7 +423,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
}

@Override
public final boolean lessThan(DocsAndPositionsEnum a, DocsAndPositionsEnum b) {
public final boolean lessThan(PostingsEnum a, PostingsEnum b) {
return a.docID() < b.docID();
}
}

@@ -473,8 +472,8 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
private IntQueue _posList;
private long cost;

public UnionDocsAndPositionsEnum(Bits liveDocs, LeafReaderContext context, Term[] terms, Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
List<DocsAndPositionsEnum> docsEnums = new LinkedList<>();
public UnionPostingsEnum(Bits liveDocs, LeafReaderContext context, Term[] terms, Map<Term, TermContext> termContexts, TermsEnum termsEnum) throws IOException {
List<PostingsEnum> postingsEnums = new LinkedList<>();
for (int i = 0; i < terms.length; i++) {
final Term term = terms[i];
TermState termState = termContexts.get(term).get(context.ord);

@@ -483,16 +482,16 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
continue;
}
termsEnum.seekExact(term.bytes(), termState);
DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);
PostingsEnum postings = termsEnum.postings(liveDocs, null, PostingsEnum.FLAG_POSITIONS);
if (postings == null) {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
}
cost += postings.cost();
docsEnums.add(postings);
postingsEnums.add(postings);
}

_queue = new DocsQueue(docsEnums);
_queue = new DocsQueue(postingsEnums);
_posList = new IntQueue();
}


@@ -509,7 +508,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
_doc = _queue.top().docID();

// merge sort all positions together
DocsAndPositionsEnum postings;
PostingsEnum postings;
do {
postings = _queue.top();


@@ -554,7 +553,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
@Override
public final int advance(int target) throws IOException {
while (_queue.top() != null && target > _queue.top().docID()) {
DocsAndPositionsEnum postings = _queue.pop();
PostingsEnum postings = _queue.pop();
if (postings.advance(target) != NO_MORE_DOCS) {
_queue.add(postings);
}

@@ -19,9 +19,7 @@ package org.apache.lucene.search;

import java.io.IOException;

import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;

@@ -94,9 +92,9 @@ public class MultiTermQueryWrapperFilter<Q extends MultiTermQuery> extends Filte
assert termsEnum != null;

BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
DocsEnum docs = null;
PostingsEnum docs = null;
while (termsEnum.next() != null) {
docs = termsEnum.docs(acceptDocs, docs, DocsEnum.FLAG_NONE);
docs = termsEnum.postings(acceptDocs, docs, PostingsEnum.FLAG_NONE);
builder.or(docs);
}
return builder.build();

@@ -29,13 +29,13 @@ final class PhrasePositions {
int count; // remaining pos in this doc
int offset; // position in phrase
final int ord; // unique across all PhrasePositions instances
final DocsAndPositionsEnum postings; // stream of docs & positions
final PostingsEnum postings; // stream of docs & positions
PhrasePositions next; // used to make lists
int rptGroup = -1; // >=0 indicates that this is a repeating PP
int rptInd; // index in the rptGroup
final Term[] terms; // for repetitions initialization

PhrasePositions(DocsAndPositionsEnum postings, int o, int ord, Term[] terms) {
PhrasePositions(PostingsEnum postings, int o, int ord, Term[] terms) {
this.postings = postings;
offset = o;
this.ord = ord;

@@ -22,19 +22,18 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Set;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.SimScorer;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;

@@ -138,13 +137,13 @@ public class PhraseQuery extends Query {
}

static class PostingsAndFreq implements Comparable<PostingsAndFreq> {
final DocsAndPositionsEnum postings;
final PostingsEnum postings;
final int docFreq;
final int position;
final Term[] terms;
final int nTerms; // for faster comparisons

public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term... terms) {
public PostingsAndFreq(PostingsEnum postings, int docFreq, int position, Term... terms) {
this.postings = postings;
this.docFreq = docFreq;
this.position = position;

@@ -267,7 +266,7 @@ public class PhraseQuery extends Query {
return null;
}
te.seekExact(t.bytes(), state);
DocsAndPositionsEnum postingsEnum = te.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);
PostingsEnum postingsEnum = te.postings(liveDocs, null, PostingsEnum.FLAG_POSITIONS);

// PhraseQuery on a field that did not index
// positions.

@@ -276,7 +275,7 @@ public class PhraseQuery extends Query {
// term does exist, but has no positions
throw new IllegalStateException("field \"" + t.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.text() + ")");
}
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i).intValue(), t);
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.docFreq(), positions.get(i), t);
}

// sort by increasing docFreq order

@@ -17,13 +17,14 @@ package org.apache.lucene.search;
* limitations under the License.
*/

import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.LeafReaderContext;

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

import org.apache.lucene.index.LeafReaderContext;

/** A {@link Rescorer} that uses a provided Query to assign
* scores to the first-pass hits.
*

@@ -17,11 +17,12 @@ package org.apache.lucene.search;
* limitations under the License.
*/

import java.io.IOException;

import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.Bits;

import java.io.IOException;

/**
* Constrains search results to only match those which also match a provided
* query.

@@ -27,7 +27,7 @@ import java.util.Collections;
* This <code>Scorer</code> implements {@link Scorer#advance(int)},
* and it uses the skipTo() on the given scorers.
*/
class ReqExclScorer extends Scorer {
class ReqExclScorer extends FilterScorer {
private Scorer reqScorer;
private DocIdSetIterator exclDisi;
private int doc = -1;

@@ -37,7 +37,7 @@ class ReqExclScorer extends Scorer {
* @param exclDisi indicates exclusion.
*/
public ReqExclScorer(Scorer reqScorer, DocIdSetIterator exclDisi) {
super(reqScorer.weight);
super(reqScorer);
this.reqScorer = reqScorer;
this.exclDisi = exclDisi;
}

@@ -103,11 +103,6 @@ class ReqExclScorer extends Scorer {
public float score() throws IOException {
return reqScorer.score(); // reqScorer may be null when next() or skipTo() already return false
}

@Override
public int freq() throws IOException {
return reqScorer.freq();
}

@Override
public Collection<ChildScorer> getChildren() {

@@ -129,8 +124,4 @@ class ReqExclScorer extends Scorer {
return doc = toNonExcluded();
}

@Override
public long cost() {
return reqScorer.cost();
}
}

@@ -20,6 +20,8 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;

import org.apache.lucene.util.BytesRef;

/** A Scorer for queries with a required part and an optional part.
* Delays skipTo() on the optional part until a score() is needed.
* <br>

@@ -92,6 +94,26 @@ class ReqOptSumScorer extends Scorer {
return (optScorer != null && optScorer.docID() == reqScorer.docID()) ? 2 : 1;
}

@Override
public int nextPosition() throws IOException {
return -1;
}

@Override
public int startOffset() throws IOException {
return -1;
}

@Override
public int endOffset() throws IOException {
return -1;
}

@Override
public BytesRef getPayload() throws IOException {
return null;
}

@Override
public Collection<ChildScorer> getChildren() {
ArrayList<ChildScorer> children = new ArrayList<>(2);

@@ -21,7 +21,7 @@ import java.io.IOException;
import java.util.Collection;
import java.util.Collections;

import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.PostingsEnum;

/**
* Expert: Common scoring functionality for different types of queries.

@@ -41,7 +41,7 @@ import org.apache.lucene.index.DocsEnum;
* TopScoreDocCollector}) will not properly collect hits
* with these scores.
*/
public abstract class Scorer extends DocsEnum {
public abstract class Scorer extends PostingsEnum {
/** the Scorer's parent Weight. in some cases this may be null */
// TODO can we clean this up?
protected final Weight weight;

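This one-line supertype change is what drives the many small scorer edits elsewhere in this commit: every Scorer is now a PostingsEnum and must answer the per-position methods. Non-positional scorers stub them out, as the surrounding hunks show; the recurring pattern, for reference:

    // Recurring stub pattern for scorers that carry no position data:
    @Override
    public int nextPosition() throws IOException {
      return -1; // no positions available from this scorer
    }

    @Override
    public BytesRef getPayload() throws IOException {
      return null; // no payloads either
    }
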
@ -27,6 +27,7 @@ import java.util.LinkedHashMap;
|
|||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
final class SloppyPhraseScorer extends Scorer {
|
||||
|
@ -527,7 +528,27 @@ final class SloppyPhraseScorer extends Scorer {
|
|||
public int freq() {
|
||||
return numMatches;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int nextPosition() throws IOException {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startOffset() throws IOException {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endOffset() throws IOException {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getPayload() throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
float sloppyFreq() {
|
||||
return sloppyFreq;
|
||||
}
|
||||
|
|
|
@@ -20,60 +20,64 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 import java.util.Set;
 
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.IndexReaderContext;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermContext;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.ToStringUtils;
 
-/** A Query that matches documents containing a term.
-  This may be combined with other terms with a {@link BooleanQuery}.
-  */
+/**
+ * A Query that matches documents containing a term. This may be combined with
+ * other terms with a {@link BooleanQuery}.
+ */
 public class TermQuery extends Query {
   private final Term term;
   private final int docFreq;
   private final TermContext perReaderTermState;
 
   final class TermWeight extends Weight {
     private final Similarity similarity;
     private final Similarity.SimWeight stats;
     private final TermContext termStates;
 
     public TermWeight(IndexSearcher searcher, TermContext termStates)
-      throws IOException {
+        throws IOException {
       assert termStates != null : "TermContext must not be null";
       this.termStates = termStates;
       this.similarity = searcher.getSimilarity();
-      this.stats = similarity.computeWeight(
-          getBoost(),
-          searcher.collectionStatistics(term.field()),
+      this.stats = similarity.computeWeight(getBoost(),
+          searcher.collectionStatistics(term.field()),
           searcher.termStatistics(term, termStates));
     }
 
     @Override
-    public String toString() { return "weight(" + TermQuery.this + ")"; }
+    public String toString() {
+      return "weight(" + TermQuery.this + ")";
+    }
 
     @Override
-    public Query getQuery() { return TermQuery.this; }
+    public Query getQuery() {
+      return TermQuery.this;
+    }
 
     @Override
     public float getValueForNormalization() {
       return stats.getValueForNormalization();
     }
 
     @Override
     public void normalize(float queryNorm, float topLevelBoost) {
       stats.normalize(queryNorm, topLevelBoost);
     }
 
     @Override
     public Scorer scorer(LeafReaderContext context, Bits acceptDocs, boolean needsScores) throws IOException {
       assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context);
@@ -81,7 +85,7 @@ public class TermQuery extends Query {
       if (termsEnum == null) {
         return null;
       }
-      DocsEnum docs = termsEnum.docs(acceptDocs, null, needsScores ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
+      PostingsEnum docs = termsEnum.postings(acceptDocs, null, needsScores ? PostingsEnum.FLAG_FREQS : PostingsEnum.FLAG_NONE);
       assert docs != null;
       return new TermScorer(this, docs, similarity.simScorer(stats, context));
     }
@@ -96,15 +100,18 @@ public class TermQuery extends Query {
         assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term;
         return null;
       }
-      //System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
-      final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null);
+      // System.out.println("LD=" + reader.getLiveDocs() + " set?=" +
+      // (reader.getLiveDocs() != null ? reader.getLiveDocs().get(0) : "null"));
+      final TermsEnum termsEnum = context.reader().terms(term.field())
+          .iterator(null);
       termsEnum.seekExact(term.bytes(), state);
       return termsEnum;
     }
 
     private boolean termNotInReader(LeafReader reader, Term term) throws IOException {
       // only called from assert
-      //System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString());
+      // System.out.println("TQ.termNotInReader reader=" + reader + " term=" +
+      // field + ":" + bytes.utf8ToString());
       return reader.docFreq(term) == 0;
     }
 
@@ -117,69 +124,76 @@ public class TermQuery extends Query {
         float freq = scorer.freq();
         SimScorer docScorer = similarity.simScorer(stats, context);
         ComplexExplanation result = new ComplexExplanation();
-        result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
-        Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "termFreq=" + freq));
+        result.setDescription("weight(" + getQuery() + " in " + doc + ") ["
+            + similarity.getClass().getSimpleName() + "], result of:");
+        Explanation scoreExplanation = docScorer.explain(doc,
+            new Explanation(freq, "termFreq=" + freq));
         result.addDetail(scoreExplanation);
         result.setValue(scoreExplanation.getValue());
         result.setMatch(true);
         return result;
       }
     }
-    return new ComplexExplanation(false, 0.0f, "no matching term");
+    return new ComplexExplanation(false, 0.0f, "no matching term");
   }
 }
 
   /** Constructs a query for the term <code>t</code>. */
   public TermQuery(Term t) {
     this(t, -1);
   }
 
-  /** Expert: constructs a TermQuery that will use the
-   *  provided docFreq instead of looking up the docFreq
-   *  against the searcher. */
+  /**
+   * Expert: constructs a TermQuery that will use the provided docFreq instead
+   * of looking up the docFreq against the searcher.
+   */
   public TermQuery(Term t, int docFreq) {
     term = t;
     this.docFreq = docFreq;
     perReaderTermState = null;
   }
 
-  /** Expert: constructs a TermQuery that will use the
-   *  provided docFreq instead of looking up the docFreq
-   *  against the searcher. */
+  /**
+   * Expert: constructs a TermQuery that will use the provided docFreq instead
   * of looking up the docFreq against the searcher.
+   */
  public TermQuery(Term t, TermContext states) {
    assert states != null;
    term = t;
    docFreq = states.docFreq();
    perReaderTermState = states;
  }
 
  /** Returns the term of this query. */
-  public Term getTerm() { return term; }
+  public Term getTerm() {
+    return term;
+  }
 
  @Override
  public Weight createWeight(IndexSearcher searcher) throws IOException {
    final IndexReaderContext context = searcher.getTopReaderContext();
    final TermContext termState;
-    if (perReaderTermState == null || perReaderTermState.topReaderContext != context) {
-      // make TermQuery single-pass if we don't have a PRTS or if the context differs!
+    if (perReaderTermState == null
+        || perReaderTermState.topReaderContext != context) {
+      // make TermQuery single-pass if we don't have a PRTS or if the context
+      // differs!
      termState = TermContext.build(context, term);
    } else {
-      // PRTS was pre-build for this IS
-      termState = this.perReaderTermState;
+      // PRTS was pre-build for this IS
+      termState = this.perReaderTermState;
    }
 
    // we must not ignore the given docFreq - if set use the given value (lie)
-    if (docFreq != -1)
-      termState.setDocFreq(docFreq);
+    if (docFreq != -1) termState.setDocFreq(docFreq);
 
    return new TermWeight(searcher, termState);
  }
 
  @Override
  public void extractTerms(Set<Term> terms) {
    terms.add(getTerm());
  }
 
  /** Prints a user-readable version of this query. */
  @Override
  public String toString(String field) {
@@ -192,21 +206,20 @@ public class TermQuery extends Query {
    buffer.append(ToStringUtils.boost(getBoost()));
    return buffer.toString();
  }
 
  /** Returns true iff <code>o</code> is equal to this. */
  @Override
  public boolean equals(Object o) {
-    if (!(o instanceof TermQuery))
-      return false;
-    TermQuery other = (TermQuery)o;
+    if (!(o instanceof TermQuery)) return false;
+    TermQuery other = (TermQuery) o;
    return (this.getBoost() == other.getBoost())
-      && this.term.equals(other.term);
+        && this.term.equals(other.term);
  }
 
-  /** Returns a hash code value for this object.*/
+  /** Returns a hash code value for this object. */
  @Override
  public int hashCode() {
    return Float.floatToIntBits(getBoost()) ^ term.hashCode();
  }
 
 }

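The scorer() hunk above shows the pattern every caller follows after this change: a single postings() call whose flags argument says how much postings detail to decode, replacing the old choice between TermsEnum.docs() and TermsEnum.docsAndPositions(). A small hedged sketch of that flag selection, with termsEnum and acceptDocs as stand-in variables:

import java.io.IOException;

import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;

final class PostingsFlagsSketch {
  static long countOrSumFreqs(TermsEnum termsEnum, Bits acceptDocs,
                              boolean needsScores) throws IOException {
    // Ask only for what will be used: freqs when scoring, bare docs otherwise.
    int flags = needsScores ? PostingsEnum.FLAG_FREQS : PostingsEnum.FLAG_NONE;
    PostingsEnum postings = termsEnum.postings(acceptDocs, null, flags);
    long total = 0;
    while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      // freq() is only guaranteed meaningful when FLAG_FREQS was requested
      total += needsScores ? postings.freq() : 1;
    }
    return total;
  }
}
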
@@ -19,78 +19,99 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.util.BytesRef;
 
 /** Expert: A <code>Scorer</code> for documents matching a <code>Term</code>.
  */
 final class TermScorer extends Scorer {
-  private final DocsEnum docsEnum;
+  private final PostingsEnum postingsEnum;
   private final Similarity.SimScorer docScorer;
 
   /**
    * Construct a <code>TermScorer</code>.
-   *
+   * 
    * @param weight
    *          The weight of the <code>Term</code> in the query.
    * @param td
    *          An iterator over the documents matching the <code>Term</code>.
    * @param docScorer
-   *          The <code>Similarity.SimScorer</code> implementation
+   *          The <code>Similarity.SimScorer</code> implementation 
    *          to be used for score computations.
    */
-  TermScorer(Weight weight, DocsEnum td, Similarity.SimScorer docScorer) {
+  TermScorer(Weight weight, PostingsEnum td, Similarity.SimScorer docScorer) {
     super(weight);
     this.docScorer = docScorer;
-    this.docsEnum = td;
+    this.postingsEnum = td;
   }
 
   @Override
   public int docID() {
-    return docsEnum.docID();
+    return postingsEnum.docID();
   }
 
   @Override
   public int freq() throws IOException {
-    return docsEnum.freq();
+    return postingsEnum.freq();
   }
 
+  @Override
+  public int nextPosition() throws IOException {
+    return postingsEnum.nextPosition();
+  }
+
+  @Override
+  public int startOffset() throws IOException {
+    return postingsEnum.startOffset();
+  }
+
+  @Override
+  public int endOffset() throws IOException {
+    return postingsEnum.endOffset();
+  }
+
+  @Override
+  public BytesRef getPayload() throws IOException {
+    return postingsEnum.getPayload();
+  }
+
   /**
    * Advances to the next document matching the query. <br>
-   *
+   * 
    * @return the document matching the query or NO_MORE_DOCS if there are no more documents.
    */
   @Override
   public int nextDoc() throws IOException {
-    return docsEnum.nextDoc();
+    return postingsEnum.nextDoc();
   }
 
   @Override
   public float score() throws IOException {
     assert docID() != NO_MORE_DOCS;
-    return docScorer.score(docsEnum.docID(), docsEnum.freq());
+    return docScorer.score(postingsEnum.docID(), postingsEnum.freq());
   }
 
   /**
    * Advances to the first match beyond the current whose document number is
    * greater than or equal to a given target. <br>
-   * The implementation uses {@link DocsEnum#advance(int)}.
-   *
+   * The implementation uses {@link org.apache.lucene.index.PostingsEnum#advance(int)}.
+   * 
    * @param target
    *          The target document number.
    * @return the matching document or NO_MORE_DOCS if none exist.
    */
   @Override
   public int advance(int target) throws IOException {
-    return docsEnum.advance(target);
+    return postingsEnum.advance(target);
   }
 
   @Override
   public long cost() {
-    return docsEnum.cost();
+    return postingsEnum.cost();
   }
 
   /** Returns a string representation of this <code>TermScorer</code>. */
   @Override
-  public String toString() { return "scorer(" + weight + ")"; }
+  public String toString() { return "scorer(" + weight + ")[" + super.toString() + "]"; }
 }

@@ -17,12 +17,12 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
+import java.io.IOException;
+
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.util.Counter;
 import org.apache.lucene.util.ThreadInterruptedException;
 
-import java.io.IOException;
-
 /**
  * The {@link TimeLimitingCollector} is used to timeout search requests that
  * take longer than the maximum allowed search time limit. After this time is
@@ -156,7 +156,7 @@ public class TimeLimitingCollector implements Collector {
 
     };
   }
-  
+
   @Override
   public boolean needsScores() {
     return collector.needsScores();

@@ -17,7 +17,6 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
-
 /**
  * Just counts the total number of hits.
  */

@@ -19,8 +19,8 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 
+import org.apache.lucene.index.IndexReaderContext;
 import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.IndexReaderContext; // javadocs
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.util.Bits;
 
@@ -187,4 +187,5 @@ public abstract class Weight {
       }
     }
   }
+
 }

@@ -17,11 +17,15 @@ package org.apache.lucene.search.payloads;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Iterator;
+
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.ComplexExplanation;
 import org.apache.lucene.search.Explanation;
-import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.search.similarities.Similarity;
@@ -37,10 +41,6 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.ToStringUtils;
 
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Iterator;
-
 /**
  * This class is very similar to
  * {@link org.apache.lucene.search.spans.SpanNearQuery} except that it factors

@@ -17,27 +17,27 @@ package org.apache.lucene.search.payloads;
  * limitations under the License.
  */
 
+import java.io.IOException;
+
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.search.ComplexExplanation;
+import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Weight;
-import org.apache.lucene.search.Explanation;
-import org.apache.lucene.search.ComplexExplanation;
 import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.search.similarities.Similarity.SimScorer;
 import org.apache.lucene.search.spans.SpanQuery;
-import org.apache.lucene.search.spans.TermSpans;
+import org.apache.lucene.search.spans.SpanScorer;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.search.spans.SpanWeight;
-import org.apache.lucene.search.spans.SpanScorer;
+import org.apache.lucene.search.spans.TermSpans;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
-
-import java.io.IOException;
 
 /**
  * This class is very similar to
  * {@link org.apache.lucene.search.spans.SpanTermQuery} except that it factors
@@ -120,7 +120,7 @@ public class PayloadTermQuery extends SpanTermQuery {
 
     protected void processPayload(Similarity similarity) throws IOException {
       if (termSpans.isPayloadAvailable()) {
-        final DocsAndPositionsEnum postings = termSpans.getPostings();
+        final PostingsEnum postings = termSpans.getPostings();
         payload = postings.getPayload();
         if (payload != null) {
           payloadScore = function.currentScore(doc, term.field(),

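Payload access keeps the old DocsAndPositionsEnum contract on the merged enum: request PostingsEnum.FLAG_PAYLOADS, and call getPayload() only after nextPosition(). A sketch under those assumptions, with termsEnum and liveDocs as placeholder variables:

import java.io.IOException;

import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

final class PayloadSketch {
  static void readPayloads(TermsEnum termsEnum, Bits liveDocs) throws IOException {
    PostingsEnum postings = termsEnum.postings(liveDocs, null, PostingsEnum.FLAG_PAYLOADS);
    if (postings == null) {
      return; // field indexed without positions: no payloads to read
    }
    while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      for (int i = 0; i < postings.freq(); i++) {
        postings.nextPosition();                  // advances the payload cursor
        BytesRef payload = postings.getPayload(); // null when none was stored
        if (payload != null) {
          System.out.println("payload of " + payload.length + " bytes");
        }
      }
    }
  }
}
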
@@ -17,10 +17,8 @@ package org.apache.lucene.search.similarities;
  * limitations under the License.
  */
 
-import org.apache.lucene.index.LeafReaderContext;
+import java.io.IOException;
+
 import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.CollectionStatistics;
 import org.apache.lucene.search.Explanation;
@@ -28,9 +26,11 @@ import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TermStatistics;
-import org.apache.lucene.search.spans.SpanQuery; // javadoc
+import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.SmallFloat; // javadoc
+import org.apache.lucene.util.SmallFloat;
 
-import java.io.IOException;
-
 /**
  * Similarity defines the components of Lucene scoring.

@@ -19,9 +19,10 @@ package org.apache.lucene.search.spans;
 
 import java.io.IOException;
 
-import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.util.BytesRef;
 
 /**
  * Public for extension only.
@@ -96,16 +97,37 @@ public class SpanScorer extends Scorer {
   public int freq() throws IOException {
     return numMatches;
   }
 
+  @Override
+  public int nextPosition() throws IOException {
+    throw new UnsupportedOperationException("SpanQueries do not support nextPosition() iteration");
+  }
+
+  @Override
+  public int startOffset() throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public int endOffset() throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public BytesRef getPayload() throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
   /** Returns the intermediate "sloppy freq" adjusted for edit distance
    *  @lucene.internal */
   // only public so .payloads can see it.
   public float sloppyFreq() throws IOException {
     return freq;
   }
 
   @Override
   public long cost() {
     return spans.cost();
   }
 
 }

@@ -17,10 +17,13 @@ package org.apache.lucene.search.spans;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.TermContext;
 import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
@@ -28,10 +31,6 @@ import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.ToStringUtils;
 
-import java.io.IOException;
-import java.util.Map;
-import java.util.Set;
-
 /** Matches spans containing a term. */
 public class SpanTermQuery extends SpanQuery {
   protected Term term;
@@ -115,7 +114,7 @@ public class SpanTermQuery extends SpanQuery {
     final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null);
     termsEnum.seekExact(term.bytes(), state);
 
-    final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
+    final PostingsEnum postings = termsEnum.postings(acceptDocs, null, PostingsEnum.FLAG_PAYLOADS);
 
     if (postings != null) {
       return new TermSpans(postings, term);

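The null check on the postings() result above is load-bearing: when the field lacks the indexed detail the flags ask for (here positions and payloads), the merged API hands back null rather than a degraded enum. A sketch restating that branch with the same kinds of inputs the method uses; the error fallback at the end is assumed, since the real method's handling sits outside the hunk shown:

import java.io.IOException;

import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.search.spans.TermSpans;
import org.apache.lucene.util.Bits;

final class SpanPostingsSketch {
  static Spans spansFor(TermsEnum termsEnum, Term term, Bits acceptDocs) throws IOException {
    PostingsEnum postings = termsEnum.postings(acceptDocs, null, PostingsEnum.FLAG_PAYLOADS);
    if (postings != null) {
      return new TermSpans(postings, term); // spans can read positions and payloads
    }
    // The term exists but the field carries no position data (sketched here
    // as an error; wording assumed).
    throw new IllegalStateException("field was indexed without position data");
  }
}
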
@@ -17,20 +17,26 @@ package org.apache.lucene.search.spans;
  * limitations under the License.
  */
 
-import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.index.IndexReaderContext;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermContext;
-import org.apache.lucene.search.*;
-import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.search.similarities.Similarity.SimScorer;
-import org.apache.lucene.util.Bits;
-
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.TreeSet;
 
+import org.apache.lucene.index.IndexReaderContext;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.search.ComplexExplanation;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TermStatistics;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.search.similarities.Similarity;
+import org.apache.lucene.search.similarities.Similarity.SimScorer;
+import org.apache.lucene.util.Bits;
 
 /**
  * Expert-only. Public for use by other weight implementations
 */

@@ -17,7 +17,7 @@ package org.apache.lucene.search.spans;
 
 
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.BytesRef;
 
@@ -30,7 +30,7 @@ import java.util.Collection;
 * Public for extension only
 */
 public class TermSpans extends Spans {
-  protected final DocsAndPositionsEnum postings;
+  protected final PostingsEnum postings;
   protected final Term term;
   protected int doc;
   protected int freq;
@@ -38,7 +38,7 @@ public class TermSpans extends Spans {
   protected int position;
   protected boolean readPayload;
 
-  public TermSpans(DocsAndPositionsEnum postings, Term term) {
+  public TermSpans(PostingsEnum postings, Term term) {
     this.postings = postings;
     this.term = term;
     doc = -1;
@@ -132,7 +132,7 @@ public class TermSpans extends Spans {
         (doc == -1 ? "START" : (doc == Integer.MAX_VALUE) ? "END" : doc + "-" + position);
   }
 
-  public DocsAndPositionsEnum getPostings() {
+  public PostingsEnum getPostings() {
     return postings;
   }
 

@@ -22,11 +22,28 @@ import java.io.PrintWriter;
 import java.io.StringWriter;
 import java.util.Random;
 
-import org.apache.lucene.store.*;
-import org.apache.lucene.document.*;
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.index.*;
-import org.apache.lucene.search.*;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.IntField;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.MergePolicy;
+import org.apache.lucene.index.StoredDocument;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 
 public class TestSearchForDuplicates extends LuceneTestCase {

@@ -25,9 +25,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.Directory;
@@ -84,7 +84,7 @@ public class TestCachingTokenFilter extends BaseTokenStreamTestCase {
     writer.addDocument(doc);
 
     IndexReader reader = writer.getReader();
-    DocsAndPositionsEnum termPositions = MultiFields.getTermPositionsEnum(reader,
+    PostingsEnum termPositions = MultiFields.getTermPositionsEnum(reader,
                                                                    MultiFields.getLiveDocs(reader),
                                                                    "preanalyzed",
                                                                    new BytesRef("term1"));

@@ -25,7 +25,7 @@ import java.util.Random;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
-import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.LeafReader;
@@ -321,7 +321,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
     final Terms terms = fields.terms("f");
     final TermsEnum te = terms.iterator(null);
     assertEquals(new BytesRef("a"), te.next());
-    final DocsAndPositionsEnum dpe = te.docsAndPositions(null, null);
+    final PostingsEnum dpe = te.postings(null, null, PostingsEnum.FLAG_ALL);
     assertEquals(0, dpe.nextDoc());
     assertEquals(2, dpe.freq());
     assertEquals(0, dpe.nextPosition());

@@ -27,5 +27,10 @@ public class TestAssertingDocValuesFormat extends BasePostingsFormatTestCase {
   @Override
   protected Codec getCodec() {
     return codec;
   }
-}
+
+  @Override
+  protected boolean isPostingsEnumReuseImplemented() {
+    return false;
+  }
+}

@@ -27,5 +27,10 @@ public class TestAssertingPostingsFormat extends BasePostingsFormatTestCase {
   @Override
   protected Codec getCodec() {
     return codec;
   }
-}
+
+  @Override
+  protected boolean isPostingsEnumReuseImplemented() {
+    return false;
+  }
+}

@@ -33,8 +33,7 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.DocsAndPositionsEnum;
-import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@@ -284,93 +283,93 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
   public void assertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, boolean deep) throws Exception {
     BytesRef term;
     Bits randomBits = new RandomBits(MAXDOC, random().nextDouble(), random());
-    DocsAndPositionsEnum leftPositions = null;
-    DocsAndPositionsEnum rightPositions = null;
-    DocsEnum leftDocs = null;
-    DocsEnum rightDocs = null;
+    PostingsEnum leftPositions = null;
+    PostingsEnum rightPositions = null;
+    PostingsEnum leftDocs = null;
+    PostingsEnum rightDocs = null;
 
     while ((term = leftTermsEnum.next()) != null) {
       assertEquals(term, rightTermsEnum.next());
       assertTermStats(leftTermsEnum, rightTermsEnum);
       if (deep) {
         // with payloads + off
-        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions),
-                                   rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions));
-        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions),
-                                   rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions));
+        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.postings(null, leftPositions, PostingsEnum.FLAG_ALL),
+                                   rightPositions = rightTermsEnum.postings(null, rightPositions, PostingsEnum.FLAG_ALL));
+        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.postings(randomBits, leftPositions, PostingsEnum.FLAG_ALL),
+                                   rightPositions = rightTermsEnum.postings(randomBits, rightPositions, PostingsEnum.FLAG_ALL));
 
         assertPositionsSkipping(leftTermsEnum.docFreq(),
-                                leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions),
-                                rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions));
+                                leftPositions = leftTermsEnum.postings(null, leftPositions, PostingsEnum.FLAG_ALL),
+                                rightPositions = rightTermsEnum.postings(null, rightPositions, PostingsEnum.FLAG_ALL));
         assertPositionsSkipping(leftTermsEnum.docFreq(),
-                                leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions),
-                                rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions));
+                                leftPositions = leftTermsEnum.postings(randomBits, leftPositions, PostingsEnum.FLAG_ALL),
+                                rightPositions = rightTermsEnum.postings(randomBits, rightPositions, PostingsEnum.FLAG_ALL));
         // with payloads only
-        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS),
-                                   rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS));
-        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS),
-                                   rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS));
+        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.postings(null, leftPositions, PostingsEnum.FLAG_PAYLOADS),
+                                   rightPositions = rightTermsEnum.postings(null, rightPositions, PostingsEnum.FLAG_PAYLOADS));
+        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.postings(randomBits, leftPositions, PostingsEnum.FLAG_PAYLOADS),
+                                   rightPositions = rightTermsEnum.postings(randomBits, rightPositions, PostingsEnum.FLAG_PAYLOADS));
 
         assertPositionsSkipping(leftTermsEnum.docFreq(),
-                                leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS),
-                                rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS));
+                                leftPositions = leftTermsEnum.postings(null, leftPositions, PostingsEnum.FLAG_PAYLOADS),
+                                rightPositions = rightTermsEnum.postings(null, rightPositions, PostingsEnum.FLAG_PAYLOADS));
         assertPositionsSkipping(leftTermsEnum.docFreq(),
-                                leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_PAYLOADS),
-                                rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_PAYLOADS));
+                                leftPositions = leftTermsEnum.postings(randomBits, leftPositions, PostingsEnum.FLAG_PAYLOADS),
+                                rightPositions = rightTermsEnum.postings(randomBits, rightPositions, PostingsEnum.FLAG_PAYLOADS));
 
         // with offsets only
-        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS),
-                                   rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS));
-        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS),
-                                   rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS));
+        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.postings(null, leftPositions, PostingsEnum.FLAG_OFFSETS),
+                                   rightPositions = rightTermsEnum.postings(null, rightPositions, PostingsEnum.FLAG_OFFSETS));
+        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.postings(randomBits, leftPositions, PostingsEnum.FLAG_OFFSETS),
+                                   rightPositions = rightTermsEnum.postings(randomBits, rightPositions, PostingsEnum.FLAG_OFFSETS));
 
         assertPositionsSkipping(leftTermsEnum.docFreq(),
-                                leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS),
-                                rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS));
+                                leftPositions = leftTermsEnum.postings(null, leftPositions, PostingsEnum.FLAG_OFFSETS),
+                                rightPositions = rightTermsEnum.postings(null, rightPositions, PostingsEnum.FLAG_OFFSETS));
         assertPositionsSkipping(leftTermsEnum.docFreq(),
-                                leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions, DocsAndPositionsEnum.FLAG_OFFSETS),
-                                rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions, DocsAndPositionsEnum.FLAG_OFFSETS));
+                                leftPositions = leftTermsEnum.postings(randomBits, leftPositions, PostingsEnum.FLAG_OFFSETS),
+                                rightPositions = rightTermsEnum.postings(randomBits, rightPositions, PostingsEnum.FLAG_OFFSETS));
 
         // with positions only
-        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, DocsEnum.FLAG_NONE),
-                                   rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions, DocsEnum.FLAG_NONE));
-        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions, DocsEnum.FLAG_NONE),
-                                   rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions, DocsEnum.FLAG_NONE));
+        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.postings(null, leftPositions, PostingsEnum.FLAG_POSITIONS),
+                                   rightPositions = rightTermsEnum.postings(null, rightPositions, PostingsEnum.FLAG_POSITIONS));
+        assertDocsAndPositionsEnum(leftPositions = leftTermsEnum.postings(randomBits, leftPositions, PostingsEnum.FLAG_POSITIONS),
+                                   rightPositions = rightTermsEnum.postings(randomBits, rightPositions, PostingsEnum.FLAG_POSITIONS));
 
         assertPositionsSkipping(leftTermsEnum.docFreq(),
-                                leftPositions = leftTermsEnum.docsAndPositions(null, leftPositions, DocsEnum.FLAG_NONE),
-                                rightPositions = rightTermsEnum.docsAndPositions(null, rightPositions, DocsEnum.FLAG_NONE));
+                                leftPositions = leftTermsEnum.postings(null, leftPositions, PostingsEnum.FLAG_POSITIONS),
+                                rightPositions = rightTermsEnum.postings(null, rightPositions, PostingsEnum.FLAG_POSITIONS));
         assertPositionsSkipping(leftTermsEnum.docFreq(),
-                                leftPositions = leftTermsEnum.docsAndPositions(randomBits, leftPositions, DocsEnum.FLAG_NONE),
-                                rightPositions = rightTermsEnum.docsAndPositions(randomBits, rightPositions, DocsEnum.FLAG_NONE));
+                                leftPositions = leftTermsEnum.postings(randomBits, leftPositions, PostingsEnum.FLAG_POSITIONS),
+                                rightPositions = rightTermsEnum.postings(randomBits, rightPositions, PostingsEnum.FLAG_POSITIONS));
 
         // with freqs:
-        assertDocsEnum(leftDocs = leftTermsEnum.docs(null, leftDocs),
-                       rightDocs = rightTermsEnum.docs(null, rightDocs));
-        assertDocsEnum(leftDocs = leftTermsEnum.docs(randomBits, leftDocs),
-                       rightDocs = rightTermsEnum.docs(randomBits, rightDocs));
+        assertDocsEnum(leftDocs = leftTermsEnum.postings(null, leftDocs),
+                       rightDocs = rightTermsEnum.postings(null, rightDocs));
+        assertDocsEnum(leftDocs = leftTermsEnum.postings(randomBits, leftDocs),
+                       rightDocs = rightTermsEnum.postings(randomBits, rightDocs));
 
         // w/o freqs:
-        assertDocsEnum(leftDocs = leftTermsEnum.docs(null, leftDocs, DocsEnum.FLAG_NONE),
-                       rightDocs = rightTermsEnum.docs(null, rightDocs, DocsEnum.FLAG_NONE));
-        assertDocsEnum(leftDocs = leftTermsEnum.docs(randomBits, leftDocs, DocsEnum.FLAG_NONE),
-                       rightDocs = rightTermsEnum.docs(randomBits, rightDocs, DocsEnum.FLAG_NONE));
+        assertDocsEnum(leftDocs = leftTermsEnum.postings(null, leftDocs, PostingsEnum.FLAG_NONE),
+                       rightDocs = rightTermsEnum.postings(null, rightDocs, PostingsEnum.FLAG_NONE));
+        assertDocsEnum(leftDocs = leftTermsEnum.postings(randomBits, leftDocs, PostingsEnum.FLAG_NONE),
+                       rightDocs = rightTermsEnum.postings(randomBits, rightDocs, PostingsEnum.FLAG_NONE));
 
         // with freqs:
         assertDocsSkipping(leftTermsEnum.docFreq(),
-                           leftDocs = leftTermsEnum.docs(null, leftDocs),
-                           rightDocs = rightTermsEnum.docs(null, rightDocs));
+                           leftDocs = leftTermsEnum.postings(null, leftDocs),
+                           rightDocs = rightTermsEnum.postings(null, rightDocs));
         assertDocsSkipping(leftTermsEnum.docFreq(),
-                           leftDocs = leftTermsEnum.docs(randomBits, leftDocs),
-                           rightDocs = rightTermsEnum.docs(randomBits, rightDocs));
+                           leftDocs = leftTermsEnum.postings(randomBits, leftDocs),
+                           rightDocs = rightTermsEnum.postings(randomBits, rightDocs));
 
         // w/o freqs:
         assertDocsSkipping(leftTermsEnum.docFreq(),
-                           leftDocs = leftTermsEnum.docs(null, leftDocs, DocsEnum.FLAG_NONE),
-                           rightDocs = rightTermsEnum.docs(null, rightDocs, DocsEnum.FLAG_NONE));
+                           leftDocs = leftTermsEnum.postings(null, leftDocs, PostingsEnum.FLAG_NONE),
+                           rightDocs = rightTermsEnum.postings(null, rightDocs, PostingsEnum.FLAG_NONE));
         assertDocsSkipping(leftTermsEnum.docFreq(),
-                           leftDocs = leftTermsEnum.docs(randomBits, leftDocs, DocsEnum.FLAG_NONE),
-                           rightDocs = rightTermsEnum.docs(randomBits, rightDocs, DocsEnum.FLAG_NONE));
+                           leftDocs = leftTermsEnum.postings(randomBits, leftDocs, PostingsEnum.FLAG_NONE),
+                           rightDocs = rightTermsEnum.postings(randomBits, rightDocs, PostingsEnum.FLAG_NONE));
       }
     }
     assertNull(rightTermsEnum.next());
@@ -389,7 +388,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
   /**
    * checks docs + freqs + positions + payloads, sequentially
    */
-  public void assertDocsAndPositionsEnum(DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs) throws Exception {
+  public void assertDocsAndPositionsEnum(PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception {
     if (leftDocs == null || rightDocs == null) {
       assertNull(leftDocs);
       assertNull(rightDocs);
@@ -413,7 +412,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
   /**
    * checks docs + freqs, sequentially
   */
-  public void assertDocsEnum(DocsEnum leftDocs, DocsEnum rightDocs) throws Exception {
+  public void assertDocsEnum(PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception {
     if (leftDocs == null) {
       assertNull(rightDocs);
       return;
@@ -431,7 +430,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
   /**
    * checks advancing docs
   */
-  public void assertDocsSkipping(int docFreq, DocsEnum leftDocs, DocsEnum rightDocs) throws Exception {
+  public void assertDocsSkipping(int docFreq, PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception {
     if (leftDocs == null) {
       assertNull(rightDocs);
       return;
@@ -462,7 +461,7 @@ public class TestBlockPostingsFormat3 extends LuceneTestCase {
   /**
    * checks advancing docs + positions
   */
-  public void assertPositionsSkipping(int docFreq, DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs) throws Exception {
+  public void assertPositionsSkipping(int docFreq, PostingsEnum leftDocs, PostingsEnum rightDocs) throws Exception {
     if (leftDocs == null || rightDocs == null) {
       assertNull(leftDocs);
       assertNull(rightDocs);

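Taken together, the substitutions in this test amount to a migration table for the merged API. A summary sketch of the old-to-new mapping (old calls shown as comments; every new call here is one exercised above):

import java.io.IOException;

import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;

final class MigrationMapSketch {
  static void map(TermsEnum te, Bits bits, PostingsEnum reuse) throws IOException {
    // docs(bits, reuse)                                 -> postings(bits, reuse)
    PostingsEnum docsWithFreqs = te.postings(bits, reuse);
    // docs(bits, reuse, DocsEnum.FLAG_NONE)             -> postings(bits, reuse, PostingsEnum.FLAG_NONE)
    PostingsEnum docsOnly = te.postings(bits, reuse, PostingsEnum.FLAG_NONE);
    // docsAndPositions(bits, reuse)                     -> postings(bits, reuse, PostingsEnum.FLAG_ALL)
    PostingsEnum everything = te.postings(bits, reuse, PostingsEnum.FLAG_ALL);
    // docsAndPositions(bits, reuse, FLAG_PAYLOADS)      -> postings(bits, reuse, PostingsEnum.FLAG_PAYLOADS)
    PostingsEnum withPayloads = te.postings(bits, reuse, PostingsEnum.FLAG_PAYLOADS);
    // docsAndPositions(bits, reuse, FLAG_OFFSETS)       -> postings(bits, reuse, PostingsEnum.FLAG_OFFSETS)
    PostingsEnum withOffsets = te.postings(bits, reuse, PostingsEnum.FLAG_OFFSETS);
    // docsAndPositions(bits, reuse, DocsEnum.FLAG_NONE) -> postings(bits, reuse, PostingsEnum.FLAG_POSITIONS)
    PostingsEnum positionsOnly = te.postings(bits, reuse, PostingsEnum.FLAG_POSITIONS);
  }
}
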
@@ -30,11 +30,11 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.StringField;
-import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.BaseCompressingDocValuesFormatTestCase;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.SerialMergeScheduler;
 import org.apache.lucene.index.SortedSetDocValues;