mirror of https://github.com/apache/lucene.git
LUCENE-4299: add Terms.hasPositions()/hasOffsets(), so you know which features a document's term vectors have
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1371709 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
392ddde4d3
commit
e2935c224e
|
@ -8,6 +8,13 @@ http://s.apache.org/luceneversions
|
||||||
|
|
||||||
======================= Lucene 4.0.0 =======================
|
======================= Lucene 4.0.0 =======================
|
||||||
|
|
||||||
|
API Changes
|
||||||
|
|
||||||
|
* LUCENE-4299: Added Terms.hasPositions() and Terms.hasOffsets().
|
||||||
|
Previously you had no real way to know whether a term vector field
|
||||||
|
had positions or offsets, since this can be configured on a
|
||||||
|
per-field-per-document basis. (Robert Muir)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
|
|
||||||
* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
|
* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
|
||||||
|
|
|
@ -253,6 +253,16 @@ public class BlockTermsReader extends FieldsProducer {
|
||||||
return new SegmentTermsEnum();
|
return new SegmentTermsEnum();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** True when this field's IndexOptions compares at or above DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS. */
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** True when this field's IndexOptions compares at or above DOCS_AND_FREQS_AND_POSITIONS. */
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long size() {
|
public long size() {
|
||||||
return numTerms;
|
return numTerms;
|
||||||
|
|
|
@ -456,6 +456,16 @@ public class BlockTreeTermsReader extends FieldsProducer {
|
||||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** True when this field's IndexOptions compares at or above DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS. */
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** True when this field's IndexOptions compares at or above DOCS_AND_FREQS_AND_POSITIONS. */
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TermsEnum iterator(TermsEnum reuse) throws IOException {
|
public TermsEnum iterator(TermsEnum reuse) throws IOException {
|
||||||
return new SegmentTermsEnum();
|
return new SegmentTermsEnum();
|
||||||
|
|
|
@ -184,6 +184,9 @@ public abstract class TermVectorsWriter implements Closeable {
|
||||||
final FieldsEnum fieldsEnum = vectors.iterator();
|
final FieldsEnum fieldsEnum = vectors.iterator();
|
||||||
String fieldName;
|
String fieldName;
|
||||||
String lastFieldName = null;
|
String lastFieldName = null;
|
||||||
|
|
||||||
|
TermsEnum termsEnum = null;
|
||||||
|
DocsAndPositionsEnum docsAndPositionsEnum = null;
|
||||||
|
|
||||||
while((fieldName = fieldsEnum.next()) != null) {
|
while((fieldName = fieldsEnum.next()) != null) {
|
||||||
final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName);
|
final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName);
|
||||||
|
@ -196,39 +199,30 @@ public abstract class TermVectorsWriter implements Closeable {
|
||||||
// FieldsEnum shouldn't lie...
|
// FieldsEnum shouldn't lie...
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final boolean hasPositions = terms.hasPositions();
|
||||||
|
final boolean hasOffsets = terms.hasOffsets();
|
||||||
|
|
||||||
final int numTerms = (int) terms.size();
|
final int numTerms = (int) terms.size();
|
||||||
if (numTerms == -1) {
|
if (numTerms == -1) {
|
||||||
throw new IllegalStateException("terms.size() must be implemented (it returned -1)");
|
throw new IllegalStateException("terms.size() must be implemented (it returned -1)");
|
||||||
}
|
}
|
||||||
final TermsEnum termsEnum = terms.iterator(null);
|
|
||||||
|
startField(fieldInfo, numTerms, hasPositions, hasOffsets);
|
||||||
DocsAndPositionsEnum docsAndPositionsEnum = null;
|
termsEnum = terms.iterator(termsEnum);
|
||||||
|
|
||||||
boolean startedField = false;
|
|
||||||
|
|
||||||
// NOTE: this is tricky, because TermVectors allow
|
|
||||||
// indexing offsets but NOT positions. So we must
|
|
||||||
// lazily init the field by checking whether first
|
|
||||||
// position we see is -1 or not.
|
|
||||||
|
|
||||||
int termCount = 0;
|
int termCount = 0;
|
||||||
while(termsEnum.next() != null) {
|
while(termsEnum.next() != null) {
|
||||||
termCount++;
|
termCount++;
|
||||||
|
|
||||||
final int freq = (int) termsEnum.totalTermFreq();
|
final int freq = (int) termsEnum.totalTermFreq();
|
||||||
|
|
||||||
|
startTerm(termsEnum.term(), freq);
|
||||||
|
|
||||||
if (startedField) {
|
if (hasPositions || hasOffsets) {
|
||||||
startTerm(termsEnum.term(), freq);
|
docsAndPositionsEnum = termsEnum.docsAndPositions(null, docsAndPositionsEnum);
|
||||||
}
|
assert docsAndPositionsEnum != null;
|
||||||
|
|
||||||
// TODO: we need a "query" API where we can ask (via
|
|
||||||
// flex API) what this term was indexed with...
|
|
||||||
// Both positions & offsets:
|
|
||||||
docsAndPositionsEnum = termsEnum.docsAndPositions(null, null);
|
|
||||||
boolean hasOffsets = false;
|
|
||||||
boolean hasPositions = false;
|
|
||||||
|
|
||||||
if (docsAndPositionsEnum != null) {
|
|
||||||
final int docID = docsAndPositionsEnum.nextDoc();
|
final int docID = docsAndPositionsEnum.nextDoc();
|
||||||
assert docID != DocIdSetIterator.NO_MORE_DOCS;
|
assert docID != DocIdSetIterator.NO_MORE_DOCS;
|
||||||
assert docsAndPositionsEnum.freq() == freq;
|
assert docsAndPositionsEnum.freq() == freq;
|
||||||
|
@ -237,28 +231,10 @@ public abstract class TermVectorsWriter implements Closeable {
|
||||||
final int pos = docsAndPositionsEnum.nextPosition();
|
final int pos = docsAndPositionsEnum.nextPosition();
|
||||||
final int startOffset = docsAndPositionsEnum.startOffset();
|
final int startOffset = docsAndPositionsEnum.startOffset();
|
||||||
final int endOffset = docsAndPositionsEnum.endOffset();
|
final int endOffset = docsAndPositionsEnum.endOffset();
|
||||||
if (!startedField) {
|
|
||||||
assert numTerms > 0;
|
|
||||||
hasPositions = pos != -1;
|
|
||||||
hasOffsets = startOffset != -1;
|
|
||||||
startField(fieldInfo, numTerms, hasPositions, hasOffsets);
|
|
||||||
startTerm(termsEnum.term(), freq);
|
|
||||||
startedField = true;
|
|
||||||
}
|
|
||||||
if (hasOffsets) {
|
|
||||||
assert startOffset != -1;
|
|
||||||
assert endOffset != -1;
|
|
||||||
}
|
|
||||||
assert !hasPositions || pos >= 0;
|
assert !hasPositions || pos >= 0;
|
||||||
addPosition(pos, startOffset, endOffset);
|
addPosition(pos, startOffset, endOffset);
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
if (!startedField) {
|
|
||||||
assert numTerms > 0;
|
|
||||||
startField(fieldInfo, numTerms, hasPositions, hasOffsets);
|
|
||||||
startTerm(termsEnum.term(), freq);
|
|
||||||
startedField = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert termCount == numTerms;
|
assert termCount == numTerms;
|
||||||
|
|
|
@ -314,6 +314,16 @@ public class BloomFilteringPostingsFormat extends PostingsFormat {
|
||||||
public int getDocCount() throws IOException {
|
public int getDocCount() throws IOException {
|
||||||
return delegateTerms.getDocCount();
|
return delegateTerms.getDocCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Forwards the question to the wrapped delegateTerms; the bloom filter layer adds nothing here. */
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return delegateTerms.hasOffsets();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Forwards the question to the wrapped delegateTerms; the bloom filter layer adds nothing here. */
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return delegateTerms.hasPositions();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class BloomFilteredTermsEnum extends TermsEnum {
|
class BloomFilteredTermsEnum extends TermsEnum {
|
||||||
|
|
|
@ -296,10 +296,16 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
||||||
private class TVTerms extends Terms {
|
private class TVTerms extends Terms {
|
||||||
private final int numTerms;
|
private final int numTerms;
|
||||||
private final long tvfFPStart;
|
private final long tvfFPStart;
|
||||||
|
private final boolean storePositions;
|
||||||
|
private final boolean storeOffsets;
|
||||||
|
|
||||||
|
|
||||||
public TVTerms(long tvfFP) throws IOException {
|
public TVTerms(long tvfFP) throws IOException {
|
||||||
tvf.seek(tvfFP);
|
tvf.seek(tvfFP);
|
||||||
numTerms = tvf.readVInt();
|
numTerms = tvf.readVInt();
|
||||||
|
final byte bits = tvf.readByte();
|
||||||
|
storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
|
||||||
|
storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
|
||||||
tvfFPStart = tvf.getFilePointer();
|
tvfFPStart = tvf.getFilePointer();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -314,7 +320,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
||||||
} else {
|
} else {
|
||||||
termsEnum = new TVTermsEnum();
|
termsEnum = new TVTermsEnum();
|
||||||
}
|
}
|
||||||
termsEnum.reset(numTerms, tvfFPStart);
|
termsEnum.reset(numTerms, tvfFPStart, storePositions, storeOffsets);
|
||||||
return termsEnum;
|
return termsEnum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -345,6 +351,16 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
||||||
// this...? I guess codec could buffer and re-sort...
|
// this...? I guess codec could buffer and re-sort...
|
||||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns the storeOffsets flag (presumably the one decoded from the STORE_OFFSET_WITH_TERMVECTOR bit in the constructor; confirm). */
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return storeOffsets;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the storePositions flag (presumably the one decoded from the STORE_POSITIONS_WITH_TERMVECTOR bit in the constructor; confirm). */
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return storePositions;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class TVTermsEnum extends TermsEnum {
|
private class TVTermsEnum extends TermsEnum {
|
||||||
|
@ -373,13 +389,12 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
||||||
return tvf == origTVF;
|
return tvf == origTVF;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void reset(int numTerms, long tvfFPStart) throws IOException {
|
public void reset(int numTerms, long tvfFPStart, boolean storePositions, boolean storeOffsets) throws IOException {
|
||||||
this.numTerms = numTerms;
|
this.numTerms = numTerms;
|
||||||
|
this.storePositions = storePositions;
|
||||||
|
this.storeOffsets = storeOffsets;
|
||||||
nextTerm = 0;
|
nextTerm = 0;
|
||||||
tvf.seek(tvfFPStart);
|
tvf.seek(tvfFPStart);
|
||||||
final byte bits = tvf.readByte();
|
|
||||||
storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
|
|
||||||
storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
|
|
||||||
tvfFP = 1+tvfFPStart;
|
tvfFP = 1+tvfFPStart;
|
||||||
positions = null;
|
positions = null;
|
||||||
startOffsets = null;
|
startOffsets = null;
|
||||||
|
|
|
@ -635,6 +635,16 @@ public class DirectPostingsFormat extends PostingsFormat {
|
||||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns the hasOffsets field of the enclosing terms instance. */
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return hasOffsets;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the hasPos field of the enclosing terms instance. */
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return hasPos;
|
||||||
|
}
|
||||||
|
|
||||||
private final class DirectTermsEnum extends TermsEnum {
|
private final class DirectTermsEnum extends TermsEnum {
|
||||||
|
|
||||||
private final BytesRef scratch = new BytesRef();
|
private final BytesRef scratch = new BytesRef();
|
||||||
|
|
|
@ -834,6 +834,16 @@ public class MemoryPostingsFormat extends PostingsFormat {
|
||||||
public Comparator<BytesRef> getComparator() {
|
public Comparator<BytesRef> getComparator() {
|
||||||
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
return BytesRef.getUTF8SortedAsUnicodeComparator();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** True when field's IndexOptions compares at or above DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS. */
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** True when field's IndexOptions compares at or above DOCS_AND_FREQS_AND_POSITIONS. */
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -609,6 +609,16 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
||||||
public int getDocCount() throws IOException {
|
public int getDocCount() throws IOException {
|
||||||
return docCount;
|
return docCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** True when indexOptions compares at or above DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS. */
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** True when indexOptions compares at or above DOCS_AND_FREQS_AND_POSITIONS. */
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -130,7 +130,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
assert StringHelper.startsWith(scratch, FIELDTERMCOUNT);
|
assert StringHelper.startsWith(scratch, FIELDTERMCOUNT);
|
||||||
int termCount = parseIntAt(FIELDTERMCOUNT.length);
|
int termCount = parseIntAt(FIELDTERMCOUNT.length);
|
||||||
|
|
||||||
SimpleTVTerms terms = new SimpleTVTerms();
|
SimpleTVTerms terms = new SimpleTVTerms(offsets, positions);
|
||||||
fields.put(fieldName, terms);
|
fields.put(fieldName, terms);
|
||||||
|
|
||||||
for (int j = 0; j < termCount; j++) {
|
for (int j = 0; j < termCount; j++) {
|
||||||
|
@ -257,8 +257,12 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
|
|
||||||
private static class SimpleTVTerms extends Terms {
|
private static class SimpleTVTerms extends Terms {
|
||||||
final SortedMap<BytesRef,SimpleTVPostings> terms;
|
final SortedMap<BytesRef,SimpleTVPostings> terms;
|
||||||
|
final boolean hasOffsets;
|
||||||
|
final boolean hasPositions;
|
||||||
|
|
||||||
SimpleTVTerms() {
|
SimpleTVTerms(boolean hasOffsets, boolean hasPositions) {
|
||||||
|
this.hasOffsets = hasOffsets;
|
||||||
|
this.hasPositions = hasPositions;
|
||||||
terms = new TreeMap<BytesRef,SimpleTVPostings>();
|
terms = new TreeMap<BytesRef,SimpleTVPostings>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -292,6 +296,16 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
||||||
public int getDocCount() throws IOException {
|
public int getDocCount() throws IOException {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns the hasOffsets flag that was handed to the SimpleTVTerms constructor. */
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return hasOffsets;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the hasPositions flag that was handed to the SimpleTVTerms constructor. */
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return hasPositions;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class SimpleTVPostings {
|
private static class SimpleTVPostings {
|
||||||
|
|
|
@ -718,6 +718,11 @@ public class CheckIndex {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final boolean hasPositions = terms.hasPositions();
|
||||||
|
final boolean hasOffsets = terms.hasOffsets();
|
||||||
|
// term vectors cannot omit TF
|
||||||
|
final boolean hasFreqs = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
||||||
|
|
||||||
final TermsEnum termsEnum = terms.iterator(null);
|
final TermsEnum termsEnum = terms.iterator(null);
|
||||||
|
|
||||||
boolean hasOrd = true;
|
boolean hasOrd = true;
|
||||||
|
@ -777,17 +782,10 @@ public class CheckIndex {
|
||||||
status.termCount++;
|
status.termCount++;
|
||||||
|
|
||||||
final DocsEnum docs2;
|
final DocsEnum docs2;
|
||||||
final boolean hasPositions;
|
|
||||||
// if we are checking vectors, we have freqs implicitly
|
|
||||||
final boolean hasFreqs = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
|
|
||||||
// if we are checking vectors, offsets are a free-for-all anyway
|
|
||||||
final boolean hasOffsets = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
|
||||||
if (postings != null) {
|
if (postings != null) {
|
||||||
docs2 = postings;
|
docs2 = postings;
|
||||||
hasPositions = true;
|
|
||||||
} else {
|
} else {
|
||||||
docs2 = docs;
|
docs2 = docs;
|
||||||
hasPositions = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int lastDoc = -1;
|
int lastDoc = -1;
|
||||||
|
@ -827,10 +825,7 @@ public class CheckIndex {
|
||||||
// NOTE: pos=-1 is allowed because of ancient bug
|
// NOTE: pos=-1 is allowed because of ancient bug
|
||||||
// (LUCENE-1542) whereby IndexWriter could
|
// (LUCENE-1542) whereby IndexWriter could
|
||||||
// write pos=-1 when first token's posInc is 0
|
// write pos=-1 when first token's posInc is 0
|
||||||
// (separately: analyzers should not give
|
|
||||||
// posInc=0 to first token); also, term
|
|
||||||
// vectors are allowed to return pos=-1 if
|
|
||||||
// they indexed offset but not positions:
|
|
||||||
if (pos < -1) {
|
if (pos < -1) {
|
||||||
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
|
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
|
||||||
}
|
}
|
||||||
|
@ -1439,19 +1434,18 @@ public class CheckIndex {
|
||||||
}
|
}
|
||||||
postingsTermsEnum = postingsTerms.iterator(postingsTermsEnum);
|
postingsTermsEnum = postingsTerms.iterator(postingsTermsEnum);
|
||||||
|
|
||||||
|
final boolean hasProx = terms.hasOffsets() || terms.hasPositions();
|
||||||
BytesRef term = null;
|
BytesRef term = null;
|
||||||
while ((term = termsEnum.next()) != null) {
|
while ((term = termsEnum.next()) != null) {
|
||||||
|
|
||||||
final boolean hasProx;
|
|
||||||
|
|
||||||
// Try positions:
|
if (hasProx) {
|
||||||
postings = termsEnum.docsAndPositions(null, postings);
|
postings = termsEnum.docsAndPositions(null, postings);
|
||||||
if (postings == null) {
|
assert postings != null;
|
||||||
hasProx = false;
|
docs = null;
|
||||||
// Try docIDs & freqs:
|
|
||||||
docs = termsEnum.docs(null, docs);
|
|
||||||
} else {
|
} else {
|
||||||
hasProx = true;
|
docs = termsEnum.docs(null, docs);
|
||||||
|
assert docs != null;
|
||||||
|
postings = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
final DocsEnum docs2;
|
final DocsEnum docs2;
|
||||||
|
|
|
@ -109,6 +109,16 @@ public class FilterAtomicReader extends AtomicReader {
|
||||||
public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws java.io.IOException {
|
public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws java.io.IOException {
|
||||||
return in.intersect(automaton, bytes);
|
return in.intersect(automaton, bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Pass-through: answers with whatever the wrapped Terms instance {@code in} reports. */
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return in.hasOffsets();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Pass-through: answers with whatever the wrapped Terms instance {@code in} reports. */
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return in.hasPositions();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Base class for filtering {@link TermsEnum} implementations. */
|
/** Base class for filtering {@link TermsEnum} implementations. */
|
||||||
|
|
|
@ -37,12 +37,17 @@ public final class MultiTerms extends Terms {
|
||||||
private final Terms[] subs;
|
private final Terms[] subs;
|
||||||
private final ReaderSlice[] subSlices;
|
private final ReaderSlice[] subSlices;
|
||||||
private final Comparator<BytesRef> termComp;
|
private final Comparator<BytesRef> termComp;
|
||||||
|
private final boolean hasOffsets;
|
||||||
|
private final boolean hasPositions;
|
||||||
|
|
||||||
public MultiTerms(Terms[] subs, ReaderSlice[] subSlices) throws IOException {
|
public MultiTerms(Terms[] subs, ReaderSlice[] subSlices) throws IOException {
|
||||||
this.subs = subs;
|
this.subs = subs;
|
||||||
this.subSlices = subSlices;
|
this.subSlices = subSlices;
|
||||||
|
|
||||||
Comparator<BytesRef> _termComp = null;
|
Comparator<BytesRef> _termComp = null;
|
||||||
|
assert subs.length > 0 : "inefficient: don't use MultiTerms over one sub";
|
||||||
|
boolean _hasOffsets = true;
|
||||||
|
boolean _hasPositions = true;
|
||||||
for(int i=0;i<subs.length;i++) {
|
for(int i=0;i<subs.length;i++) {
|
||||||
if (_termComp == null) {
|
if (_termComp == null) {
|
||||||
_termComp = subs[i].getComparator();
|
_termComp = subs[i].getComparator();
|
||||||
|
@ -54,9 +59,13 @@ public final class MultiTerms extends Terms {
|
||||||
throw new IllegalStateException("sub-readers have different BytesRef.Comparators; cannot merge");
|
throw new IllegalStateException("sub-readers have different BytesRef.Comparators; cannot merge");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
_hasOffsets &= subs[i].hasOffsets();
|
||||||
|
_hasPositions &= subs[i].hasPositions();
|
||||||
}
|
}
|
||||||
|
|
||||||
termComp = _termComp;
|
termComp = _termComp;
|
||||||
|
hasOffsets = _hasOffsets;
|
||||||
|
hasPositions = _hasPositions;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -142,5 +151,15 @@ public final class MultiTerms extends Terms {
|
||||||
public Comparator<BytesRef> getComparator() {
|
public Comparator<BytesRef> getComparator() {
|
||||||
return termComp;
|
return termComp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns the flag computed in the constructor: true only if every sub-Terms reported offsets. */
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return hasOffsets;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the flag computed in the constructor: true only if every sub-Terms reported positions. */
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return hasPositions;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -104,6 +104,12 @@ public abstract class Terms {
|
||||||
* measures, this measure does not take deleted documents
|
* measures, this measure does not take deleted documents
|
||||||
* into account. */
|
* into account. */
|
||||||
public abstract int getDocCount() throws IOException;
|
public abstract int getDocCount() throws IOException;
|
||||||
|
|
||||||
|
/** Returns true if documents in this field store offsets. */
|
||||||
|
public abstract boolean hasOffsets();
|
||||||
|
|
||||||
|
/** Returns true if documents in this field store positions. */
|
||||||
|
public abstract boolean hasPositions();
|
||||||
|
|
||||||
public final static Terms[] EMPTY_ARRAY = new Terms[0];
|
public final static Terms[] EMPTY_ARRAY = new Terms[0];
|
||||||
}
|
}
|
||||||
|
|
|
@ -207,6 +207,8 @@ public class TestDuelingCodecs extends LuceneTestCase {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
assertTermsStatistics(leftTerms, rightTerms);
|
assertTermsStatistics(leftTerms, rightTerms);
|
||||||
|
assertEquals(leftTerms.hasOffsets(), rightTerms.hasOffsets());
|
||||||
|
assertEquals(leftTerms.hasPositions(), rightTerms.hasPositions());
|
||||||
|
|
||||||
TermsEnum leftTermsEnum = leftTerms.iterator(null);
|
TermsEnum leftTermsEnum = leftTerms.iterator(null);
|
||||||
TermsEnum rightTermsEnum = rightTerms.iterator(null);
|
TermsEnum rightTermsEnum = rightTerms.iterator(null);
|
||||||
|
|
|
@ -122,6 +122,16 @@ public final class FieldCacheRewriteMethod extends MultiTermQuery.RewriteMethod
|
||||||
public long size() {
|
public long size() {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Always false: this Terms implementation never reports offsets. */
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Always false: this Terms implementation never reports positions. */
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
assert termsEnum != null;
|
assert termsEnum != null;
|
||||||
|
|
|
@ -125,20 +125,7 @@ public class TokenSources {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean hasPositions(Terms vector) throws IOException {
|
private static boolean hasPositions(Terms vector) throws IOException {
|
||||||
final TermsEnum termsEnum = vector.iterator(null);
|
return vector.hasPositions();
|
||||||
if (termsEnum.next() != null) {
|
|
||||||
DocsAndPositionsEnum dpEnum = termsEnum.docsAndPositions(null, null, DocsAndPositionsEnum.FLAG_PAYLOADS);
|
|
||||||
if (dpEnum != null) {
|
|
||||||
int doc = dpEnum.nextDoc();
|
|
||||||
assert doc >= 0 && doc != DocIdSetIterator.NO_MORE_DOCS;
|
|
||||||
int pos = dpEnum.nextPosition();
|
|
||||||
if (pos >= 0) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -56,18 +56,17 @@ public final class TokenStreamFromTermPositionVector extends TokenStream {
|
||||||
termAttribute = addAttribute(CharTermAttribute.class);
|
termAttribute = addAttribute(CharTermAttribute.class);
|
||||||
positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
|
positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
|
||||||
offsetAttribute = addAttribute(OffsetAttribute.class);
|
offsetAttribute = addAttribute(OffsetAttribute.class);
|
||||||
|
final boolean hasOffsets = vector.hasOffsets();
|
||||||
final TermsEnum termsEnum = vector.iterator(null);
|
final TermsEnum termsEnum = vector.iterator(null);
|
||||||
BytesRef text;
|
BytesRef text;
|
||||||
DocsAndPositionsEnum dpEnum = null;
|
DocsAndPositionsEnum dpEnum = null;
|
||||||
while((text = termsEnum.next()) != null) {
|
while((text = termsEnum.next()) != null) {
|
||||||
dpEnum = termsEnum.docsAndPositions(null, dpEnum);
|
dpEnum = termsEnum.docsAndPositions(null, dpEnum);
|
||||||
assert dpEnum != null; // presumably checked by TokenSources.hasPositions earlier
|
assert dpEnum != null; // presumably checked by TokenSources.hasPositions earlier
|
||||||
boolean hasOffsets = true;
|
|
||||||
dpEnum.nextDoc();
|
dpEnum.nextDoc();
|
||||||
final int freq = dpEnum.freq();
|
final int freq = dpEnum.freq();
|
||||||
for (int j = 0; j < freq; j++) {
|
for (int j = 0; j < freq; j++) {
|
||||||
int pos = dpEnum.nextPosition();
|
int pos = dpEnum.nextPosition();
|
||||||
hasOffsets &= dpEnum.startOffset() >= 0;
|
|
||||||
Token token;
|
Token token;
|
||||||
if (hasOffsets) {
|
if (hasOffsets) {
|
||||||
token = new Token(text.utf8ToString(),
|
token = new Token(text.utf8ToString(),
|
||||||
|
|
|
@ -778,8 +778,16 @@ public class MemoryIndex {
|
||||||
public int getDocCount() {
|
public int getDocCount() {
|
||||||
return info.sortedTerms.length > 0 ? 1 : 0;
|
return info.sortedTerms.length > 0 ? 1 : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** True exactly when stride is 3. NOTE(review): presumably the layout that keeps start/end offsets next to each position; confirm against MemoryIndex. */
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return stride == 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Always true: this Terms implementation unconditionally reports positions. */
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -127,9 +127,11 @@ public class RAMOnlyPostingsFormat extends PostingsFormat {
|
||||||
long sumTotalTermFreq;
|
long sumTotalTermFreq;
|
||||||
long sumDocFreq;
|
long sumDocFreq;
|
||||||
int docCount;
|
int docCount;
|
||||||
|
final FieldInfo.IndexOptions options;
|
||||||
|
|
||||||
RAMField(String field) {
|
RAMField(String field, FieldInfo.IndexOptions options) {
|
||||||
this.field = field;
|
this.field = field;
|
||||||
|
this.options = options;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -161,6 +163,16 @@ public class RAMOnlyPostingsFormat extends PostingsFormat {
|
||||||
public Comparator<BytesRef> getComparator() {
|
public Comparator<BytesRef> getComparator() {
|
||||||
return reverseUnicodeComparator;
|
return reverseUnicodeComparator;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** True when the stored options compare at or above DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS. */
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
return options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** True when the stored options compare at or above DOCS_AND_FREQS_AND_POSITIONS. */
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
return options.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static class RAMTerm {
|
static class RAMTerm {
|
||||||
|
@ -198,7 +210,7 @@ public class RAMOnlyPostingsFormat extends PostingsFormat {
|
||||||
if (field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
|
if (field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0) {
|
||||||
throw new UnsupportedOperationException("this codec cannot index offsets");
|
throw new UnsupportedOperationException("this codec cannot index offsets");
|
||||||
}
|
}
|
||||||
RAMField ramField = new RAMField(field.name);
|
RAMField ramField = new RAMField(field.name, field.getIndexOptions());
|
||||||
postings.fieldToTerms.put(field.name, ramField);
|
postings.fieldToTerms.put(field.name, ramField);
|
||||||
termsConsumer.reset(ramField);
|
termsConsumer.reset(ramField);
|
||||||
return termsConsumer;
|
return termsConsumer;
|
||||||
|
|
Loading…
Reference in New Issue