mirror of https://github.com/apache/lucene.git
LUCENE-5677: simplify position handling in DefaultIndexingChain
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1595469 13f79535-47bb-0310-9956-ffa450edef68
commit 5cdde67569
parent 4affe33430
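In brief, per the hunks below: the index-time boost check moves out of the token-inversion path and up into the per-document field loop; FieldInvertState.position now resets to -1 instead of 0; the per-token invertState.position++ (and the confusing position-- that mirrored it) is removed, with each position increment applied directly to invertState.position; and a new lastPosition variable lets a single comparison reject a zero first increment, negative increments, and negative position-increment gaps. The two offset validity checks are merged into one for the same reason. New tests cover analyzers returning negative getPositionIncrementGap and getOffsetGap values.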
@@ -300,6 +300,12 @@ final class DefaultIndexingChain extends DocConsumer {
     try {
       for (IndexableField field : docState.doc.indexableFields()) {
         IndexableFieldType fieldType = field.fieldType();
+
+        // if the field omits norms, the boost cannot be indexed.
+        if (fieldType.omitNorms() && field.boost() != 1.0f) {
+          throw new UnsupportedOperationException("You cannot set an index-time boost: norms are omitted for field '" + field.name() + "'");
+        }
+
         PerField fp = getOrAddField(field.name(), fieldType, true);
         boolean first = fp.fieldGen != fieldGen;
         fp.invert(field, first);
@@ -557,11 +563,6 @@ final class DefaultIndexingChain extends DocConsumer {
 
       IndexableFieldType fieldType = field.fieldType();
 
-      // if the field omits norms, the boost cannot be indexed.
-      if (fieldType.omitNorms() && field.boost() != 1.0f) {
-        throw new UnsupportedOperationException("You cannot set an index-time boost: norms are omitted for field '" + field.name() + "'");
-      }
-
       final boolean analyzed = fieldType.tokenized() && docState.analyzer != null;
 
       // only bother checking offsets if something will consume them.
@@ -569,6 +570,7 @@ final class DefaultIndexingChain extends DocConsumer {
       final boolean checkOffsets = fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
 
       int lastStartOffset = 0;
+      int lastPosition = 0;
 
       /*
        * To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream
@@ -593,26 +595,15 @@ final class DefaultIndexingChain extends DocConsumer {
           // will be marked as deleted, but still
           // consume a docID
 
-          final int posIncr = invertState.posIncrAttribute.getPositionIncrement();
-          if (posIncr < 0) {
-            throw new IllegalArgumentException("position increment must be >=0 (got " + posIncr + ") for field '" + field.name() + "'");
-          }
-          if (invertState.position == 0 && posIncr == 0) {
-            throw new IllegalArgumentException("first position increment must be > 0 (got 0) for field '" + field.name() + "'");
-          }
-          int position = invertState.position + posIncr;
-          if (position > 0) {
-            // NOTE: confusing: this "mirrors" the
-            // position++ we do below
-            position--;
-          } else if (position < 0) {
-            throw new IllegalArgumentException("position overflow for field '" + field.name() + "'");
-          }
-
-          // position is legal, we can safely place it in invertState now.
-          // not sure if anything will use invertState after non-aborting exc...
-          invertState.position = position;
-
+          int posIncr = invertState.posIncrAttribute.getPositionIncrement();
+          invertState.position += posIncr;
+          if (invertState.position < lastPosition) {
+            if (posIncr == 0) {
+              throw new IllegalArgumentException("first position increment must be > 0 (got 0) for field '" + field.name() + "'");
+            }
+            throw new IllegalArgumentException("position increments (and gaps) must be >= 0 (got " + posIncr + ") for field '" + field.name() + "'");
+          }
+          lastPosition = invertState.position;
           if (posIncr == 0) {
             invertState.numOverlap++;
           }
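Note on the hunk above: the single invertState.position < lastPosition comparison subsumes all three removed checks. A negative increment necessarily moves position backwards; a zero increment on the first token leaves position at its reset value of -1, below lastPosition's initial 0, which is why the dedicated "first position increment" message is kept for posIncr == 0; and a position that overflows past Integer.MAX_VALUE wraps negative and trips the same comparison, replacing the explicit overflow check. It also catches a negative position-increment gap applied between values of a multi-valued field (outside this token loop), which the old per-token posIncr < 0 test never saw.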
@@ -620,13 +611,9 @@ final class DefaultIndexingChain extends DocConsumer {
           if (checkOffsets) {
             int startOffset = invertState.offset + invertState.offsetAttribute.startOffset();
             int endOffset = invertState.offset + invertState.offsetAttribute.endOffset();
-            if (startOffset < 0 || endOffset < startOffset) {
-              throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
-                  + "startOffset=" + startOffset + ",endOffset=" + endOffset + " for field '" + field.name() + "'");
-            }
-            if (startOffset < lastStartOffset) {
-              throw new IllegalArgumentException("offsets must not go backwards startOffset="
-                  + startOffset + " is < lastStartOffset=" + lastStartOffset + " for field '" + field.name() + "'");
-            }
+            if (startOffset < lastStartOffset || endOffset < startOffset) {
+              throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go backwards "
+                  + "startOffset=" + startOffset + ",endOffset=" + endOffset + ",lastStartOffset=" + lastStartOffset + " for field '" + field.name() + "'");
+            }
             lastStartOffset = startOffset;
           }
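Note on the hunk above: merging the two conditions loses nothing, because lastStartOffset starts at 0 and is non-decreasing, so startOffset < lastStartOffset also covers the old startOffset < 0 case, and the combined message now reports lastStartOffset as well. For illustration only (a sketch in the style of the tests below, not part of this patch; the field and term names are hypothetical), a token stream whose offsets run backwards now fails this one check:

    // Illustrative sketch (not from this patch): a token stream whose
    // offsets go backwards is rejected by the single combined check.
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    Token t1 = new Token("foo", 4, 7);   // startOffset=4, endOffset=7
    Token t2 = new Token("bar", 0, 3);   // startOffset=0 goes backwards past lastStartOffset=4
    Document doc = new Document();
    doc.add(new Field("body", new CannedTokenStream(t1, t2), ft));
    try {
      iw.addDocument(doc);   // throws: "... offsets must not go backwards ..."
      fail("didn't get expected exception");
    } catch (IllegalArgumentException expected) {}
    iw.shutdown();
    dir.close();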
@@ -644,7 +631,6 @@ final class DefaultIndexingChain extends DocConsumer {
           aborting = false;
 
           invertState.length++;
-          invertState.position++;
         }
 
         // trigger streams to perform end-of-stream operations
@@ -67,7 +67,7 @@ public final class FieldInvertState {
    * Re-initialize the state
    */
   void reset() {
-    position = 0;
+    position = -1;
     length = 0;
     numOverlap = 0;
     offset = 0;
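Starting position at -1 makes the arithmetic line up without special cases: a first token with the customary increment of 1 lands on position 0, exactly where the old code's position-- correction used to put it, while a first increment of 0 leaves position at -1 and fails the new backwards check in DefaultIndexingChain.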
@@ -1637,6 +1637,38 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
     dir.close();
   }
 
+  public void testCrazyPositionIncrementGap() throws Exception {
+    Directory dir = newDirectory();
+    Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        return new TokenStreamComponents(new MockTokenizer(MockTokenizer.KEYWORD, false));
+      }
+
+      @Override
+      public int getPositionIncrementGap(String fieldName) {
+        return -2;
+      }
+    };
+    IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+    // add good document
+    Document doc = new Document();
+    iw.addDocument(doc);
+    try {
+      doc.add(newTextField("foo", "bar", Field.Store.NO));
+      doc.add(newTextField("foo", "bar", Field.Store.NO));
+      iw.addDocument(doc);
+      fail("didn't get expected exception");
+    } catch (IllegalArgumentException expected) {}
+    iw.shutdown();
+
+    // make sure we see our good doc
+    DirectoryReader r = DirectoryReader.open(dir);
+    assertEquals(1, r.numDocs());
+    r.close();
+    dir.close();
+  }
+
   // TODO: we could also check isValid, to catch "broken" bytesref values, might be too much?
 
   static class UOEDirectory extends RAMDirectory {
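The new test drives the position check through Analyzer.getPositionIncrementGap: the -2 gap between the two "foo" values moves invertState.position backwards, so the second addDocument throws IllegalArgumentException, while the previously committed (empty) document stays intact and visible.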
@@ -27,8 +27,10 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockPayloadAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -447,6 +449,40 @@ public class TestPostingsOffsets extends LuceneTestCase {
       });
   }
 
+  public void testCrazyOffsetGap() throws Exception {
+    Directory dir = newDirectory();
+    Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        return new TokenStreamComponents(new MockTokenizer(MockTokenizer.KEYWORD, false));
+      }
+
+      @Override
+      public int getOffsetGap(String fieldName) {
+        return -10;
+      }
+    };
+    IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+    // add good document
+    Document doc = new Document();
+    iw.addDocument(doc);
+    try {
+      FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+      ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+      doc.add(new Field("foo", "bar", ft));
+      doc.add(new Field("foo", "bar", ft));
+      iw.addDocument(doc);
+      fail("didn't get expected exception");
+    } catch (IllegalArgumentException expected) {}
+    iw.shutdown();
+
+    // make sure we see our good doc
+    DirectoryReader r = DirectoryReader.open(dir);
+    assertEquals(1, r.numDocs());
+    r.close();
+    dir.close();
+  }
+
   public void testLegalbutVeryLargeOffsets() throws Exception {
     Directory dir = newDirectory();
     IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
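Symmetrically, the new testCrazyOffsetGap above drives the merged offset check through Analyzer.getOffsetGap: the -10 gap makes the second "foo" value's startOffset land before lastStartOffset, the document is rejected, and the earlier good document survives.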