mirror of https://github.com/apache/lucene.git
LUCENE-5677: simplify position handling in DefaultIndexingChain
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1595469 13f79535-47bb-0310-9956-ffa450edef68
parent 4affe33430
commit 5cdde67569

DefaultIndexingChain.java
@@ -300,6 +300,12 @@ final class DefaultIndexingChain extends DocConsumer {
     try {
       for (IndexableField field : docState.doc.indexableFields()) {
         IndexableFieldType fieldType = field.fieldType();
+
+        // if the field omits norms, the boost cannot be indexed.
+        if (fieldType.omitNorms() && field.boost() != 1.0f) {
+          throw new UnsupportedOperationException("You cannot set an index-time boost: norms are omitted for field '" + field.name() + "'");
+        }
+
         PerField fp = getOrAddField(field.name(), fieldType, true);
         boolean first = fp.fieldGen != fieldGen;
         fp.invert(field, first);
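Note: the check added above rejects an index-time boost on a field whose norms are omitted, because the boost can only be stored folded into the norm. A minimal sketch of a field that would trip it (names and values are illustrative, not from the commit):

  // Sketch: an omit-norms field carrying a non-default index-time boost.
  FieldType t = new FieldType(TextField.TYPE_NOT_STORED);
  t.setOmitNorms(true);                 // no norm is written for this field
  Field f = new Field("title", "hello world", t);
  f.setBoost(2.0f);                     // nowhere to encode this without norms
  // Indexing a document containing f throws UnsupportedOperationException.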
@@ -557,11 +563,6 @@ final class DefaultIndexingChain extends DocConsumer {
 
       IndexableFieldType fieldType = field.fieldType();
 
-      // if the field omits norms, the boost cannot be indexed.
-      if (fieldType.omitNorms() && field.boost() != 1.0f) {
-        throw new UnsupportedOperationException("You cannot set an index-time boost: norms are omitted for field '" + field.name() + "'");
-      }
-
       final boolean analyzed = fieldType.tokenized() && docState.analyzer != null;
 
       // only bother checking offsets if something will consume them.
@@ -569,6 +570,7 @@ final class DefaultIndexingChain extends DocConsumer {
       final boolean checkOffsets = fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
 
       int lastStartOffset = 0;
+      int lastPosition = 0;
 
       /*
        * To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream
@@ -593,26 +595,15 @@ final class DefaultIndexingChain extends DocConsumer {
           // will be marked as deleted, but still
           // consume a docID
 
-          final int posIncr = invertState.posIncrAttribute.getPositionIncrement();
-          if (posIncr < 0) {
-            throw new IllegalArgumentException("position increment must be >=0 (got " + posIncr + ") for field '" + field.name() + "'");
-          }
-          if (invertState.position == 0 && posIncr == 0) {
-            throw new IllegalArgumentException("first position increment must be > 0 (got 0) for field '" + field.name() + "'");
-          }
-          int position = invertState.position + posIncr;
-          if (position > 0) {
-            // NOTE: confusing: this "mirrors" the
-            // position++ we do below
-            position--;
-          } else if (position < 0) {
-            throw new IllegalArgumentException("position overflow for field '" + field.name() + "'");
-          }
-
-          // position is legal, we can safely place it in invertState now.
-          // not sure if anything will use invertState after non-aborting exc...
-          invertState.position = position;
-
+          int posIncr = invertState.posIncrAttribute.getPositionIncrement();
+          invertState.position += posIncr;
+          if (invertState.position < lastPosition) {
+            if (posIncr == 0) {
+              throw new IllegalArgumentException("first position increment must be > 0 (got 0) for field '" + field.name() + "'");
+            }
+            throw new IllegalArgumentException("position increments (and gaps) must be >= 0 (got " + posIncr + ") for field '" + field.name() + "'");
+          }
+          lastPosition = invertState.position;
           if (posIncr == 0) {
             invertState.numOverlap++;
           }
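Note: the new bookkeeping relies on position starting at -1 (see the FieldInvertState change below) and being advanced by each token's increment before use, so a single "position < lastPosition" comparison catches a zero increment on the first token, negative increments, and negative position-increment gaps alike. A worked trace under those assumptions (a sketch, not code from the commit):

  // reset():                  position = -1, lastPosition = 0
  // token "a", posIncr = 1:   position = -1 + 1 = 0;  not < lastPosition -> ok, lastPosition = 0
  // token "b", posIncr = 1:   position =  0 + 1 = 1;  not < lastPosition -> ok, lastPosition = 1
  // token "c", posIncr = 0:   position =  1 + 0 = 1;  not < lastPosition -> ok, numOverlap++
  // first token, posIncr = 0: position = -1 + 0 = -1; < lastPosition -> "first position increment must be > 0"
  // any posIncr < 0:          position moves backwards -> "position increments (and gaps) must be >= 0"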
@@ -620,13 +611,9 @@ final class DefaultIndexingChain extends DocConsumer {
           if (checkOffsets) {
             int startOffset = invertState.offset + invertState.offsetAttribute.startOffset();
             int endOffset = invertState.offset + invertState.offsetAttribute.endOffset();
-            if (startOffset < 0 || endOffset < startOffset) {
-              throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
-                  + "startOffset=" + startOffset + ",endOffset=" + endOffset + " for field '" + field.name() + "'");
-            }
-            if (startOffset < lastStartOffset) {
-              throw new IllegalArgumentException("offsets must not go backwards startOffset="
-                  + startOffset + " is < lastStartOffset=" + lastStartOffset + " for field '" + field.name() + "'");
-            }
+            if (startOffset < lastStartOffset || endOffset < startOffset) {
+              throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go backwards "
+                  + "startOffset=" + startOffset + ",endOffset=" + endOffset + ",lastStartOffset=" + lastStartOffset + " for field '" + field.name() + "'");
+            }
             lastStartOffset = startOffset;
           }
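Note: the two offset checks collapse into one comparison; since lastStartOffset starts at 0 and never decreases, "startOffset < lastStartOffset" also subsumes the old "startOffset < 0" test. A short trace with illustrative values:

  // token at [0,4):              ok, lastStartOffset = 0
  // token at [5,3):              endOffset(3) < startOffset(5)         -> IllegalArgumentException
  // token at [2,6) after [5,9):  startOffset(2) < lastStartOffset(5)   -> IllegalArgumentException
  // first token at [-1,2):       startOffset(-1) < lastStartOffset(0)  -> IllegalArgumentException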
@@ -644,7 +631,6 @@ final class DefaultIndexingChain extends DocConsumer {
           aborting = false;
 
           invertState.length++;
-          invertState.position++;
         }
 
         // trigger streams to perform end-of-stream operations
FieldInvertState.java
@@ -67,7 +67,7 @@ public final class FieldInvertState {
    * Re-initialize the state
    */
   void reset() {
-    position = 0;
+    position = -1;
     length = 0;
     numOverlap = 0;
     offset = 0;
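Note: initializing position to -1 lets the first token land on position 0 after its increment is applied, removing the old "position--" mirroring. A quick arithmetic check (sketch):

  // first token, posIncr = 1 (the default): position = -1 + 1 = 0   -> first position is 0, as before
  // first token, posIncr = 0:               position = -1 + 0 = -1  -> below lastPosition (0), rejected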
TestIndexWriterExceptions.java
@@ -1637,6 +1637,38 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
     dir.close();
   }
 
+  public void testCrazyPositionIncrementGap() throws Exception {
+    Directory dir = newDirectory();
+    Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        return new TokenStreamComponents(new MockTokenizer(MockTokenizer.KEYWORD, false));
+      }
+
+      @Override
+      public int getPositionIncrementGap(String fieldName) {
+        return -2;
+      }
+    };
+    IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+    // add good document
+    Document doc = new Document();
+    iw.addDocument(doc);
+    try {
+      doc.add(newTextField("foo", "bar", Field.Store.NO));
+      doc.add(newTextField("foo", "bar", Field.Store.NO));
+      iw.addDocument(doc);
+      fail("didn't get expected exception");
+    } catch (IllegalArgumentException expected) {}
+    iw.shutdown();
+
+    // make sure we see our good doc
+    DirectoryReader r = DirectoryReader.open(dir);
+    assertEquals(1, r.numDocs());
+    r.close();
+    dir.close();
+  }
+
   // TODO: we could also check isValid, to catch "broken" bytesref values, might be too much?
 
   static class UOEDirectory extends RAMDirectory {
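Note: in the test above, the analyzer's getPositionIncrementGap of -2 is applied between the two "foo" instances, dragging the position behind lastPosition, so the new check rejects the document while the previously committed empty document stays searchable. Roughly where the gap takes effect (a sketch of the call site, not verbatim from the commit):

  // Between repeated instances of the same indexed field (sketch):
  if (!first) {
    invertState.position += docState.analyzer.getPositionIncrementGap(fieldName); // -2 here
  }
  // The next token then fails the "invertState.position < lastPosition" check -> IllegalArgumentException.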
TestPostingsOffsets.java
@@ -27,8 +27,10 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockPayloadAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -446,6 +448,40 @@ public class TestPostingsOffsets extends LuceneTestCase {
                  makeToken("foo", 0, 0, 3)
                });
   }
 
+  public void testCrazyOffsetGap() throws Exception {
+    Directory dir = newDirectory();
+    Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        return new TokenStreamComponents(new MockTokenizer(MockTokenizer.KEYWORD, false));
+      }
+
+      @Override
+      public int getOffsetGap(String fieldName) {
+        return -10;
+      }
+    };
+    IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+    // add good document
+    Document doc = new Document();
+    iw.addDocument(doc);
+    try {
+      FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+      ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+      doc.add(new Field("foo", "bar", ft));
+      doc.add(new Field("foo", "bar", ft));
+      iw.addDocument(doc);
+      fail("didn't get expected exception");
+    } catch (IllegalArgumentException expected) {}
+    iw.shutdown();
+
+    // make sure we see our good doc
+    DirectoryReader r = DirectoryReader.open(dir);
+    assertEquals(1, r.numDocs());
+    r.close();
+    dir.close();
+  }
+
   public void testLegalbutVeryLargeOffsets() throws Exception {
     Directory dir = newDirectory();
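Note: analogously, getOffsetGap of -10 is applied between the two "foo" instances, so the second instance's corrected startOffset falls below lastStartOffset and the merged offset check throws; only the good document remains visible. A sketch of the accounting (illustrative numbers, assuming the gap is added to invertState.offset):

  // "bar" is 3 chars: after the first instance, lastStartOffset = 0 and invertState.offset has advanced by 3.
  // invertState.offset += getOffsetGap("foo");              // 3 + (-10) = -7
  // second "bar": startOffset = -7 + 0 = -7 < lastStartOffset(0) -> IllegalArgumentException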