LUCENE-5677: simplify position handling in DefaultIndexingChain

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1595469 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2014-05-17 13:05:28 +00:00
parent 4affe33430
commit 5cdde67569
4 changed files with 87 additions and 33 deletions

View File

@ -300,6 +300,12 @@ final class DefaultIndexingChain extends DocConsumer {
try {
for (IndexableField field : docState.doc.indexableFields()) {
IndexableFieldType fieldType = field.fieldType();
// if the field omits norms, the boost cannot be indexed.
if (fieldType.omitNorms() && field.boost() != 1.0f) {
throw new UnsupportedOperationException("You cannot set an index-time boost: norms are omitted for field '" + field.name() + "'");
}
PerField fp = getOrAddField(field.name(), fieldType, true);
boolean first = fp.fieldGen != fieldGen;
fp.invert(field, first);
@ -557,11 +563,6 @@ final class DefaultIndexingChain extends DocConsumer {
IndexableFieldType fieldType = field.fieldType();
// if the field omits norms, the boost cannot be indexed.
if (fieldType.omitNorms() && field.boost() != 1.0f) {
throw new UnsupportedOperationException("You cannot set an index-time boost: norms are omitted for field '" + field.name() + "'");
}
final boolean analyzed = fieldType.tokenized() && docState.analyzer != null;
// only bother checking offsets if something will consume them.
@ -569,6 +570,7 @@ final class DefaultIndexingChain extends DocConsumer {
final boolean checkOffsets = fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
int lastStartOffset = 0;
int lastPosition = 0;
/*
* To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream
@ -593,26 +595,15 @@ final class DefaultIndexingChain extends DocConsumer {
// will be marked as deleted, but still
// consume a docID
final int posIncr = invertState.posIncrAttribute.getPositionIncrement();
if (posIncr < 0) {
throw new IllegalArgumentException("position increment must be >=0 (got " + posIncr + ") for field '" + field.name() + "'");
int posIncr = invertState.posIncrAttribute.getPositionIncrement();
invertState.position += posIncr;
if (invertState.position < lastPosition) {
if (posIncr == 0) {
throw new IllegalArgumentException("first position increment must be > 0 (got 0) for field '" + field.name() + "'");
}
throw new IllegalArgumentException("position increments (and gaps) must be >= 0 (got " + posIncr + ") for field '" + field.name() + "'");
}
if (invertState.position == 0 && posIncr == 0) {
throw new IllegalArgumentException("first position increment must be > 0 (got 0) for field '" + field.name() + "'");
}
int position = invertState.position + posIncr;
if (position > 0) {
// NOTE: confusing: this "mirrors" the
// position++ we do below
position--;
} else if (position < 0) {
throw new IllegalArgumentException("position overflow for field '" + field.name() + "'");
}
// position is legal, we can safely place it in invertState now.
// not sure if anything will use invertState after non-aborting exc...
invertState.position = position;
lastPosition = invertState.position;
if (posIncr == 0) {
invertState.numOverlap++;
}
@ -620,13 +611,9 @@ final class DefaultIndexingChain extends DocConsumer {
if (checkOffsets) {
int startOffset = invertState.offset + invertState.offsetAttribute.startOffset();
int endOffset = invertState.offset + invertState.offsetAttribute.endOffset();
if (startOffset < 0 || endOffset < startOffset) {
throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
+ "startOffset=" + startOffset + ",endOffset=" + endOffset + " for field '" + field.name() + "'");
}
if (startOffset < lastStartOffset) {
throw new IllegalArgumentException("offsets must not go backwards startOffset="
+ startOffset + " is < lastStartOffset=" + lastStartOffset + " for field '" + field.name() + "'");
if (startOffset < lastStartOffset || endOffset < startOffset) {
throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go backwards "
+ "startOffset=" + startOffset + ",endOffset=" + endOffset + ",lastStartOffset=" + lastStartOffset + " for field '" + field.name() + "'");
}
lastStartOffset = startOffset;
}
@ -644,7 +631,6 @@ final class DefaultIndexingChain extends DocConsumer {
aborting = false;
invertState.length++;
invertState.position++;
}
// trigger streams to perform end-of-stream operations

View File

@ -67,7 +67,7 @@ public final class FieldInvertState {
* Re-initialize the state
*/
void reset() {
position = 0;
position = -1;
length = 0;
numOverlap = 0;
offset = 0;

View File

@ -1637,6 +1637,38 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
dir.close();
}
public void testCrazyPositionIncrementGap() throws Exception {
Directory dir = newDirectory();
Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new MockTokenizer(MockTokenizer.KEYWORD, false));
}
@Override
public int getPositionIncrementGap(String fieldName) {
return -2;
}
};
IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
// add good document
Document doc = new Document();
iw.addDocument(doc);
try {
doc.add(newTextField("foo", "bar", Field.Store.NO));
doc.add(newTextField("foo", "bar", Field.Store.NO));
iw.addDocument(doc);
fail("didn't get expected exception");
} catch (IllegalArgumentException expected) {}
iw.shutdown();
// make sure we see our good doc
DirectoryReader r = DirectoryReader.open(dir);
assertEquals(1, r.numDocs());
r.close();
dir.close();
}
// TODO: we could also check isValid, to catch "broken" bytesref values, might be too much?
static class UOEDirectory extends RAMDirectory {

View File

@ -27,8 +27,10 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockPayloadAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@ -446,6 +448,40 @@ public class TestPostingsOffsets extends LuceneTestCase {
makeToken("foo", 0, 0, 3)
});
}
public void testCrazyOffsetGap() throws Exception {
Directory dir = newDirectory();
Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new MockTokenizer(MockTokenizer.KEYWORD, false));
}
@Override
public int getOffsetGap(String fieldName) {
return -10;
}
};
IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
// add good document
Document doc = new Document();
iw.addDocument(doc);
try {
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
doc.add(new Field("foo", "bar", ft));
doc.add(new Field("foo", "bar", ft));
iw.addDocument(doc);
fail("didn't get expected exception");
} catch (IllegalArgumentException expected) {}
iw.shutdown();
// make sure we see our good doc
DirectoryReader r = DirectoryReader.open(dir);
assertEquals(1, r.numDocs());
r.close();
dir.close();
}
public void testLegalbutVeryLargeOffsets() throws Exception {
Directory dir = newDirectory();