diff --git a/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java b/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java index 70e60896c2a..89f54420898 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java +++ b/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java @@ -300,6 +300,12 @@ final class DefaultIndexingChain extends DocConsumer { try { for (IndexableField field : docState.doc.indexableFields()) { IndexableFieldType fieldType = field.fieldType(); + + // if the field omits norms, the boost cannot be indexed. + if (fieldType.omitNorms() && field.boost() != 1.0f) { + throw new UnsupportedOperationException("You cannot set an index-time boost: norms are omitted for field '" + field.name() + "'"); + } + PerField fp = getOrAddField(field.name(), fieldType, true); boolean first = fp.fieldGen != fieldGen; fp.invert(field, first); @@ -557,11 +563,6 @@ final class DefaultIndexingChain extends DocConsumer { IndexableFieldType fieldType = field.fieldType(); - // if the field omits norms, the boost cannot be indexed. - if (fieldType.omitNorms() && field.boost() != 1.0f) { - throw new UnsupportedOperationException("You cannot set an index-time boost: norms are omitted for field '" + field.name() + "'"); - } - final boolean analyzed = fieldType.tokenized() && docState.analyzer != null; // only bother checking offsets if something will consume them. @@ -569,6 +570,7 @@ final class DefaultIndexingChain extends DocConsumer { final boolean checkOffsets = fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; int lastStartOffset = 0; + int lastPosition = 0; /* * To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream @@ -593,26 +595,15 @@ final class DefaultIndexingChain extends DocConsumer { // will be marked as deleted, but still // consume a docID - final int posIncr = invertState.posIncrAttribute.getPositionIncrement(); - if (posIncr < 0) { - throw new IllegalArgumentException("position increment must be >=0 (got " + posIncr + ") for field '" + field.name() + "'"); + int posIncr = invertState.posIncrAttribute.getPositionIncrement(); + invertState.position += posIncr; + if (invertState.position < lastPosition) { + if (posIncr == 0) { + throw new IllegalArgumentException("first position increment must be > 0 (got 0) for field '" + field.name() + "'"); + } + throw new IllegalArgumentException("position increments (and gaps) must be >= 0 (got " + posIncr + ") for field '" + field.name() + "'"); } - if (invertState.position == 0 && posIncr == 0) { - throw new IllegalArgumentException("first position increment must be > 0 (got 0) for field '" + field.name() + "'"); - } - int position = invertState.position + posIncr; - if (position > 0) { - // NOTE: confusing: this "mirrors" the - // position++ we do below - position--; - } else if (position < 0) { - throw new IllegalArgumentException("position overflow for field '" + field.name() + "'"); - } - - // position is legal, we can safely place it in invertState now. - // not sure if anything will use invertState after non-aborting exc... - invertState.position = position; - + lastPosition = invertState.position; if (posIncr == 0) { invertState.numOverlap++; } @@ -620,13 +611,9 @@ final class DefaultIndexingChain extends DocConsumer { if (checkOffsets) { int startOffset = invertState.offset + invertState.offsetAttribute.startOffset(); int endOffset = invertState.offset + invertState.offsetAttribute.endOffset(); - if (startOffset < 0 || endOffset < startOffset) { - throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, " - + "startOffset=" + startOffset + ",endOffset=" + endOffset + " for field '" + field.name() + "'"); - } - if (startOffset < lastStartOffset) { - throw new IllegalArgumentException("offsets must not go backwards startOffset=" - + startOffset + " is < lastStartOffset=" + lastStartOffset + " for field '" + field.name() + "'"); + if (startOffset < lastStartOffset || endOffset < startOffset) { + throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go backwards " + + "startOffset=" + startOffset + ",endOffset=" + endOffset + ",lastStartOffset=" + lastStartOffset + " for field '" + field.name() + "'"); } lastStartOffset = startOffset; } @@ -644,7 +631,6 @@ final class DefaultIndexingChain extends DocConsumer { aborting = false; invertState.length++; - invertState.position++; } // trigger streams to perform end-of-stream operations diff --git a/lucene/core/src/java/org/apache/lucene/index/FieldInvertState.java b/lucene/core/src/java/org/apache/lucene/index/FieldInvertState.java index 80f0bbe8d31..776cfb84554 100644 --- a/lucene/core/src/java/org/apache/lucene/index/FieldInvertState.java +++ b/lucene/core/src/java/org/apache/lucene/index/FieldInvertState.java @@ -67,7 +67,7 @@ public final class FieldInvertState { * Re-initialize the state */ void reset() { - position = 0; + position = -1; length = 0; numOverlap = 0; offset = 0; diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java index f0633ece7d0..8d78d5d2217 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java @@ -1637,6 +1637,38 @@ public class TestIndexWriterExceptions extends LuceneTestCase { dir.close(); } + public void testCrazyPositionIncrementGap() throws Exception { + Directory dir = newDirectory(); + Analyzer analyzer = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + return new TokenStreamComponents(new MockTokenizer(MockTokenizer.KEYWORD, false)); + } + + @Override + public int getPositionIncrementGap(String fieldName) { + return -2; + } + }; + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); + // add good document + Document doc = new Document(); + iw.addDocument(doc); + try { + doc.add(newTextField("foo", "bar", Field.Store.NO)); + doc.add(newTextField("foo", "bar", Field.Store.NO)); + iw.addDocument(doc); + fail("didn't get expected exception"); + } catch (IllegalArgumentException expected) {} + iw.shutdown(); + + // make sure we see our good doc + DirectoryReader r = DirectoryReader.open(dir); + assertEquals(1, r.numDocs()); + r.close(); + dir.close(); + } + // TODO: we could also check isValid, to catch "broken" bytesref values, might be too much? static class UOEDirectory extends RAMDirectory { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java b/lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java index d84d151cab6..5654511e916 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java @@ -27,8 +27,10 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CannedTokenStream; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockPayloadAnalyzer; +import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Analyzer.TokenStreamComponents; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -446,6 +448,40 @@ public class TestPostingsOffsets extends LuceneTestCase { makeToken("foo", 0, 0, 3) }); } + + public void testCrazyOffsetGap() throws Exception { + Directory dir = newDirectory(); + Analyzer analyzer = new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + return new TokenStreamComponents(new MockTokenizer(MockTokenizer.KEYWORD, false)); + } + + @Override + public int getOffsetGap(String fieldName) { + return -10; + } + }; + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); + // add good document + Document doc = new Document(); + iw.addDocument(doc); + try { + FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); + ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + doc.add(new Field("foo", "bar", ft)); + doc.add(new Field("foo", "bar", ft)); + iw.addDocument(doc); + fail("didn't get expected exception"); + } catch (IllegalArgumentException expected) {} + iw.shutdown(); + + // make sure we see our good doc + DirectoryReader r = DirectoryReader.open(dir); + assertEquals(1, r.numDocs()); + r.close(); + dir.close(); + } public void testLegalbutVeryLargeOffsets() throws Exception { Directory dir = newDirectory();