mirror of https://github.com/apache/lucene.git
LUCENE-5677: simplify position handling in DefaultIndexingChain
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1595469 13f79535-47bb-0310-9956-ffa450edef68
commit 5cdde67569
parent 4affe33430
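In brief, per the hunks below: the index-time boost check moves out of the token-inversion path and up into the per-document field loop; FieldInvertState.position now resets to -1 instead of 0; the per-token invertState.position++ (and the confusing position-- that mirrored it) is removed, with each position increment applied directly to invertState.position; and a new lastPosition variable lets a single comparison reject a zero first increment, negative increments, and negative position-increment gaps. The two offset validity checks are merged into one for the same reason. New tests cover analyzers returning negative getPositionIncrementGap and getOffsetGap values.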
@@ -300,6 +300,12 @@ final class DefaultIndexingChain extends DocConsumer {
     try {
       for (IndexableField field : docState.doc.indexableFields()) {
         IndexableFieldType fieldType = field.fieldType();
+
+        // if the field omits norms, the boost cannot be indexed.
+        if (fieldType.omitNorms() && field.boost() != 1.0f) {
+          throw new UnsupportedOperationException("You cannot set an index-time boost: norms are omitted for field '" + field.name() + "'");
+        }
+
         PerField fp = getOrAddField(field.name(), fieldType, true);
         boolean first = fp.fieldGen != fieldGen;
         fp.invert(field, first);
@@ -557,11 +563,6 @@ final class DefaultIndexingChain extends DocConsumer {
 
       IndexableFieldType fieldType = field.fieldType();
 
-      // if the field omits norms, the boost cannot be indexed.
-      if (fieldType.omitNorms() && field.boost() != 1.0f) {
-        throw new UnsupportedOperationException("You cannot set an index-time boost: norms are omitted for field '" + field.name() + "'");
-      }
-
       final boolean analyzed = fieldType.tokenized() && docState.analyzer != null;
 
       // only bother checking offsets if something will consume them.
@@ -569,6 +570,7 @@ final class DefaultIndexingChain extends DocConsumer {
       final boolean checkOffsets = fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
 
       int lastStartOffset = 0;
+      int lastPosition = 0;
 
       /*
        * To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream
@@ -593,26 +595,15 @@ final class DefaultIndexingChain extends DocConsumer {
           // will be marked as deleted, but still
           // consume a docID
 
-          final int posIncr = invertState.posIncrAttribute.getPositionIncrement();
-          if (posIncr < 0) {
-            throw new IllegalArgumentException("position increment must be >=0 (got " + posIncr + ") for field '" + field.name() + "'");
-          }
-          if (invertState.position == 0 && posIncr == 0) {
-            throw new IllegalArgumentException("first position increment must be > 0 (got 0) for field '" + field.name() + "'");
-          }
-          int position = invertState.position + posIncr;
-          if (position > 0) {
-            // NOTE: confusing: this "mirrors" the
-            // position++ we do below
-            position--;
-          } else if (position < 0) {
-            throw new IllegalArgumentException("position overflow for field '" + field.name() + "'");
-          }
-
-          // position is legal, we can safely place it in invertState now.
-          // not sure if anything will use invertState after non-aborting exc...
-          invertState.position = position;
-
+          int posIncr = invertState.posIncrAttribute.getPositionIncrement();
+          invertState.position += posIncr;
+          if (invertState.position < lastPosition) {
+            if (posIncr == 0) {
+              throw new IllegalArgumentException("first position increment must be > 0 (got 0) for field '" + field.name() + "'");
+            }
+            throw new IllegalArgumentException("position increments (and gaps) must be >= 0 (got " + posIncr + ") for field '" + field.name() + "'");
+          }
+          lastPosition = invertState.position;
           if (posIncr == 0) {
             invertState.numOverlap++;
           }
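Note on the hunk above: the single invertState.position < lastPosition comparison subsumes all three removed checks. A negative increment necessarily moves position backwards; a zero increment on the first token leaves position at its reset value of -1, below lastPosition's initial 0, which is why the dedicated "first position increment" message is kept for posIncr == 0; and a position that overflows past Integer.MAX_VALUE wraps negative and trips the same comparison, replacing the explicit overflow check. It also catches a negative position-increment gap applied between values of a multi-valued field (outside this token loop), which the old per-token posIncr < 0 test never saw.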
@@ -620,13 +611,9 @@ final class DefaultIndexingChain extends DocConsumer {
           if (checkOffsets) {
             int startOffset = invertState.offset + invertState.offsetAttribute.startOffset();
             int endOffset = invertState.offset + invertState.offsetAttribute.endOffset();
-            if (startOffset < 0 || endOffset < startOffset) {
-              throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
-                  + "startOffset=" + startOffset + ",endOffset=" + endOffset + " for field '" + field.name() + "'");
-            }
-            if (startOffset < lastStartOffset) {
-              throw new IllegalArgumentException("offsets must not go backwards startOffset="
-                  + startOffset + " is < lastStartOffset=" + lastStartOffset + " for field '" + field.name() + "'");
-            }
+            if (startOffset < lastStartOffset || endOffset < startOffset) {
+              throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go backwards "
+                  + "startOffset=" + startOffset + ",endOffset=" + endOffset + ",lastStartOffset=" + lastStartOffset + " for field '" + field.name() + "'");
+            }
             lastStartOffset = startOffset;
           }
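Note on the hunk above: merging the two conditions loses nothing, because lastStartOffset starts at 0 and is non-decreasing, so startOffset < lastStartOffset also covers the old startOffset < 0 case, and the combined message now reports lastStartOffset as well. For illustration only (a sketch in the style of the tests below, not part of this patch; the field and term names are hypothetical), a token stream whose offsets run backwards now fails this one check:

    // Illustrative sketch (not from this patch): a token stream whose
    // offsets go backwards is rejected by the single combined check.
    Directory dir = newDirectory();
    IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    Token t1 = new Token("foo", 4, 7);   // startOffset=4, endOffset=7
    Token t2 = new Token("bar", 0, 3);   // startOffset=0 goes backwards past lastStartOffset=4
    Document doc = new Document();
    doc.add(new Field("body", new CannedTokenStream(t1, t2), ft));
    try {
      iw.addDocument(doc);   // throws: "... offsets must not go backwards ..."
      fail("didn't get expected exception");
    } catch (IllegalArgumentException expected) {}
    iw.shutdown();
    dir.close();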
@@ -644,7 +631,6 @@ final class DefaultIndexingChain extends DocConsumer {
           aborting = false;
 
           invertState.length++;
-          invertState.position++;
         }
 
         // trigger streams to perform end-of-stream operations
@@ -67,7 +67,7 @@ public final class FieldInvertState {
    * Re-initialize the state
    */
   void reset() {
-    position = 0;
+    position = -1;
     length = 0;
     numOverlap = 0;
     offset = 0;
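Starting position at -1 makes the arithmetic line up without special cases: a first token with the customary increment of 1 lands on position 0, exactly where the old code's position-- correction used to put it, while a first increment of 0 leaves position at -1 and fails the new backwards check in DefaultIndexingChain.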
@@ -1637,6 +1637,38 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
     dir.close();
   }
 
+  public void testCrazyPositionIncrementGap() throws Exception {
+    Directory dir = newDirectory();
+    Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        return new TokenStreamComponents(new MockTokenizer(MockTokenizer.KEYWORD, false));
+      }
+
+      @Override
+      public int getPositionIncrementGap(String fieldName) {
+        return -2;
+      }
+    };
+    IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+    // add good document
+    Document doc = new Document();
+    iw.addDocument(doc);
+    try {
+      doc.add(newTextField("foo", "bar", Field.Store.NO));
+      doc.add(newTextField("foo", "bar", Field.Store.NO));
+      iw.addDocument(doc);
+      fail("didn't get expected exception");
+    } catch (IllegalArgumentException expected) {}
+    iw.shutdown();
+
+    // make sure we see our good doc
+    DirectoryReader r = DirectoryReader.open(dir);
+    assertEquals(1, r.numDocs());
+    r.close();
+    dir.close();
+  }
+
   // TODO: we could also check isValid, to catch "broken" bytesref values, might be too much?
 
   static class UOEDirectory extends RAMDirectory {
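The new test drives the position check through Analyzer.getPositionIncrementGap: the -2 gap between the two "foo" values moves invertState.position backwards, so the second addDocument throws IllegalArgumentException, while the previously committed (empty) document stays intact and visible.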
@@ -27,8 +27,10 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockPayloadAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -447,6 +449,40 @@ public class TestPostingsOffsets extends LuceneTestCase {
       });
   }
 
+  public void testCrazyOffsetGap() throws Exception {
+    Directory dir = newDirectory();
+    Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        return new TokenStreamComponents(new MockTokenizer(MockTokenizer.KEYWORD, false));
+      }
+
+      @Override
+      public int getOffsetGap(String fieldName) {
+        return -10;
+      }
+    };
+    IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+    // add good document
+    Document doc = new Document();
+    iw.addDocument(doc);
+    try {
+      FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+      ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+      doc.add(new Field("foo", "bar", ft));
+      doc.add(new Field("foo", "bar", ft));
+      iw.addDocument(doc);
+      fail("didn't get expected exception");
+    } catch (IllegalArgumentException expected) {}
+    iw.shutdown();
+
+    // make sure we see our good doc
+    DirectoryReader r = DirectoryReader.open(dir);
+    assertEquals(1, r.numDocs());
+    r.close();
+    dir.close();
+  }
+
   public void testLegalbutVeryLargeOffsets() throws Exception {
     Directory dir = newDirectory();
     IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
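Symmetrically, the new testCrazyOffsetGap above drives the merged offset check through Analyzer.getOffsetGap: the -10 gap makes the second "foo" value's startOffset land before lastStartOffset, the document is rejected, and the earlier good document survives.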