LUCENE-5677: simplify position handling in DefaultIndexingChain

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1595469 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2014-05-17 13:05:28 +00:00
parent 4affe33430
commit 5cdde67569
4 changed files with 87 additions and 33 deletions

lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java

@@ -300,6 +300,12 @@ final class DefaultIndexingChain extends DocConsumer {
     try {
       for (IndexableField field : docState.doc.indexableFields()) {
         IndexableFieldType fieldType = field.fieldType();
+        // if the field omits norms, the boost cannot be indexed.
+        if (fieldType.omitNorms() && field.boost() != 1.0f) {
+          throw new UnsupportedOperationException("You cannot set an index-time boost: norms are omitted for field '" + field.name() + "'");
+        }
         PerField fp = getOrAddField(field.name(), fieldType, true);
         boolean first = fp.fieldGen != fieldGen;
         fp.invert(field, first);
@@ -557,11 +563,6 @@ final class DefaultIndexingChain extends DocConsumer {
       IndexableFieldType fieldType = field.fieldType();
-      // if the field omits norms, the boost cannot be indexed.
-      if (fieldType.omitNorms() && field.boost() != 1.0f) {
-        throw new UnsupportedOperationException("You cannot set an index-time boost: norms are omitted for field '" + field.name() + "'");
-      }
       final boolean analyzed = fieldType.tokenized() && docState.analyzer != null;
       // only bother checking offsets if something will consume them.
@@ -569,6 +570,7 @@ final class DefaultIndexingChain extends DocConsumer {
       final boolean checkOffsets = fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
       int lastStartOffset = 0;
+      int lastPosition = 0;
       /*
        * To assist people in tracking down problems in analysis components, we wish to write the field name to the infostream
@@ -593,26 +595,15 @@ final class DefaultIndexingChain extends DocConsumer {
           // will be marked as deleted, but still
           // consume a docID
-          final int posIncr = invertState.posIncrAttribute.getPositionIncrement();
-          if (posIncr < 0) {
-            throw new IllegalArgumentException("position increment must be >=0 (got " + posIncr + ") for field '" + field.name() + "'");
-          }
-          if (invertState.position == 0 && posIncr == 0) {
-            throw new IllegalArgumentException("first position increment must be > 0 (got 0) for field '" + field.name() + "'");
-          }
-          int position = invertState.position + posIncr;
-          if (position > 0) {
-            // NOTE: confusing: this "mirrors" the
-            // position++ we do below
-            position--;
-          } else if (position < 0) {
-            throw new IllegalArgumentException("position overflow for field '" + field.name() + "'");
-          }
-          // position is legal, we can safely place it in invertState now.
-          // not sure if anything will use invertState after non-aborting exc...
-          invertState.position = position;
+          int posIncr = invertState.posIncrAttribute.getPositionIncrement();
+          invertState.position += posIncr;
+          if (invertState.position < lastPosition) {
+            if (posIncr == 0) {
+              throw new IllegalArgumentException("first position increment must be > 0 (got 0) for field '" + field.name() + "'");
+            }
+            throw new IllegalArgumentException("position increments (and gaps) must be >= 0 (got " + posIncr + ") for field '" + field.name() + "'");
+          }
+          lastPosition = invertState.position;
           if (posIncr == 0) {
             invertState.numOverlap++;
           }
@@ -620,13 +611,9 @@ final class DefaultIndexingChain extends DocConsumer {
           if (checkOffsets) {
             int startOffset = invertState.offset + invertState.offsetAttribute.startOffset();
             int endOffset = invertState.offset + invertState.offsetAttribute.endOffset();
-            if (startOffset < 0 || endOffset < startOffset) {
-              throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, "
-                  + "startOffset=" + startOffset + ",endOffset=" + endOffset + " for field '" + field.name() + "'");
-            }
-            if (startOffset < lastStartOffset) {
-              throw new IllegalArgumentException("offsets must not go backwards startOffset="
-                  + startOffset + " is < lastStartOffset=" + lastStartOffset + " for field '" + field.name() + "'");
+            if (startOffset < lastStartOffset || endOffset < startOffset) {
+              throw new IllegalArgumentException("startOffset must be non-negative, and endOffset must be >= startOffset, and offsets must not go backwards "
+                  + "startOffset=" + startOffset + ",endOffset=" + endOffset + ",lastStartOffset=" + lastStartOffset + " for field '" + field.name() + "'");
             }
             lastStartOffset = startOffset;
           }
@@ -644,7 +631,6 @@ final class DefaultIndexingChain extends DocConsumer {
           aborting = false;
           invertState.length++;
-          invertState.position++;
         }
         // trigger streams to perform end-of-stream operations
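Taken together, the DefaultIndexingChain hunks collapse three separate position checks (negative increment, zero first increment, overflow) into a single invariant: after the increment is applied, the position must not land behind the previous one. Below is a minimal standalone sketch of that invariant (plain Java with illustrative names, not the Lucene source; it assumes the -1 starting position that FieldInvertState.reset() now uses):

    // Illustrative sketch only, not the Lucene source: the new single-invariant
    // position check. Assumes position starts at -1, as FieldInvertState.reset()
    // now sets it.
    final class PositionCheckSketch {
      private int position = -1;   // "previous" position before the first token
      private int lastPosition = 0;

      int nextPosition(int posIncr) {
        position += posIncr;
        if (position < lastPosition) {
          if (posIncr == 0) {
            // first token: -1 + 0 == -1, which is < lastPosition (0)
            throw new IllegalArgumentException("first position increment must be > 0 (got 0)");
          }
          // negative increments, negative gaps, and int overflow all land here
          throw new IllegalArgumentException("position increments (and gaps) must be >= 0 (got " + posIncr + ")");
        }
        lastPosition = position;
        return position;
      }

      public static void main(String[] args) {
        PositionCheckSketch s = new PositionCheckSketch();
        System.out.println(s.nextPosition(1));  // 0: first token
        System.out.println(s.nextPosition(1));  // 1
        System.out.println(s.nextPosition(0));  // 1: overlap (numOverlap++ in the real chain)
        System.out.println(s.nextPosition(-2)); // throws IllegalArgumentException
      }
    }

Note that the old explicit overflow check becomes redundant under this invariant: if the position wraps past Integer.MAX_VALUE it goes negative, which is necessarily behind lastPosition, so the same exception fires.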

lucene/core/src/java/org/apache/lucene/index/FieldInvertState.java

@@ -67,7 +67,7 @@ public final class FieldInvertState {
    * Re-initialize the state
    */
   void reset() {
-    position = 0;
+    position = -1;
     length = 0;
     numOverlap = 0;
     offset = 0;
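The -1 starting value compensates for the removed invertState.position++ at the end of the token loop: positions are now computed by adding the increment before use rather than incrementing after. A quick illustrative check that token numbering is unchanged for ordinary all-ones increments:

    // Illustrative check: with the new -1 start, increments of {1, 1, 1}
    // still yield positions 0, 1, 2, matching the old increment-after scheme.
    public class ResetSketch {
      public static void main(String[] args) {
        int position = -1; // what reset() now assigns
        for (int posIncr : new int[] {1, 1, 1}) {
          position += posIncr;
          System.out.print(position + " "); // prints: 0 1 2
        }
      }
    }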

lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java

@@ -1637,6 +1637,38 @@ public class TestIndexWriterExceptions extends LuceneTestCase {
     dir.close();
   }
+
+  public void testCrazyPositionIncrementGap() throws Exception {
+    Directory dir = newDirectory();
+    Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        return new TokenStreamComponents(new MockTokenizer(MockTokenizer.KEYWORD, false));
+      }
+
+      @Override
+      public int getPositionIncrementGap(String fieldName) {
+        return -2;
+      }
+    };
+    IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+    // add good document
+    Document doc = new Document();
+    iw.addDocument(doc);
+    try {
+      doc.add(newTextField("foo", "bar", Field.Store.NO));
+      doc.add(newTextField("foo", "bar", Field.Store.NO));
+      iw.addDocument(doc);
+      fail("didn't get expected exception");
+    } catch (IllegalArgumentException expected) {}
+    iw.shutdown();
+    // make sure we see our good doc
+    DirectoryReader r = DirectoryReader.open(dir);
+    assertEquals(1, r.numDocs());
+    r.close();
+    dir.close();
+  }
+
   // TODO: we could also check isValid, to catch "broken" bytesref values, might be too much?
   static class UOEDirectory extends RAMDirectory {
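This new test drives the simplified check through Analyzer.getPositionIncrementGap, whose return value is folded straight into the running position between values of a multi-valued field. A standalone sketch of the arithmetic the test expects to fail (illustrative only; the real code path is in DefaultIndexingChain, and the exact way the gap is applied is an assumption here):

    // Illustrative arithmetic for testCrazyPositionIncrementGap (not Lucene code).
    public class GapSketch {
      public static void main(String[] args) {
        int position = -1, lastPosition = 0;
        position += 1;            // first "bar" token: position = 0
        lastPosition = position;
        position += -2;           // the analyzer's gap between values: position = -2
        int posIncr = 1;          // second "bar" token
        position += posIncr;      // position = -1, behind lastPosition = 0
        if (position < lastPosition) {
          // this is the IllegalArgumentException the test catches; the message
          // says "(and gaps)" because the gap is folded into the running position
          throw new IllegalArgumentException(
              "position increments (and gaps) must be >= 0 (got " + posIncr + ")");
        }
      }
    }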

lucene/core/src/test/org/apache/lucene/index/TestPostingsOffsets.java

@@ -27,8 +27,10 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockPayloadAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -447,6 +449,40 @@ public class TestPostingsOffsets extends LuceneTestCase {
     });
   }
+
+  public void testCrazyOffsetGap() throws Exception {
+    Directory dir = newDirectory();
+    Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        return new TokenStreamComponents(new MockTokenizer(MockTokenizer.KEYWORD, false));
+      }
+
+      @Override
+      public int getOffsetGap(String fieldName) {
+        return -10;
+      }
+    };
+    IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
+    // add good document
+    Document doc = new Document();
+    iw.addDocument(doc);
+    try {
+      FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
+      ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+      doc.add(new Field("foo", "bar", ft));
+      doc.add(new Field("foo", "bar", ft));
+      iw.addDocument(doc);
+      fail("didn't get expected exception");
+    } catch (IllegalArgumentException expected) {}
+    iw.shutdown();
+    // make sure we see our good doc
+    DirectoryReader r = DirectoryReader.open(dir);
+    assertEquals(1, r.numDocs());
+    r.close();
+    dir.close();
+  }
+
   public void testLegalbutVeryLargeOffsets() throws Exception {
     Directory dir = newDirectory();
     IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
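Similarly, testCrazyOffsetGap feeds a negative Analyzer.getOffsetGap into the running offset base accumulated across values of the field, so the second value's startOffset lands behind lastStartOffset and the merged offsets check fires. A standalone sketch of the arithmetic (illustrative; it assumes the base advances by each value's end offset plus the gap, which is how the chain behaves but is not shown in this diff):

    // Illustrative arithmetic for testCrazyOffsetGap (not Lucene code).
    public class OffsetGapSketch {
      public static void main(String[] args) {
        int base = 0;              // plays the role of invertState.offset
        int lastStartOffset = 0;

        // first "bar": the tokenizer reports offsets [0, 3)
        check(base + 0, base + 3, lastStartOffset); // ok: 0 >= 0, 3 >= 0
        lastStartOffset = base + 0;
        base += 3;                 // advance base past the first value
        base += -10;               // the analyzer's offset gap: base = -7

        // second "bar": the tokenizer again reports [0, 3)
        check(base + 0, base + 3, lastStartOffset); // throws: startOffset -7 < 0
      }

      static void check(int startOffset, int endOffset, int lastStartOffset) {
        if (startOffset < lastStartOffset || endOffset < startOffset) {
          throw new IllegalArgumentException("offsets must not go backwards: startOffset="
              + startOffset + ",endOffset=" + endOffset + ",lastStartOffset=" + lastStartOffset);
        }
      }
    }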