LUCENE-4127: don't allow 0 posInc for first token of indexed field

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1348606 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2012-06-10 14:42:34 +00:00
parent 9a6fe90dd2
commit cbbf7bfc6a
6 changed files with 33 additions and 35 deletions


@@ -393,6 +393,11 @@ Changes in Runtime Behavior
   any of the calls to the Analyzer throw an IOException. QueryParserBase.analyzeRangePart()
   will throw a RuntimeException if an IOException is thrown by the Analyzer.
 
+* LUCENE-4127: IndexWriter will now throw IllegalArgumentException if
+  the first token of an indexed field has 0 positionIncrement
+  (previously it silently corrected it to 1, possibly masking bugs).
+  (Robert Muir, Mike McCandless)
+
 API Changes
 
 * LUCENE-2302, LUCENE-1458, LUCENE-2111, LUCENE-2514: Terms are no longer
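
For illustration (not part of this commit; the class is made up): a TokenStream whose first token reports a position increment of 0, which IndexWriter now rejects up front instead of silently correcting to 1:

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

// Made-up example: the first (and only) token claims posIncr 0, so
// indexing a field with this stream now throws IllegalArgumentException.
final class ZeroFirstIncrementStream extends TokenStream {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  private boolean done;

  @Override
  public boolean incrementToken() {
    if (done) {
      return false;
    }
    done = true;
    clearAttributes();
    termAtt.append("oops");
    posIncrAtt.setPositionIncrement(0); // 0 on the very first token: rejected
    return true;
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    done = false;
  }
}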


@@ -18,9 +18,11 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.util.IOUtils;
 
 /**
  * Holds state for inverting all occurrences of a single
@@ -87,6 +89,8 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
 
       // reset the TokenStream to the first token
       stream.reset();
 
+      boolean success2 = false;
+
       try {
         boolean hasMoreTokens = stream.incrementToken();
@@ -109,8 +113,16 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
         if (!hasMoreTokens) break;
 
         final int posIncr = posIncrAttribute.getPositionIncrement();
+        if (posIncr < 0) {
+          throw new IllegalArgumentException("position increment must be >=0 (got " + posIncr + ")");
+        }
+        if (fieldState.position == 0 && posIncr == 0) {
+          throw new IllegalArgumentException("first position increment must be > 0 (got 0)");
+        }
         int position = fieldState.position + posIncr;
         if (position > 0) {
+          // NOTE: confusing: this "mirrors" the
+          // position++ we do below
           position--;
         } else if (position < 0) {
           throw new IllegalArgumentException("position overflow for field '" + field.name() + "'");
@@ -147,8 +159,13 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
 
         stream.end();
         fieldState.offset += offsetAttribute.endOffset();
+        success2 = true;
       } finally {
-        stream.close();
+        if (!success2) {
+          IOUtils.closeWhileHandlingException(stream);
+        } else {
+          stream.close();
+        }
       }
 
       fieldState.offset += docState.analyzer == null ? 0 : docState.analyzer.getOffsetGap(field);
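
The finally change above is about exception safety rather than the position check itself: if the loop already failed, a secondary exception from stream.close() would mask the original, more useful one. The same idiom in isolation (names are made up):

import java.io.Closeable;
import java.io.IOException;

import org.apache.lucene.util.IOUtils;

final class CloseIdiom {
  // On the failure path, swallow any secondary exception from close() so
  // the original exception propagates; on the success path a close()
  // failure is real news and must propagate itself.
  static void useAndClose(Closeable resource) throws IOException {
    boolean success = false;
    try {
      // ... consume the resource ...
      success = true;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(resource);
      } else {
        resource.close();
      }
    }
  }
}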


@@ -883,39 +883,16 @@ public class TestIndexWriter extends LuceneTestCase {
     IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random())));
     Document doc = new Document();
     doc.add(new TextField("field", tokens));
-    w.addDocument(doc);
-    w.commit();
-
-    IndexReader r = DirectoryReader.open(dir);
-    IndexSearcher s = new IndexSearcher(r);
-    PhraseQuery pq = new PhraseQuery();
-    pq.add(new Term("field", "a"));
-    pq.add(new Term("field", "b"));
-    pq.add(new Term("field", "c"));
-    ScoreDoc[] hits = s.search(pq, null, 1000).scoreDocs;
-    assertEquals(1, hits.length);
-
-    Query q = new SpanTermQuery(new Term("field", "a"));
-    hits = s.search(q, null, 1000).scoreDocs;
-    assertEquals(1, hits.length);
-
-    DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(s.getIndexReader(),
-                                                                MultiFields.getLiveDocs(s.getIndexReader()),
-                                                                "field",
-                                                                new BytesRef("a"),
-                                                                false);
-    assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
-    assertEquals(1, tps.freq());
-    assertEquals(0, tps.nextPosition());
+    try {
+      w.addDocument(doc);
+      fail("did not hit expected exception");
+    } catch (IllegalArgumentException iea) {
+      // expected
+    }
     w.close();
-    r.close();
     dir.close();
   }
 
   // LUCENE-1219
   public void testBinaryFieldOffsetLength() throws IOException {
     Directory dir = newDirectory();


@@ -61,7 +61,7 @@ public class TestPositionIncrement extends LuceneTestCase {
         return new TokenStreamComponents(new Tokenizer(reader) {
           // TODO: use CannedTokenStream
           private final String[] TOKENS = {"1", "2", "3", "4", "5"};
-          private final int[] INCREMENTS = {0, 2, 1, 0, 1};
+          private final int[] INCREMENTS = {1, 2, 1, 0, 1};
           private int i = 0;
 
           PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
@@ -222,8 +222,7 @@ public class TestPositionIncrement extends LuceneTestCase {
     assertTrue(tp.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
     // "a" occurs 4 times
     assertEquals(4, tp.freq());
-    int expected = 0;
-    assertEquals(expected, tp.nextPosition());
+    assertEquals(0, tp.nextPosition());
     assertEquals(1, tp.nextPosition());
     assertEquals(3, tp.nextPosition());
     assertEquals(6, tp.nextPosition());


@@ -546,7 +546,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
     // we write here (e.g., to write parent+2), and need to do a workaround
     // in the reader (which knows that anyway only category 0 has a parent
     // -1).
-    parentStream.set(parent + 1);
+    parentStream.set(Math.max(parent+1, 1));
     Document d = new Document();
     d.add(parentStreamField);
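
Background for the Math.max (a sketch; the helper names are made up and this is not the actual taxonomy code): the parent ordinal rides on the position of the category's single indexed token, so the writer emits a position increment of parent+1. The root category (ordinal 0) has parent -1, which would mean a first increment of 0 -- exactly what this commit forbids -- so the writer clamps it to 1 and the reader compensates by special-casing ordinal 0, as the comment above describes:

final class ParentOrdinalCodec {
  static int encodeAsPositionIncrement(int parent) {
    return Math.max(parent + 1, 1); // first position increment must be > 0
  }

  static int decodeParent(int ordinal, int position) {
    // Only ordinal 0 (the root) has parent -1; for every other category
    // the token's position is exactly parent + 1.
    return ordinal == 0 ? -1 : position - 1;
  }
}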


@@ -69,7 +69,7 @@ final class MockPayloadFilter extends TokenFilter {
     if (input.incrementToken()) {
       payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes()));
       int posIncr;
-      if (i % 2 == 1) {
+      if (pos == 0 || i % 2 == 1) {
         posIncr = 1;
       } else {
         posIncr = 0;