From 80a79f5bee02a855b11157cc789cc02870eee11f Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Sat, 30 May 2009 09:36:10 +0000 Subject: [PATCH] LUCENE-1542: properly index first token(s) with 0 position increment git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@780220 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 21 +++ common-build.xml | 2 +- .../lucene/index/DocInverterPerField.java | 9 +- .../apache/lucene/index/DocumentsWriter.java | 8 + .../index/DocumentsWriterThreadState.java | 1 + .../org/apache/lucene/index/IndexWriter.java | 16 ++ .../apache/lucene/index/TestIndexWriter.java | 2 +- .../lucene/search/TestPositionIncrement.java | 178 +++++++++++++++++- 8 files changed, 230 insertions(+), 7 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 48f49581935..41ad6d1c878 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -71,6 +71,17 @@ Changes in runtime behavior with SortField.FIELD_DOC (it was unnecessary as Lucene breaks ties internally by docID). (Shai Erera via Michael McCandless) + 6. LUCENE-1542: When the first token(s) have 0 position increment, + IndexWriter used to incorrectly record the position as -1, if no + payload is present, or Integer.MAX_VALUE if a payload is present. + This causes positional queries to fail to match. The bug is now + fixed, but if your app relies on the buggy behavior then you must + call IndexWriter.setAllowMinus1Position(). That API is deprecated + so you must fix your application, and rebuild your index, to not + rely on this behavior by the 3.0 release of Lucene. (Jonathan + Mamou, Mark Miller via Mike McCandless) + + API Changes 1. LUCENE-1419: Add expert API to set custom indexing chain. This API is @@ -186,6 +197,16 @@ Bug fixes 10. LUCENE-1647: Fix case where IndexReader.undeleteAll would cause the segment's deletion count to be incorrect. (Mike McCandless) +11. LUCENE-1542: When the first token(s) have 0 position increment, + IndexWriter used to incorrectly record the position as -1, if no + payload is present, or Integer.MAX_VALUE if a payload is present. + This causes positional queries to fail to match. The bug is now + fixed, but if your app relies on the buggy behavior then you must + call IndexWriter.setAllowMinus1Position(). That API is deprecated + so you must fix your application, and rebuild your index, to not + rely on this behavior by the 3.0 release of Lucene. (Jonathan + Mamou, Mark Miller via Mike McCandless) + New features 1. LUCENE-1411: Added expert API to open an IndexWriter on a prior diff --git a/common-build.xml b/common-build.xml index b78fcb99da4..aa7c1926341 100644 --- a/common-build.xml +++ b/common-build.xml @@ -42,7 +42,7 @@ - + diff --git a/src/java/org/apache/lucene/index/DocInverterPerField.java b/src/java/org/apache/lucene/index/DocInverterPerField.java index 454090f97ea..140cac026b6 100644 --- a/src/java/org/apache/lucene/index/DocInverterPerField.java +++ b/src/java/org/apache/lucene/index/DocInverterPerField.java @@ -126,6 +126,9 @@ final class DocInverterPerField extends DocFieldConsumerPerField { // reset the TokenStream to the first token stream.reset(); + // deprecated + final boolean allowMinus1Position = docState.allowMinus1Position; + try { int offsetEnd = fieldState.offset-1; @@ -162,7 +165,11 @@ final class DocInverterPerField extends DocFieldConsumerPerField { } final int posIncr = posIncrAttribute.getPositionIncrement(); - fieldState.position += posIncr - 1; + fieldState.position += posIncr; + if (allowMinus1Position || fieldState.position > 0) { + fieldState.position--; + } + if (posIncr == 0) fieldState.numOverlap++; diff --git a/src/java/org/apache/lucene/index/DocumentsWriter.java b/src/java/org/apache/lucene/index/DocumentsWriter.java index f40f84888f9..ace1746dd87 100644 --- a/src/java/org/apache/lucene/index/DocumentsWriter.java +++ b/src/java/org/apache/lucene/index/DocumentsWriter.java @@ -150,6 +150,9 @@ final class DocumentsWriter { Document doc; String maxTermPrefix; + // deprecated + boolean allowMinus1Position; + // Only called by asserts public boolean testPoint(String name) { return docWriter.writer.testPoint(name); @@ -298,6 +301,11 @@ final class DocumentsWriter { threadStates[i].docState.similarity = similarity; } + synchronized void setAllowMinus1Position() { + for(int i=0;i