diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 11f47475be7..cf619f5d2e7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -76,6 +76,12 @@ Changes in backwards compatibility policy * LUCENE-3907: EdgeNGramTokenFilter does not support backward grams and does not update offsets anymore. (Adrien Grand) +* LUCENE-4981: PositionFilter is now deprecated as it can corrupt token stream + graphs. Since it main use-case was to make query parsers generate boolean + queries instead of phrase queries, it is now advised to use + QueryParser.setAutoGeneratePhraseQueries(false) (for simple cases) or to + override QueryParser.newFieldQuery. (Adrien Grand, Steve Rowe) + Bug Fixes * LUCENE-4997: Internal test framework's tests are sensitive to previous diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java index f1acca5095f..c77af32aa7a 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java @@ -22,11 +22,19 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.queryparser.classic.QueryParser; /** Set the positionIncrement of all tokens to the "positionIncrement", * except the first return token which retains its original positionIncrement value. * The default positionIncrement value is zero. + * @deprecated (4.4) PositionFilter makes {@link TokenStream} graphs inconsistent + * which can cause highlighting bugs. Its main use-case being to make + * {@link QueryParser} generate boolean queries instead of phrase + * queries, it is now advised to use + * {@link QueryParser#setAutoGeneratePhraseQueries(boolean) QueryParser.setAutoGeneratePhraseQueries(false)} + * (for simple cases) or to override {@link QueryParser#newFieldQuery}. */ +@Deprecated public final class PositionFilter extends TokenFilter { /** Position increment to assign to all but the first token - default = 0 */ diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilterFactory.java index 2d0e70b22d8..ec2ba5f8085 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilterFactory.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.position; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.position.PositionFilter; import org.apache.lucene.analysis.util.TokenFilterFactory; +import org.apache.lucene.util.Version; import java.util.Map; @@ -37,7 +38,9 @@ import java.util.Map; * * @see org.apache.lucene.analysis.position.PositionFilter * @since solr 1.4 + * @deprecated (4.4) */ +@Deprecated public class PositionFilterFactory extends TokenFilterFactory { private final int positionIncrement; @@ -48,6 +51,9 @@ public class PositionFilterFactory extends TokenFilterFactory { if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } + if (luceneMatchVersion != null && luceneMatchVersion.onOrAfter(Version.LUCENE_44)) { + throw new IllegalArgumentException("PositionFilter is deprecated as of Lucene 4.4. You should either fix your code to not use it or use Lucene 4.3 version compatibility"); + } } @Override diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java index 4baefbc4428..cf2e9742ebd 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java @@ -174,8 +174,6 @@ public class TestRandomChains extends BaseTokenStreamTestCase { PathHierarchyTokenizer.class, HyphenationCompoundWordTokenFilter.class, DictionaryCompoundWordTokenFilter.class, - // TODO: corrumpts graphs (offset consistency check): - PositionFilter.class, // TODO: it seems to mess up offsets!? WikipediaTokenizer.class, // TODO: doesn't handle graph inputs diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java index 61393235022..54e74e0b705 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilterTest.java @@ -19,13 +19,16 @@ package org.apache.lucene.analysis.ngram; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.position.PositionFilter; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.util.Random; @@ -101,9 +104,39 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase { false); } + private static class PositionFilter extends TokenFilter { + + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + private boolean started; + + PositionFilter(final TokenStream input) { + super(input); + } + + @Override + public final boolean incrementToken() throws IOException { + if (input.incrementToken()) { + if (started) { + posIncrAtt.setPositionIncrement(0); + } else { + started = true; + } + return true; + } else { + return false; + } + } + + @Override + public void reset() throws IOException { + super.reset(); + started = false; + } + } + public void testFirstTokenPositionIncrement() throws Exception { TokenStream ts = new MockTokenizer(new StringReader("a abc"), MockTokenizer.WHITESPACE, false); - ts = new PositionFilter(ts, 0); // All but first token will get 0 position increment + ts = new PositionFilter(ts); // All but first token will get 0 position increment EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, ts, 2, 3); // The first token "a" will not be output, since it's smaller than the mingram size of 2. // The second token on input to EdgeNGramTokenFilter will have position increment of 0,