LUCENE-4981: Deprecate PositionFilter.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1483403 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2013-05-16 15:11:08 +00:00
parent d6565f839e
commit eefd3884b1
5 changed files with 54 additions and 3 deletions

View File

@ -76,6 +76,12 @@ Changes in backwards compatibility policy
* LUCENE-3907: EdgeNGramTokenFilter does not support backward grams and does
not update offsets anymore. (Adrien Grand)
* LUCENE-4981: PositionFilter is now deprecated as it can corrupt token stream
graphs. Since it main use-case was to make query parsers generate boolean
queries instead of phrase queries, it is now advised to use
QueryParser.setAutoGeneratePhraseQueries(false) (for simple cases) or to
override QueryParser.newFieldQuery. (Adrien Grand, Steve Rowe)
Bug Fixes
* LUCENE-4997: Internal test framework's tests are sensitive to previous

View File

@ -22,11 +22,19 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.queryparser.classic.QueryParser;
/** Set the positionIncrement of all tokens to the "positionIncrement",
* except the first return token which retains its original positionIncrement value.
* The default positionIncrement value is zero.
* @deprecated (4.4) PositionFilter makes {@link TokenStream} graphs inconsistent
* which can cause highlighting bugs. Its main use-case being to make
* {@link QueryParser} generate boolean queries instead of phrase
* queries, it is now advised to use
* {@link QueryParser#setAutoGeneratePhraseQueries(boolean) QueryParser.setAutoGeneratePhraseQueries(false)}
* (for simple cases) or to override {@link QueryParser#newFieldQuery}.
*/
@Deprecated
public final class PositionFilter extends TokenFilter {
/** Position increment to assign to all but the first token - default = 0 */

View File

@ -20,6 +20,7 @@ package org.apache.lucene.analysis.position;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.position.PositionFilter;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.util.Version;
import java.util.Map;
@ -37,7 +38,9 @@ import java.util.Map;
*
* @see org.apache.lucene.analysis.position.PositionFilter
* @since solr 1.4
* @deprecated (4.4)
*/
@Deprecated
public class PositionFilterFactory extends TokenFilterFactory {
private final int positionIncrement;
@ -48,6 +51,9 @@ public class PositionFilterFactory extends TokenFilterFactory {
if (!args.isEmpty()) {
throw new IllegalArgumentException("Unknown parameters: " + args);
}
if (luceneMatchVersion != null && luceneMatchVersion.onOrAfter(Version.LUCENE_44)) {
throw new IllegalArgumentException("PositionFilter is deprecated as of Lucene 4.4. You should either fix your code to not use it or use Lucene 4.3 version compatibility");
}
}
@Override

View File

@ -174,8 +174,6 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
PathHierarchyTokenizer.class,
HyphenationCompoundWordTokenFilter.class,
DictionaryCompoundWordTokenFilter.class,
// TODO: corrumpts graphs (offset consistency check):
PositionFilter.class,
// TODO: it seems to mess up offsets!?
WikipediaTokenizer.class,
// TODO: doesn't handle graph inputs

View File

@ -19,13 +19,16 @@ package org.apache.lucene.analysis.ngram;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.position.PositionFilter;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Random;
@ -101,9 +104,39 @@ public class EdgeNGramTokenFilterTest extends BaseTokenStreamTestCase {
false);
}
private static class PositionFilter extends TokenFilter {
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
private boolean started;
PositionFilter(final TokenStream input) {
super(input);
}
@Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (started) {
posIncrAtt.setPositionIncrement(0);
} else {
started = true;
}
return true;
} else {
return false;
}
}
@Override
public void reset() throws IOException {
super.reset();
started = false;
}
}
public void testFirstTokenPositionIncrement() throws Exception {
TokenStream ts = new MockTokenizer(new StringReader("a abc"), MockTokenizer.WHITESPACE, false);
ts = new PositionFilter(ts, 0); // All but first token will get 0 position increment
ts = new PositionFilter(ts); // All but first token will get 0 position increment
EdgeNGramTokenFilter filter = new EdgeNGramTokenFilter(TEST_VERSION_CURRENT, ts, 2, 3);
// The first token "a" will not be output, since it's smaller than the mingram size of 2.
// The second token on input to EdgeNGramTokenFilter will have position increment of 0,