mirror of https://github.com/apache/lucene.git
LUCENE-1678: if a subclass of a core Analyzer overrides tokenStream but not reusableTokenStream, then the core analyzer's reusableTokenStream will now fall back to tokenStream to fix the previous back-compat break
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@793966 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
70b7e0b41b
commit
23e2a395c8
|
@ -135,6 +135,13 @@ Changes in runtime behavior
|
|||
true in all Lucene releases before 2.3, but was broken in 2.3 and
|
||||
2.4, and is now fixed in 2.9. (Mike McCandless)
|
||||
|
||||
11. LUCENE-1678: The addition of Analyzer.reusableTokenStream
|
||||
accidentally broke back compatibility of external analyzers that
|
||||
subclassed core analyzers that implemented tokenStream but not
|
||||
reusableTokenStream. This is now fixed, such that if
|
||||
reusableTokenStream is invoked on such a subclass, that method
|
||||
will forcefully fallback to tokenStream. (Mike McCandless)
|
||||
|
||||
API Changes
|
||||
|
||||
1. LUCENE-1419: Add expert API to set custom indexing chain. This API is
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.analysis;
|
|||
|
||||
import java.io.Reader;
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Method;
|
||||
|
||||
import org.apache.lucene.util.CloseableThreadLocal;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
|
@ -32,7 +33,8 @@ import org.apache.lucene.store.AlreadyClosedException;
|
|||
*/
|
||||
public abstract class Analyzer {
|
||||
/** Creates a TokenStream which tokenizes all the text in the provided
|
||||
* Reader. Must be able to handle null field name for backward compatibility.
|
||||
* Reader. Must be able to handle null field name for
|
||||
* backward compatibility.
|
||||
*/
|
||||
public abstract TokenStream tokenStream(String fieldName, Reader reader);
|
||||
|
||||
|
@ -79,6 +81,29 @@ public abstract class Analyzer {
|
|||
}
|
||||
}
|
||||
|
||||
protected boolean overridesTokenStreamMethod;
|
||||
|
||||
/** @deprecated This is only present to preserve
|
||||
* back-compat of classes that subclass a core analyzer
|
||||
* and override tokenStream but not reusableTokenStream */
|
||||
protected void setOverridesTokenStreamMethod(Class baseClass) {
|
||||
|
||||
final Class[] params = new Class[2];
|
||||
params[0] = String.class;
|
||||
params[1] = Reader.class;
|
||||
|
||||
try {
|
||||
Method m = this.getClass().getMethod("tokenStream", params);
|
||||
if (m != null) {
|
||||
overridesTokenStreamMethod = m.getDeclaringClass() != baseClass;
|
||||
} else {
|
||||
overridesTokenStreamMethod = false;
|
||||
}
|
||||
} catch (NoSuchMethodException nsme) {
|
||||
overridesTokenStreamMethod = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Invoked before indexing a Fieldable instance if
|
||||
|
|
|
@ -25,18 +25,27 @@ import java.io.Reader;
|
|||
* for data like zip codes, ids, and some product names.
|
||||
*/
|
||||
public class KeywordAnalyzer extends Analyzer {
|
||||
public KeywordAnalyzer() {
|
||||
setOverridesTokenStreamMethod(KeywordAnalyzer.class);
|
||||
}
|
||||
public TokenStream tokenStream(String fieldName,
|
||||
final Reader reader) {
|
||||
return new KeywordTokenizer(reader);
|
||||
}
|
||||
public TokenStream reusableTokenStream(String fieldName,
|
||||
final Reader reader) throws IOException {
|
||||
if (overridesTokenStreamMethod) {
|
||||
// LUCENE-1678: force fallback to tokenStream() if we
|
||||
// have been subclassed and that subclass overrides
|
||||
// tokenStream but not reusableTokenStream
|
||||
return tokenStream(fieldName, reader);
|
||||
}
|
||||
Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
|
||||
if (tokenizer == null) {
|
||||
tokenizer = new KeywordTokenizer(reader);
|
||||
setPreviousTokenStream(tokenizer);
|
||||
} else
|
||||
tokenizer.reset(reader);
|
||||
tokenizer.reset(reader);
|
||||
return tokenizer;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -55,6 +55,7 @@ public class PerFieldAnalyzerWrapper extends Analyzer {
|
|||
*/
|
||||
public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer) {
|
||||
this.defaultAnalyzer = defaultAnalyzer;
|
||||
setOverridesTokenStreamMethod(PerFieldAnalyzerWrapper.class);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -77,6 +78,12 @@ public class PerFieldAnalyzerWrapper extends Analyzer {
|
|||
}
|
||||
|
||||
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
|
||||
if (overridesTokenStreamMethod) {
|
||||
// LUCENE-1678: force fallback to tokenStream() if we
|
||||
// have been subclassed and that subclass overrides
|
||||
// tokenStream but not reusableTokenStream
|
||||
return tokenStream(fieldName, reader);
|
||||
}
|
||||
Analyzer analyzer = (Analyzer) analyzerMap.get(fieldName);
|
||||
if (analyzer == null)
|
||||
analyzer = defaultAnalyzer;
|
||||
|
|
|
@ -44,7 +44,6 @@ import java.util.Set;
|
|||
*/
|
||||
public class StandardAnalyzer extends Analyzer {
|
||||
private Set stopSet;
|
||||
private Version matchVersion;
|
||||
|
||||
/**
|
||||
* Specifies whether deprecated acronyms should be replaced with HOST type.
|
||||
|
@ -262,7 +261,7 @@ public class StandardAnalyzer extends Analyzer {
|
|||
}
|
||||
|
||||
private final void init(Version matchVersion) {
|
||||
this.matchVersion = matchVersion;
|
||||
setOverridesTokenStreamMethod(StandardAnalyzer.class);
|
||||
if (matchVersion.onOrAfter(Version.LUCENE_29)) {
|
||||
enableStopPositionIncrements = true;
|
||||
} else {
|
||||
|
@ -314,6 +313,12 @@ public class StandardAnalyzer extends Analyzer {
|
|||
|
||||
/** @deprecated Use {@link #tokenStream} instead */
|
||||
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
|
||||
if (overridesTokenStreamMethod) {
|
||||
// LUCENE-1678: force fallback to tokenStream() if we
|
||||
// have been subclassed and that subclass overrides
|
||||
// tokenStream but not reusableTokenStream
|
||||
return tokenStream(fieldName, reader);
|
||||
}
|
||||
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
|
||||
if (streams == null) {
|
||||
streams = new SavedStreams();
|
||||
|
|
|
@ -19,8 +19,10 @@ package org.apache.lucene.analysis;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.index.Payload;
|
||||
|
@ -130,6 +132,21 @@ public class TestAnalyzers extends LuceneTestCase {
|
|||
x = StandardTokenizer.CJ;
|
||||
String[] y = StandardTokenizer.TOKEN_TYPES;
|
||||
}
|
||||
|
||||
private static class MyStandardAnalyzer extends StandardAnalyzer {
|
||||
public TokenStream tokenStream(String field, Reader reader) {
|
||||
return new WhitespaceAnalyzer().tokenStream(field, reader);
|
||||
}
|
||||
}
|
||||
|
||||
public void testSubclassOverridingOnlyTokenStream() throws Throwable {
|
||||
Analyzer a = new MyStandardAnalyzer();
|
||||
TokenStream ts = a.reusableTokenStream("field", new StringReader("the"));
|
||||
// StandardAnalyzer will discard "the" (it's a
|
||||
// stopword), but my subclass will not:
|
||||
assertTrue(ts.incrementToken());
|
||||
assertFalse(ts.incrementToken());
|
||||
}
|
||||
}
|
||||
|
||||
class PayloadSetter extends TokenFilter {
|
||||
|
|
Loading…
Reference in New Issue