LUCENE-1678: if a subclass of a core Analyzer overrides tokenStream but not reusableTokenStream, then the core analyzer's reusableTokenStream will now fallback to tokenStream to fix the previous back-compat break

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@793966 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-07-14 16:32:18 +00:00
parent 70b7e0b41b
commit 23e2a395c8
6 changed files with 74 additions and 4 deletions

View File

@ -135,6 +135,13 @@ Changes in runtime behavior
true in all Lucene releases before 2.3, but was broken in 2.3 and
2.4, and is now fixed in 2.9. (Mike McCandless)
11. LUCENE-1678: The addition of Analyzer.reusableTokenStream
accidentally broke back compatibility of external analyzers that
subclassed core analyzers that implemented tokenStream but not
reusableTokenStream. This is now fixed, such that if
reusableTokenStream is invoked on such a subclass, that method
will forcefully fallback to tokenStream. (Mike McCandless)
API Changes
1. LUCENE-1419: Add expert API to set custom indexing chain. This API is

View File

@ -19,6 +19,7 @@ package org.apache.lucene.analysis;
import java.io.Reader;
import java.io.IOException;
import java.lang.reflect.Method;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.store.AlreadyClosedException;
@ -32,7 +33,8 @@ import org.apache.lucene.store.AlreadyClosedException;
*/
public abstract class Analyzer {
/** Creates a TokenStream which tokenizes all the text in the provided
* Reader. Must be able to handle null field name for backward compatibility.
* Reader. Must be able to handle null field name for
* backward compatibility.
*/
public abstract TokenStream tokenStream(String fieldName, Reader reader);
@ -79,6 +81,29 @@ public abstract class Analyzer {
}
}
protected boolean overridesTokenStreamMethod;
/** @deprecated This is only present to preserve
* back-compat of classes that subclass a core analyzer
* and override tokenStream but not reusableTokenStream */
protected void setOverridesTokenStreamMethod(Class baseClass) {
final Class[] params = new Class[2];
params[0] = String.class;
params[1] = Reader.class;
try {
Method m = this.getClass().getMethod("tokenStream", params);
if (m != null) {
overridesTokenStreamMethod = m.getDeclaringClass() != baseClass;
} else {
overridesTokenStreamMethod = false;
}
} catch (NoSuchMethodException nsme) {
overridesTokenStreamMethod = false;
}
}
/**
* Invoked before indexing a Fieldable instance if

View File

@ -25,18 +25,27 @@ import java.io.Reader;
* for data like zip codes, ids, and some product names.
*/
public class KeywordAnalyzer extends Analyzer {
public KeywordAnalyzer() {
setOverridesTokenStreamMethod(KeywordAnalyzer.class);
}
public TokenStream tokenStream(String fieldName,
final Reader reader) {
return new KeywordTokenizer(reader);
}
public TokenStream reusableTokenStream(String fieldName,
final Reader reader) throws IOException {
if (overridesTokenStreamMethod) {
// LUCENE-1678: force fallback to tokenStream() if we
// have been subclassed and that subclass overrides
// tokenStream but not reusableTokenStream
return tokenStream(fieldName, reader);
}
Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
if (tokenizer == null) {
tokenizer = new KeywordTokenizer(reader);
setPreviousTokenStream(tokenizer);
} else
tokenizer.reset(reader);
tokenizer.reset(reader);
return tokenizer;
}
}

View File

@ -55,6 +55,7 @@ public class PerFieldAnalyzerWrapper extends Analyzer {
*/
public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer) {
this.defaultAnalyzer = defaultAnalyzer;
setOverridesTokenStreamMethod(PerFieldAnalyzerWrapper.class);
}
/**
@ -77,6 +78,12 @@ public class PerFieldAnalyzerWrapper extends Analyzer {
}
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
if (overridesTokenStreamMethod) {
// LUCENE-1678: force fallback to tokenStream() if we
// have been subclassed and that subclass overrides
// tokenStream but not reusableTokenStream
return tokenStream(fieldName, reader);
}
Analyzer analyzer = (Analyzer) analyzerMap.get(fieldName);
if (analyzer == null)
analyzer = defaultAnalyzer;

View File

@ -44,7 +44,6 @@ import java.util.Set;
*/
public class StandardAnalyzer extends Analyzer {
private Set stopSet;
private Version matchVersion;
/**
* Specifies whether deprecated acronyms should be replaced with HOST type.
@ -262,7 +261,7 @@ public class StandardAnalyzer extends Analyzer {
}
private final void init(Version matchVersion) {
this.matchVersion = matchVersion;
setOverridesTokenStreamMethod(StandardAnalyzer.class);
if (matchVersion.onOrAfter(Version.LUCENE_29)) {
enableStopPositionIncrements = true;
} else {
@ -314,6 +313,12 @@ public class StandardAnalyzer extends Analyzer {
/** @deprecated Use {@link #tokenStream} instead */
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
if (overridesTokenStreamMethod) {
// LUCENE-1678: force fallback to tokenStream() if we
// have been subclassed and that subclass overrides
// tokenStream but not reusableTokenStream
return tokenStream(fieldName, reader);
}
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
if (streams == null) {
streams = new SavedStreams();

View File

@ -19,8 +19,10 @@ package org.apache.lucene.analysis;
import java.io.IOException;
import java.io.StringReader;
import java.io.Reader;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.Payload;
@ -130,6 +132,21 @@ public class TestAnalyzers extends LuceneTestCase {
x = StandardTokenizer.CJ;
String[] y = StandardTokenizer.TOKEN_TYPES;
}
private static class MyStandardAnalyzer extends StandardAnalyzer {
public TokenStream tokenStream(String field, Reader reader) {
return new WhitespaceAnalyzer().tokenStream(field, reader);
}
}
public void testSubclassOverridingOnlyTokenStream() throws Throwable {
Analyzer a = new MyStandardAnalyzer();
TokenStream ts = a.reusableTokenStream("field", new StringReader("the"));
// StandardAnalyzer will discard "the" (it's a
// stopword), by my subclass will not:
assertTrue(ts.incrementToken());
assertFalse(ts.incrementToken());
}
}
class PayloadSetter extends TokenFilter {