LUCENE-9454: Remove version field on Analyzer (#154)

Version-dependent switching of Analyzer behaviour should be implemented
in the various component factories, rather than through a mutable
setting on Analyzer itself.
This commit is contained in:
Alan Woodward 2021-05-26 17:34:01 +01:00 committed by GitHub
parent 16104090fb
commit 1e7d8146ff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 35 additions and 22 deletions

View File

@ -119,6 +119,8 @@ API Changes
* LUCENE-9204: SpanQuery and its subclasses have been moved from core/ into the
queries/ module. (Alan Woodward)
* LUCENE-9454: Analyzer no longer has a mutable version field. (Alan Woodward)
Improvements
* LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (Dawid Weiss)

View File

@ -129,7 +129,6 @@ public final class CustomAnalyzer extends Analyzer {
private final Integer posIncGap, offsetGap;
CustomAnalyzer(
Version defaultMatchVersion,
CharFilterFactory[] charFilters,
TokenizerFactory tokenizer,
TokenFilterFactory[] tokenFilters,
@ -140,9 +139,6 @@ public final class CustomAnalyzer extends Analyzer {
this.tokenFilters = tokenFilters;
this.posIncGap = posIncGap;
this.offsetGap = offsetGap;
if (defaultMatchVersion != null) {
setVersion(defaultMatchVersion);
}
}
@Override
@ -590,7 +586,6 @@ public final class CustomAnalyzer extends Analyzer {
throw new IllegalStateException("You have to set at least a tokenizer.");
}
return new CustomAnalyzer(
defaultMatchVersion.get(),
charFilters.toArray(new CharFilterFactory[charFilters.size()]),
tokenizer.get(),
tokenFilters.toArray(new TokenFilterFactory[tokenFilters.size()]),

View File

@ -26,6 +26,7 @@ import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.CharFilterFactory;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenFilterFactory;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
@ -69,7 +70,6 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
assertSame(LowerCaseFilterFactory.class, tokenFilters.get(1).getClass());
assertEquals(0, a.getPositionIncrementGap("dummy"));
assertEquals(1, a.getOffsetGap("dummy"));
assertSame(Version.LATEST, a.getVersion());
assertAnalyzesTo(
a, "foo bar FOO BAR", new String[] {"foo", "bar", "foo", "bar"}, new int[] {1, 1, 1, 1});
@ -97,7 +97,6 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
assertSame(LowerCaseFilterFactory.class, tokenFilters.get(1).getClass());
assertEquals(0, a.getPositionIncrementGap("dummy"));
assertEquals(1, a.getOffsetGap("dummy"));
assertSame(Version.LATEST, a.getVersion());
assertAnalyzesTo(
a, "foo bar FOO BAR", new String[] {"foo", "bar", "foo", "bar"}, new int[] {1, 1, 1, 1});
@ -109,6 +108,23 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
a.close();
}
/**
 * Checks that the match version given to the {@link CustomAnalyzer} builder is what a token filter
 * factory sees, using {@link DummyVersionAwareTokenFilterFactory}, which passes tokens through
 * unchanged for {@code LUCENE_8_0_0} and lower-cases them otherwise.
 *
 * <p>Each analyzer is opened in a try-with-resources block so that it is closed even when an
 * assertion fails.
 */
public void testVersionAwareFilter() throws Exception {
  // Match version pinned to 8.0.0: the dummy factory returns the stream untouched.
  try (CustomAnalyzer a =
      CustomAnalyzer.builder()
          .withDefaultMatchVersion(Version.LUCENE_8_0_0)
          .withTokenizer(StandardTokenizerFactory.class)
          .addTokenFilter(DummyVersionAwareTokenFilterFactory.class)
          .build()) {
    assertAnalyzesTo(a, "HELLO WORLD", new String[] {"HELLO", "WORLD"});
  }

  // No explicit match version: the factory is expected to see a version other than 8.0.0
  // and therefore wrap the stream in a LowerCaseFilter.
  try (CustomAnalyzer b =
      CustomAnalyzer.builder()
          .withTokenizer(StandardTokenizerFactory.class)
          .addTokenFilter(DummyVersionAwareTokenFilterFactory.class)
          .build()) {
    assertAnalyzesTo(b, "HELLO WORLD", new String[] {"hello", "world"});
  }
}
public void testFactoryHtmlStripClassicFolding() throws Exception {
CustomAnalyzer a =
CustomAnalyzer.builder()
@ -131,7 +147,6 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
assertSame(LowerCaseFilterFactory.class, tokenFilters.get(1).getClass());
assertEquals(100, a.getPositionIncrementGap("dummy"));
assertEquals(1000, a.getOffsetGap("dummy"));
assertSame(LUCENE_8_0_0, a.getVersion());
assertAnalyzesTo(
a,
@ -168,7 +183,6 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
assertSame(LowerCaseFilterFactory.class, tokenFilters.get(1).getClass());
assertEquals(100, a.getPositionIncrementGap("dummy"));
assertEquals(1000, a.getOffsetGap("dummy"));
assertSame(LUCENE_8_0_0, a.getVersion());
assertAnalyzesTo(
a,
@ -204,7 +218,6 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
assertSame(StopFilterFactory.class, tokenFilters.get(0).getClass());
assertEquals(0, a.getPositionIncrementGap("dummy"));
assertEquals(1, a.getOffsetGap("dummy"));
assertSame(Version.LATEST, a.getVersion());
assertAnalyzesTo(a, "foo Foo Bar", new String[0]);
a.close();
@ -486,6 +499,21 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
}
}
/**
 * Test-only token filter factory whose output depends on {@code luceneMatchVersion}: the input
 * stream is returned as-is for {@code LUCENE_8_0_0} and wrapped in a {@link LowerCaseFilter} for
 * any other version.
 */
public static class DummyVersionAwareTokenFilterFactory extends TokenFilterFactory {

  /**
   * Creates the factory, handing {@code args} straight to the superclass constructor.
   *
   * @param args factory arguments
   */
  public DummyVersionAwareTokenFilterFactory(Map<String, String> args) {
    super(args);
  }

  /**
   * Builds the filter chain for {@code input}.
   *
   * @param input the upstream token stream
   * @return {@code input} itself when the match version equals {@code LUCENE_8_0_0}, otherwise a
   *     {@link LowerCaseFilter} around it
   */
  @Override
  public TokenStream create(TokenStream input) {
    final boolean pinnedToEightZero = luceneMatchVersion.equals(Version.LUCENE_8_0_0);
    return pinnedToEightZero ? input : new LowerCaseFilter(input);
  }
}
public void testNormalization() throws IOException {
CustomAnalyzer analyzer1 =
CustomAnalyzer.builder()

View File

@ -30,7 +30,6 @@ import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.util.Version;
/**
* An Analyzer builds TokenStreams, which analyze text. It thus represents a policy for extracting
@ -86,7 +85,6 @@ import org.apache.lucene.util.Version;
public abstract class Analyzer implements Closeable {
private final ReuseStrategy reuseStrategy;
private Version version = Version.LATEST;
// non final as it gets nulled if closed; pkg private for access by ReuseStrategy's final helper
// methods:
@ -329,16 +327,6 @@ public abstract class Analyzer implements Closeable {
return reuseStrategy;
}
/**
 * Sets the Lucene version whose analysis behavior this analyzer should mimic.
 *
 * @param v the version to store on this analyzer
 */
public void setVersion(Version v) {
  // TODO: make write once?
  this.version = v;
}
/**
 * Returns the Lucene version whose analysis behavior this analyzer will mimic.
 *
 * @return the version currently stored on this analyzer
 */
public Version getVersion() {
  return this.version;
}
/** Frees persistent resources used by this Analyzer */
@Override
public void close() {