mirror of https://github.com/apache/lucene.git
LUCENE-9454: Remove version field on Analyzer (#154)
Version-dependent switching of Analyzer behaviour should be implemented in the various component factories, rather than through a mutable setting on the Analyzer itself.
This commit is contained in:
parent
16104090fb
commit
1e7d8146ff
|
@ -119,6 +119,8 @@ API Changes
|
|||
* LUCENE-9204: SpanQuery and its subclasses have been moved from core/ into the
|
||||
queries/ module. (Alan Woodward)
|
||||
|
||||
* LUCENE-9454: Analyzer no longer has a mutable version field. (Alan Woodward)
|
||||
|
||||
Improvements
|
||||
|
||||
* LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (Dawid Weiss)
|
||||
|
|
|
@ -129,7 +129,6 @@ public final class CustomAnalyzer extends Analyzer {
|
|||
private final Integer posIncGap, offsetGap;
|
||||
|
||||
CustomAnalyzer(
|
||||
Version defaultMatchVersion,
|
||||
CharFilterFactory[] charFilters,
|
||||
TokenizerFactory tokenizer,
|
||||
TokenFilterFactory[] tokenFilters,
|
||||
|
@ -140,9 +139,6 @@ public final class CustomAnalyzer extends Analyzer {
|
|||
this.tokenFilters = tokenFilters;
|
||||
this.posIncGap = posIncGap;
|
||||
this.offsetGap = offsetGap;
|
||||
if (defaultMatchVersion != null) {
|
||||
setVersion(defaultMatchVersion);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -590,7 +586,6 @@ public final class CustomAnalyzer extends Analyzer {
|
|||
throw new IllegalStateException("You have to set at least a tokenizer.");
|
||||
}
|
||||
return new CustomAnalyzer(
|
||||
defaultMatchVersion.get(),
|
||||
charFilters.toArray(new CharFilterFactory[charFilters.size()]),
|
||||
tokenizer.get(),
|
||||
tokenFilters.toArray(new TokenFilterFactory[tokenFilters.size()]),
|
||||
|
|
|
@ -26,6 +26,7 @@ import java.util.Map;
|
|||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharFilter;
|
||||
import org.apache.lucene.analysis.CharFilterFactory;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
|
@ -69,7 +70,6 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
|
|||
assertSame(LowerCaseFilterFactory.class, tokenFilters.get(1).getClass());
|
||||
assertEquals(0, a.getPositionIncrementGap("dummy"));
|
||||
assertEquals(1, a.getOffsetGap("dummy"));
|
||||
assertSame(Version.LATEST, a.getVersion());
|
||||
|
||||
assertAnalyzesTo(
|
||||
a, "foo bar FOO BAR", new String[] {"foo", "bar", "foo", "bar"}, new int[] {1, 1, 1, 1});
|
||||
|
@ -97,7 +97,6 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
|
|||
assertSame(LowerCaseFilterFactory.class, tokenFilters.get(1).getClass());
|
||||
assertEquals(0, a.getPositionIncrementGap("dummy"));
|
||||
assertEquals(1, a.getOffsetGap("dummy"));
|
||||
assertSame(Version.LATEST, a.getVersion());
|
||||
|
||||
assertAnalyzesTo(
|
||||
a, "foo bar FOO BAR", new String[] {"foo", "bar", "foo", "bar"}, new int[] {1, 1, 1, 1});
|
||||
|
@ -109,6 +108,23 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
|
|||
a.close();
|
||||
}
|
||||
|
||||
/**
 * Checks that the match version given to the builder influences a version-aware token filter
 * factory: the same factory keeps case under LUCENE_8_0_0 and lower-cases otherwise.
 */
public void testVersionAwareFilter() throws Exception {
|
||||
// Match version pinned to 8.0.0: DummyVersionAwareTokenFilterFactory returns its input
// untouched, so the expected tokens keep their upper case.
CustomAnalyzer a =
|
||||
CustomAnalyzer.builder()
|
||||
.withDefaultMatchVersion(Version.LUCENE_8_0_0)
|
||||
.withTokenizer(StandardTokenizerFactory.class)
|
||||
.addTokenFilter(DummyVersionAwareTokenFilterFactory.class)
|
||||
.build();
|
||||
assertAnalyzesTo(a, "HELLO WORLD", new String[] {"HELLO", "WORLD"});
|
||||
|
||||
// No match version configured (presumably the builder then uses a default other than 8.0.0 --
// confirm against the builder): the factory wraps the stream in a LowerCaseFilter.
CustomAnalyzer b =
|
||||
CustomAnalyzer.builder()
|
||||
.withTokenizer(StandardTokenizerFactory.class)
|
||||
.addTokenFilter(DummyVersionAwareTokenFilterFactory.class)
|
||||
.build();
|
||||
assertAnalyzesTo(b, "HELLO WORLD", new String[] {"hello", "world"});
|
||||
}
|
||||
|
||||
public void testFactoryHtmlStripClassicFolding() throws Exception {
|
||||
CustomAnalyzer a =
|
||||
CustomAnalyzer.builder()
|
||||
|
@ -131,7 +147,6 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
|
|||
assertSame(LowerCaseFilterFactory.class, tokenFilters.get(1).getClass());
|
||||
assertEquals(100, a.getPositionIncrementGap("dummy"));
|
||||
assertEquals(1000, a.getOffsetGap("dummy"));
|
||||
assertSame(LUCENE_8_0_0, a.getVersion());
|
||||
|
||||
assertAnalyzesTo(
|
||||
a,
|
||||
|
@ -168,7 +183,6 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
|
|||
assertSame(LowerCaseFilterFactory.class, tokenFilters.get(1).getClass());
|
||||
assertEquals(100, a.getPositionIncrementGap("dummy"));
|
||||
assertEquals(1000, a.getOffsetGap("dummy"));
|
||||
assertSame(LUCENE_8_0_0, a.getVersion());
|
||||
|
||||
assertAnalyzesTo(
|
||||
a,
|
||||
|
@ -204,7 +218,6 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
|
|||
assertSame(StopFilterFactory.class, tokenFilters.get(0).getClass());
|
||||
assertEquals(0, a.getPositionIncrementGap("dummy"));
|
||||
assertEquals(1, a.getOffsetGap("dummy"));
|
||||
assertSame(Version.LATEST, a.getVersion());
|
||||
|
||||
assertAnalyzesTo(a, "foo Foo Bar", new String[0]);
|
||||
a.close();
|
||||
|
@ -486,6 +499,21 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Test-only token filter factory whose output depends on {@code luceneMatchVersion}: the input
 * stream is returned unchanged for {@code Version.LUCENE_8_0_0} and wrapped in a {@link
 * LowerCaseFilter} for any other version.
 */
public static class DummyVersionAwareTokenFilterFactory extends TokenFilterFactory {
|
||||
|
||||
/** Hands the factory arguments straight to the superclass constructor. */
public DummyVersionAwareTokenFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
}
|
||||
|
||||
/**
 * Builds the filter for {@code input}.
 *
 * @param input the upstream token stream
 * @return {@code input} itself when the match version equals LUCENE_8_0_0, otherwise a
 *     LowerCaseFilter around it
 */
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
// luceneMatchVersion is not declared in this class; presumably inherited from the factory
// base class -- confirm.
if (luceneMatchVersion.equals(Version.LUCENE_8_0_0)) {
|
||||
return input;
|
||||
}
|
||||
return new LowerCaseFilter(input);
|
||||
}
|
||||
}
|
||||
|
||||
public void testNormalization() throws IOException {
|
||||
CustomAnalyzer analyzer1 =
|
||||
CustomAnalyzer.builder()
|
||||
|
|
|
@ -30,7 +30,6 @@ import org.apache.lucene.store.AlreadyClosedException;
|
|||
import org.apache.lucene.util.AttributeFactory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CloseableThreadLocal;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
* An Analyzer builds TokenStreams, which analyze text. It thus represents a policy for extracting
|
||||
|
@ -86,7 +85,6 @@ import org.apache.lucene.util.Version;
|
|||
public abstract class Analyzer implements Closeable {
|
||||
|
||||
private final ReuseStrategy reuseStrategy;
|
||||
private Version version = Version.LATEST;
|
||||
|
||||
// non final as it gets nulled if closed; pkg private for access by ReuseStrategy's final helper
|
||||
// methods:
|
||||
|
@ -329,16 +327,6 @@ public abstract class Analyzer implements Closeable {
|
|||
return reuseStrategy;
|
||||
}
|
||||
|
||||
/**
 * Set the version of Lucene whose analysis behavior this analyzer should mimic.
 *
 * @param v the version to store; it overwrites any previously set value
 */
|
||||
public void setVersion(Version v) {
|
||||
version = v; // TODO: make write once?
|
||||
}
|
||||
|
||||
/**
 * Return the version of Lucene whose analysis behavior this analyzer will mimic.
 *
 * @return the current value of the {@code version} field
 */
|
||||
public Version getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
/** Frees persistent resources used by this Analyzer */
|
||||
@Override
|
||||
public void close() {
|
||||
|
|
Loading…
Reference in New Issue