Improve backwards compatibility handling for NGram / EdgeNGram analysis
The '"side" : "back"' parameter is not supported anymore on EdgeNGramTokenizer if the mapping is created with 0.90.2 / Lucene 4.3. The 'EdgeNGramTokenFilter' handles this automatically by wrapping the filter chain in a 'ReverseStringFilter', yet with tokenizers this optimization is not possible. This commit also adds a more verbose error message describing how to work around this limitation. Closes #3489
This commit is contained in:
parent
becbbf53d5
commit
4a15106d6a
|
@ -34,6 +34,7 @@ public abstract class AbstractTokenizerFactory extends AbstractIndexComponent im
|
||||||
|
|
||||||
protected final Version version;
|
protected final Version version;
|
||||||
|
|
||||||
|
|
||||||
public AbstractTokenizerFactory(Index index, @IndexSettings Settings indexSettings, String name, Settings settings) {
|
public AbstractTokenizerFactory(Index index, @IndexSettings Settings indexSettings, String name, Settings settings) {
|
||||||
super(index, indexSettings);
|
super(index, indexSettings);
|
||||||
this.name = name;
|
this.name = name;
|
||||||
|
|
|
@ -19,13 +19,14 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.analysis;
|
package org.elasticsearch.index.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.ngram.*;
|
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
|
||||||
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter.Side;
|
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter.Side;
|
||||||
|
import org.apache.lucene.analysis.ngram.Lucene43EdgeNGramTokenizer;
|
||||||
|
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
|
||||||
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
|
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
|
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -44,18 +45,25 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||||
|
|
||||||
private final EdgeNGramTokenFilter.Side side;
|
private final EdgeNGramTokenFilter.Side side;
|
||||||
|
|
||||||
|
private org.elasticsearch.Version esVersion;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public EdgeNGramTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
public EdgeNGramTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
|
this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
|
||||||
this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
|
this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
|
||||||
this.side = EdgeNGramTokenFilter.Side.getSide(settings.get("side", Lucene43EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
|
this.side = EdgeNGramTokenFilter.Side.getSide(settings.get("side", Lucene43EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
|
||||||
|
this.esVersion = indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, org.elasticsearch.Version.CURRENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public TokenStream create(TokenStream tokenStream) {
|
public TokenStream create(TokenStream tokenStream) {
|
||||||
final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // we supported it since 4.3
|
if (version.onOrAfter(Version.LUCENE_43) && esVersion.onOrAfter(org.elasticsearch.Version.V_0_90_2)) {
|
||||||
if (version.onOrAfter(Version.LUCENE_43)) {
|
/*
|
||||||
|
* We added this in 0.90.2 but 0.90.1 used LUCENE_43 already so we can not rely on the lucene version.
|
||||||
|
* Yet if somebody uses 0.90.2 or higher with a prev. lucene version we should also use the deprecated version.
|
||||||
|
*/
|
||||||
|
final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // always use 4.4 or higher
|
||||||
TokenStream result = tokenStream;
|
TokenStream result = tokenStream;
|
||||||
// side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect
|
// side=BACK is not supported anymore but applying ReverseStringFilter up-front and after the token filter has the same effect
|
||||||
if (side == Side.BACK) {
|
if (side == Side.BACK) {
|
||||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.analysis.ngram.Lucene43EdgeNGramTokenizer;
|
||||||
import org.apache.lucene.analysis.ngram.NGramTokenizer;
|
import org.apache.lucene.analysis.ngram.NGramTokenizer;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
|
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -49,6 +50,9 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
|
|
||||||
private final CharMatcher matcher;
|
private final CharMatcher matcher;
|
||||||
|
|
||||||
|
protected org.elasticsearch.Version esVersion;
|
||||||
|
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public EdgeNGramTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
public EdgeNGramTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||||
super(index, indexSettings, name, settings);
|
super(index, indexSettings, name, settings);
|
||||||
|
@ -56,17 +60,23 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
|
this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
|
||||||
this.side = Lucene43EdgeNGramTokenizer.Side.getSide(settings.get("side", Lucene43EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
|
this.side = Lucene43EdgeNGramTokenizer.Side.getSide(settings.get("side", Lucene43EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
|
||||||
this.matcher = parseTokenChars(settings.getAsArray("token_chars"));
|
this.matcher = parseTokenChars(settings.getAsArray("token_chars"));
|
||||||
|
this.esVersion = indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, org.elasticsearch.Version.CURRENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Tokenizer create(Reader reader) {
|
public Tokenizer create(Reader reader) {
|
||||||
final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // we supported it since 4.3
|
if (version.onOrAfter(Version.LUCENE_43) && esVersion.onOrAfter(org.elasticsearch.Version.V_0_90_2)) {
|
||||||
if (version.onOrAfter(Version.LUCENE_44)) {
|
/*
|
||||||
|
* We added this in 0.90.2 but 0.90.1 used LUCENE_43 already so we can not rely on the lucene version.
|
||||||
|
* Yet if somebody uses 0.90.2 or higher with a prev. lucene version we should also use the deprecated version.
|
||||||
|
*/
|
||||||
if (side == Lucene43EdgeNGramTokenizer.Side.BACK) {
|
if (side == Lucene43EdgeNGramTokenizer.Side.BACK) {
|
||||||
throw new ElasticSearchIllegalArgumentException("side=BACK is not supported anymore. Please fix your analysis chain or use"
|
throw new ElasticSearchIllegalArgumentException("side=back is not supported anymore. Please fix your analysis chain or use"
|
||||||
+ " an older compatibility version (<=4.2) but beware that it might cause highlighting bugs.");
|
+ " an older compatibility version (<=4.2) but beware that it might cause highlighting bugs."
|
||||||
|
+ " To obtain the same behavior as the previous version please use \"edgeNGram\" filter which still supports side=back"
|
||||||
|
+ " in combination with a \"keyword\" tokenizer");
|
||||||
}
|
}
|
||||||
// LUCENE MONITOR: this token filter is a copy from lucene trunk and should go away once we upgrade to lucene 4.4
|
final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // always use 4.4 or higher
|
||||||
if (matcher == null) {
|
if (matcher == null) {
|
||||||
return new EdgeNGramTokenizer(version, reader, minGram, maxGram);
|
return new EdgeNGramTokenizer(version, reader, minGram, maxGram);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.analysis.ngram.Lucene43NGramTokenizer;
|
||||||
import org.apache.lucene.analysis.ngram.NGramTokenizer;
|
import org.apache.lucene.analysis.ngram.NGramTokenizer;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.Version;
|
||||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
|
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
@ -45,6 +46,7 @@ public class NGramTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
private final int minGram;
|
private final int minGram;
|
||||||
private final int maxGram;
|
private final int maxGram;
|
||||||
private final CharMatcher matcher;
|
private final CharMatcher matcher;
|
||||||
|
private org.elasticsearch.Version esVersion;
|
||||||
|
|
||||||
static final Map<String, CharMatcher> MATCHERS;
|
static final Map<String, CharMatcher> MATCHERS;
|
||||||
|
|
||||||
|
@ -94,13 +96,18 @@ public class NGramTokenizerFactory extends AbstractTokenizerFactory {
|
||||||
this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
|
this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
|
||||||
this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
|
this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
|
||||||
this.matcher = parseTokenChars(settings.getAsArray("token_chars"));
|
this.matcher = parseTokenChars(settings.getAsArray("token_chars"));
|
||||||
|
this.esVersion = indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, org.elasticsearch.Version.CURRENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("deprecation")
|
@SuppressWarnings("deprecation")
|
||||||
@Override
|
@Override
|
||||||
public Tokenizer create(Reader reader) {
|
public Tokenizer create(Reader reader) {
|
||||||
final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // we supported it since 4.3
|
if (version.onOrAfter(Version.LUCENE_43) && esVersion.onOrAfter(org.elasticsearch.Version.V_0_90_2)) {
|
||||||
if (version.onOrAfter(Version.LUCENE_44)) {
|
/*
|
||||||
|
* We added this in 0.90.2 but 0.90.1 used LUCENE_43 already so we can not rely on the lucene version.
|
||||||
|
* Yet if somebody uses 0.90.2 or higher with a prev. lucene version we should also use the deprecated version.
|
||||||
|
*/
|
||||||
|
final Version version = this.version == Version.LUCENE_43 ? Version.LUCENE_44 : this.version; // always use 4.4 or higher
|
||||||
if (matcher == null) {
|
if (matcher == null) {
|
||||||
return new NGramTokenizer(version, reader, minGram, maxGram);
|
return new NGramTokenizer(version, reader, minGram, maxGram);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -22,17 +22,33 @@ package org.elasticsearch.test.unit.index.analysis;
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
|
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
|
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope.Scope;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.analysis.ngram.*;
|
||||||
|
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
|
||||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||||
|
import org.elasticsearch.Version;
|
||||||
|
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||||
|
import org.elasticsearch.common.settings.ImmutableSettings.Builder;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.index.Index;
|
import org.elasticsearch.index.Index;
|
||||||
|
import org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory;
|
||||||
import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
|
import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
|
||||||
import org.elasticsearch.index.analysis.NGramTokenizerFactory;
|
import org.elasticsearch.index.analysis.NGramTokenizerFactory;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
|
import java.lang.reflect.Field;
|
||||||
|
import java.lang.reflect.Modifier;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
import static org.hamcrest.Matchers.instanceOf;
|
||||||
|
|
||||||
@ThreadLeakScope(Scope.NONE)
|
@ThreadLeakScope(Scope.NONE)
|
||||||
public class NGramTokenizerFactoryTests extends BaseTokenStreamTestCase {
|
public class NGramTokenizerFactoryTests extends BaseTokenStreamTestCase {
|
||||||
|
@ -86,4 +102,135 @@ public class NGramTokenizerFactoryTests extends BaseTokenStreamTestCase {
|
||||||
new String[] {" a", " a!"});
|
new String[] {" a", " a!"});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBackwardsCompatibilityEdgeNgramTokenizer() throws IllegalArgumentException, IllegalAccessException {
|
||||||
|
int iters = atLeast(20);
|
||||||
|
final Index index = new Index("test");
|
||||||
|
final String name = "ngr";
|
||||||
|
for (int i = 0; i < iters; i++) {
|
||||||
|
Version v = randomVersion(random());
|
||||||
|
if (v.onOrAfter(Version.V_0_90_2)) {
|
||||||
|
Builder builder = ImmutableSettings.builder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit");
|
||||||
|
boolean compatVersion = false;
|
||||||
|
if ((compatVersion = random().nextBoolean())) {
|
||||||
|
builder.put("version", "4." + random().nextInt(3));
|
||||||
|
builder.put("side", "back");
|
||||||
|
}
|
||||||
|
Settings settings = builder.build();
|
||||||
|
Settings indexSettings = ImmutableSettings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, v.id).build();
|
||||||
|
Tokenizer edgeNGramTokenizer = new EdgeNGramTokenizerFactory(index, indexSettings, name, settings).create(new StringReader(
|
||||||
|
"foo bar"));
|
||||||
|
if (compatVersion) {
|
||||||
|
assertThat(edgeNGramTokenizer, instanceOf(Lucene43EdgeNGramTokenizer.class));
|
||||||
|
} else {
|
||||||
|
assertThat(edgeNGramTokenizer, instanceOf(EdgeNGramTokenizer.class));
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
Settings settings = ImmutableSettings.builder().put("min_gram", 2).put("max_gram", 3).put("side", "back").build();
|
||||||
|
Settings indexSettings = ImmutableSettings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, v.id).build();
|
||||||
|
Tokenizer edgeNGramTokenizer = new EdgeNGramTokenizerFactory(index, indexSettings, name, settings).create(new StringReader(
|
||||||
|
"foo bar"));
|
||||||
|
assertThat(edgeNGramTokenizer, instanceOf(Lucene43EdgeNGramTokenizer.class));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Settings settings = ImmutableSettings.builder().put("min_gram", 2).put("max_gram", 3).put("side", "back").build();
|
||||||
|
Settings indexSettings = ImmutableSettings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
|
||||||
|
try {
|
||||||
|
new EdgeNGramTokenizerFactory(index, indexSettings, name, settings).create(new StringReader("foo bar"));
|
||||||
|
fail("should fail side:back is not supported anymore");
|
||||||
|
} catch (ElasticSearchIllegalArgumentException ex) {
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBackwardsCompatibilityNgramTokenizer() throws IllegalArgumentException, IllegalAccessException {
|
||||||
|
int iters = atLeast(20);
|
||||||
|
for (int i = 0; i < iters; i++) {
|
||||||
|
final Index index = new Index("test");
|
||||||
|
final String name = "ngr";
|
||||||
|
Version v = randomVersion(random());
|
||||||
|
if (v.onOrAfter(Version.V_0_90_2)) {
|
||||||
|
Builder builder = ImmutableSettings.builder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit");
|
||||||
|
boolean compatVersion = false;
|
||||||
|
if ((compatVersion = random().nextBoolean())) {
|
||||||
|
builder.put("version", "4." + random().nextInt(3));
|
||||||
|
}
|
||||||
|
Settings settings = builder.build();
|
||||||
|
Settings indexSettings = ImmutableSettings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, v.id).build();
|
||||||
|
Tokenizer nGramTokenizer = new NGramTokenizerFactory(index, indexSettings, name, settings).create(new StringReader(
|
||||||
|
"foo bar"));
|
||||||
|
if (compatVersion) {
|
||||||
|
assertThat(nGramTokenizer, instanceOf(Lucene43NGramTokenizer.class));
|
||||||
|
} else {
|
||||||
|
assertThat(nGramTokenizer, instanceOf(NGramTokenizer.class));
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
Settings settings = ImmutableSettings.builder().put("min_gram", 2).put("max_gram", 3).build();
|
||||||
|
Settings indexSettings = ImmutableSettings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, v.id).build();
|
||||||
|
Tokenizer nGramTokenizer = new NGramTokenizerFactory(index, indexSettings, name, settings).create(new StringReader(
|
||||||
|
"foo bar"));
|
||||||
|
assertThat(nGramTokenizer, instanceOf(Lucene43NGramTokenizer.class));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBackwardsCompatibilityEdgeNgramTokenFilter() throws IllegalArgumentException, IllegalAccessException {
|
||||||
|
int iters = atLeast(20);
|
||||||
|
for (int i = 0; i < iters; i++) {
|
||||||
|
final Index index = new Index("test");
|
||||||
|
final String name = "ngr";
|
||||||
|
Version v = randomVersion(random());
|
||||||
|
if (v.onOrAfter(Version.V_0_90_2)) {
|
||||||
|
Builder builder = ImmutableSettings.builder().put("min_gram", 2).put("max_gram", 3);
|
||||||
|
boolean compatVersion = false;
|
||||||
|
if ((compatVersion = random().nextBoolean())) {
|
||||||
|
builder.put("version", "4." + random().nextInt(3));
|
||||||
|
}
|
||||||
|
boolean reverse = random().nextBoolean();
|
||||||
|
if (reverse) {
|
||||||
|
builder.put("side", "back");
|
||||||
|
}
|
||||||
|
Settings settings = builder.build();
|
||||||
|
Settings indexSettings = ImmutableSettings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, v.id).build();
|
||||||
|
TokenStream edgeNGramTokenFilter = new EdgeNGramTokenFilterFactory(index, indexSettings, name, settings).create(new MockTokenizer(new StringReader(
|
||||||
|
"foo bar")));
|
||||||
|
if (compatVersion) {
|
||||||
|
assertThat(edgeNGramTokenFilter, instanceOf(EdgeNGramTokenFilter.class));
|
||||||
|
} else if (reverse && !compatVersion){
|
||||||
|
assertThat(edgeNGramTokenFilter, instanceOf(ReverseStringFilter.class));
|
||||||
|
} else {
|
||||||
|
assertThat(edgeNGramTokenFilter, instanceOf(EdgeNGramTokenFilter.class));
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
Builder builder = ImmutableSettings.builder().put("min_gram", 2).put("max_gram", 3);
|
||||||
|
boolean reverse = random().nextBoolean();
|
||||||
|
if (reverse) {
|
||||||
|
builder.put("side", "back");
|
||||||
|
}
|
||||||
|
Settings settings = builder.build();
|
||||||
|
Settings indexSettings = ImmutableSettings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, v.id).build();
|
||||||
|
TokenStream edgeNGramTokenFilter = new EdgeNGramTokenFilterFactory(index, indexSettings, name, settings).create(new MockTokenizer(new StringReader(
|
||||||
|
"foo bar")));
|
||||||
|
assertThat(edgeNGramTokenFilter, instanceOf(EdgeNGramTokenFilter.class));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private Version randomVersion(Random random) throws IllegalArgumentException, IllegalAccessException {
|
||||||
|
Field[] declaredFields = Version.class.getDeclaredFields();
|
||||||
|
List<Field> versionFields = new ArrayList<Field>();
|
||||||
|
for (Field field : declaredFields) {
|
||||||
|
if ((field.getModifiers() & Modifier.STATIC) != 0 && field.getName().startsWith("V_") && field.getType() == Version.class) {
|
||||||
|
versionFields.add(field);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return (Version) versionFields.get(random.nextInt(versionFields.size())).get(Version.class);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue