Allow TrimFilter to be used in custom normalizers (#27758)
AnalysisFactoryTestCase checks that the ES custom token filter multi-term awareness matches the underlying Lucene factory. For the trim filter this won't be the case until LUCENE-8093 is released in Lucene 7.3, so we add a temporary exclusion. Closes #27310
This commit is contained in:
parent
76771242e8
commit
af3f63616b
|
@ -246,7 +246,7 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
|
|||
filters.add(PreConfiguredTokenFilter.singleton("stemmer", false, PorterStemFilter::new));
|
||||
// The stop filter is in lucene-core but the English stop words set is in lucene-analyzers-common
|
||||
filters.add(PreConfiguredTokenFilter.singleton("stop", false, input -> new StopFilter(input, StopAnalyzer.ENGLISH_STOP_WORDS_SET)));
|
||||
filters.add(PreConfiguredTokenFilter.singleton("trim", false, TrimFilter::new));
|
||||
filters.add(PreConfiguredTokenFilter.singleton("trim", true, TrimFilter::new));
|
||||
filters.add(PreConfiguredTokenFilter.singleton("truncate", false, input -> new TruncateTokenFilter(input, 10)));
|
||||
filters.add(PreConfiguredTokenFilter.singleton("type_as_payload", false, TypeAsPayloadTokenFilter::new));
|
||||
filters.add(PreConfiguredTokenFilter.singleton("unique", false, UniqueTokenFilter::new));
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.analysis.common;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.analysis.AnalysisTestsHelper;
|
||||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.elasticsearch.test.ESTokenStreamTestCase;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class TrimTokenFilterTests extends ESTokenStreamTestCase {
|
||||
|
||||
public void testNormalizer() throws IOException {
|
||||
Settings settings = Settings.builder()
|
||||
.putList("index.analysis.normalizer.my_normalizer.filter", "trim")
|
||||
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
|
||||
.build();
|
||||
ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, new CommonAnalysisPlugin());
|
||||
assertNull(analysis.indexAnalyzers.get("my_normalizer"));
|
||||
NamedAnalyzer normalizer = analysis.indexAnalyzers.getNormalizer("my_normalizer");
|
||||
assertNotNull(normalizer);
|
||||
assertEquals("my_normalizer", normalizer.name());
|
||||
assertTokenStreamContents(normalizer.tokenStream("foo", " bar "), new String[] {"bar"});
|
||||
assertEquals(new BytesRef("bar"), normalizer.normalize("foo", " bar "));
|
||||
}
|
||||
|
||||
}
|
|
@ -22,6 +22,7 @@ package org.elasticsearch.indices.analysis;
|
|||
import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.collect.MapBuilder;
|
||||
import org.elasticsearch.index.analysis.ClassicTokenizerFactory;
|
||||
import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
|
||||
|
@ -462,6 +463,11 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
|
|||
|
||||
Set<Object> classesThatShouldNotHaveMultiTermSupport = new HashSet<>(actual);
|
||||
classesThatShouldNotHaveMultiTermSupport.removeAll(expected);
|
||||
classesThatShouldNotHaveMultiTermSupport.remove("token filter [trim]");
|
||||
if (Version.CURRENT.luceneVersion.onOrAfter(org.apache.lucene.util.Version.fromBits(7, 3, 0))) {
|
||||
// TODO: remove the above exclusion when we move to lucene 7.3
|
||||
assert false;
|
||||
}
|
||||
assertTrue("Pre-built components should not have multi-term support: " + classesThatShouldNotHaveMultiTermSupport,
|
||||
classesThatShouldNotHaveMultiTermSupport.isEmpty());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue