SOLR-11976: TokenizerChain.normalize was only considering the first MultiTermAwareComponent

This commit is contained in:
David Smiley 2018-03-08 22:30:39 -05:00
parent ab4cd42903
commit 588e19eda1
3 changed files with 52 additions and 4 deletions

View File

@ -257,6 +257,10 @@ Bug Fixes
* SOLR-12061: Fix substitution bug in API V1 to V2 migration when using SolrJ with V2 API. (Tomás Fernández Löbbe)
* SOLR-11976: TokenizerChain.normalize: only the last filter that is a MultiTermAwareComponent was participating
in normalization instead of all of them. This bug normally doesn't matter since TextField doesn't call it.
(Tim Allison via David Smiley)
Optimizations
----------------------

View File

@ -16,14 +16,15 @@
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.*;
import java.io.Reader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import java.io.Reader;
/**
* An analyzer that uses a tokenizer and a list of token filters to
* create a TokenStream.
@ -113,7 +114,7 @@ public final class TokenizerChain extends SolrAnalyzer {
for (TokenFilterFactory filter : filters) {
if (filter instanceof MultiTermAwareComponent) {
filter = (TokenFilterFactory) ((MultiTermAwareComponent) filter).getMultiTermComponent();
result = filter.create(in);
result = filter.create(result);
}
}
return result;

View File

@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import java.util.Collections;
import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.Test;
/**
 * Unit tests for {@link TokenizerChain}.
 */
public class TokenizerChainTest extends SolrTestCaseJ4 {

  /**
   * Checks that {@code TokenizerChain.normalize} runs the text through every configured
   * filter rather than just one of them.
   *
   * <p>The chain is built from a lower-case filter followed by an ASCII-folding filter. The
   * input is "FOOB" followed by U+00C4 (capital A with diaeresis), and the expected term is
   * "fooba", i.e. a value that reflects both lower-casing and ASCII folding.
   *
   * @throws Exception if building the chain or normalizing the text fails
   */
  @Test
  public void testNormalization() throws Exception {
    String fieldName = "f";

    // Collections.emptyMap() is the type-safe form of the raw Collections.EMPTY_MAP constant;
    // it avoids unchecked-conversion warnings at each factory constructor call.
    TokenFilterFactory[] tff = {
        new LowerCaseFilterFactory(Collections.emptyMap()),
        new ASCIIFoldingFilterFactory(Collections.emptyMap())
    };
    TokenizerChain tokenizerChain =
        new TokenizerChain(new MockTokenizerFactory(Collections.emptyMap()), tff);

    assertEquals(new BytesRef("fooba"), tokenizerChain.normalize(fieldName, "FOOB\u00c4"));
  }
}