mirror of https://github.com/apache/lucene.git
SOLR-11976: TokenizerChain.normalize was only considering the first MultiTermAwareComponent
This commit is contained in:
parent
ab4cd42903
commit
588e19eda1
|
@ -257,6 +257,10 @@ Bug Fixes
|
|||
|
||||
* SOLR-12061: Fix substitution bug in API V1 to V2 migration when using SolrJ with V2 API. (Tomás Fernández Löbbe)
|
||||
|
||||
* SOLR-11976: TokenizerChain.normalize: only the first filter that is a MultiTermAwareComponent was participating
|
||||
in normalization instead of all. This bug normally doesn't matter since TextField doesn't call it.
|
||||
(Tim Allison via David Smiley)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -16,14 +16,15 @@
|
|||
*/
|
||||
package org.apache.solr.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.*;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
* An analyzer that uses a tokenizer and a list of token filters to
|
||||
* create a TokenStream.
|
||||
|
@ -113,7 +114,7 @@ public final class TokenizerChain extends SolrAnalyzer {
|
|||
for (TokenFilterFactory filter : filters) {
|
||||
if (filter instanceof MultiTermAwareComponent) {
|
||||
filter = (TokenFilterFactory) ((MultiTermAwareComponent) filter).getMultiTermComponent();
|
||||
result = filter.create(in);
|
||||
result = filter.create(result);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.analysis;
|
||||
|
||||
import java.util.Collections;
|
||||
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
|
||||
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
public class TokenizerChainTest extends SolrTestCaseJ4 {
|
||||
|
||||
@Test
|
||||
public void testNormalization() throws Exception {
|
||||
String fieldName = "f";
|
||||
TokenFilterFactory[] tff = new TokenFilterFactory[2];
|
||||
tff[0] = new LowerCaseFilterFactory(Collections.EMPTY_MAP);
|
||||
tff[1] = new ASCIIFoldingFilterFactory(Collections.EMPTY_MAP);
|
||||
TokenizerChain tokenizerChain = new TokenizerChain(
|
||||
new MockTokenizerFactory(Collections.EMPTY_MAP),
|
||||
tff);
|
||||
assertEquals(new BytesRef("fooba"),
|
||||
tokenizerChain.normalize(fieldName, "FOOB\u00c4"));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue