mirror of
https://github.com/apache/lucene.git
synced 2025-02-20 17:07:09 +00:00
LUCENE-4185: don't apply CharFilters twice in Solr
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1358481 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fcfaf3f0f5
commit
6569c4854b
@ -57,6 +57,8 @@ Bug Fixes
|
||||
|
||||
* SOLR-3587: After reloading a SolrCore, the original Analyzer is still used rather than a new
|
||||
one. (Alexey Serba, yonik, rmuir, Mark Miller)
|
||||
|
||||
* LUCENE-4185: Fix a bug where CharFilters were wrongly being applied twice. (Michael Froh, rmuir)
|
||||
|
||||
Other Changes
|
||||
|
||||
|
@ -22,17 +22,12 @@ import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
*
|
||||
* An analyzer that uses a tokenizer and a list of token filters to
|
||||
* create a TokenStream.
|
||||
*/
|
||||
|
||||
//
|
||||
// An analyzer that uses a tokenizer and a list of token filters to
|
||||
// create a TokenStream.
|
||||
//
|
||||
public final class TokenizerChain extends SolrAnalyzer {
|
||||
final private CharFilterFactory[] charFilters;
|
||||
final private TokenizerFactory tokenizer;
|
||||
@ -52,22 +47,6 @@ public final class TokenizerChain extends SolrAnalyzer {
|
||||
public TokenizerFactory getTokenizerFactory() { return tokenizer; }
|
||||
public TokenFilterFactory[] getTokenFilterFactories() { return filters; }
|
||||
|
||||
class SolrTokenStreamComponents extends TokenStreamComponents {
|
||||
public SolrTokenStreamComponents(final Tokenizer source, final TokenStream result) {
|
||||
super(source, result);
|
||||
}
|
||||
|
||||
// TODO: what is going on here?
|
||||
@Override
|
||||
protected void reset(Reader reader) throws IOException {
|
||||
// the tokenizers are currently reset by the indexing process, so only
|
||||
// the tokenizer needs to be reset.
|
||||
Reader r = initReader(null, reader);
|
||||
super.reset(r);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Reader initReader(String fieldName, Reader reader) {
|
||||
if (charFilters != null && charFilters.length > 0) {
|
||||
@ -82,12 +61,12 @@ public final class TokenizerChain extends SolrAnalyzer {
|
||||
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName, Reader aReader) {
|
||||
Tokenizer tk = tokenizer.create( initReader(fieldName, aReader) );
|
||||
Tokenizer tk = tokenizer.create( aReader );
|
||||
TokenStream ts = tk;
|
||||
for (TokenFilterFactory filter : filters) {
|
||||
ts = filter.create(ts);
|
||||
}
|
||||
return new SolrTokenStreamComponents(tk, ts);
|
||||
return new TokenStreamComponents(tk, ts);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -0,0 +1,52 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
|
||||
<schema name="test" version="1.0">
|
||||
<types>
|
||||
<fieldType name="int" class="solr.TrieIntField" precisionStep="4" omitNorms="true" positionIncrementGap="0"/>
|
||||
<!-- charfilter only at query-time -->
|
||||
<fieldType name="text" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<charFilter class="solr.MockCharFilterFactory" remainder="7"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
<!-- charfilter only at index-time -->
|
||||
<fieldType name="text2" class="solr.TextField">
|
||||
<analyzer type="index">
|
||||
<charFilter class="solr.MockCharFilterFactory" remainder="7"/>
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
</types>
|
||||
|
||||
<fields>
|
||||
<field name="id" type="int" indexed="true" stored="true" multiValued="false" required="true"/>
|
||||
<field name="content" type="text" indexed="true" stored="true"/>
|
||||
<field name="content2" type="text2" indexed="true" stored="true"/>
|
||||
</fields>
|
||||
|
||||
<defaultSearchField>content</defaultSearchField>
|
||||
<uniqueKey>id</uniqueKey>
|
||||
|
||||
</schema>
|
@ -0,0 +1,77 @@
|
||||
package org.apache.solr.analysis;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/**
|
||||
* Tests that charfilters are being applied properly
|
||||
* (e.g. once and only once) with mockcharfilter.
|
||||
*/
|
||||
public class TestCharFilters extends SolrTestCaseJ4 {
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
initCore("solrconfig-basic.xml","schema-charfilters.xml");
|
||||
// add some docs
|
||||
assertU(adoc("id", "1", "content", "aab"));
|
||||
assertU(adoc("id", "2", "content", "aabaa"));
|
||||
assertU(adoc("id", "3", "content2", "ab"));
|
||||
assertU(adoc("id", "4", "content2", "aba"));
|
||||
assertU(commit());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test query analysis: at querytime MockCharFilter will
|
||||
* double the 'a', so ab -> aab, and aba -> aabaa
|
||||
*
|
||||
* We run the test twice to make sure reuse is working
|
||||
*/
|
||||
public void testQueryAnalysis() {
|
||||
assertQ("Query analysis: ",
|
||||
req("fl", "id", "q", "content:ab", "sort", "id asc"),
|
||||
"//*[@numFound='1']",
|
||||
"//result/doc[1]/int[@name='id'][.=1]"
|
||||
);
|
||||
assertQ("Query analysis: ",
|
||||
req("fl", "id", "q", "content:aba", "sort", "id asc"),
|
||||
"//*[@numFound='1']",
|
||||
"//result/doc[1]/int[@name='id'][.=2]"
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test index analysis: at indextime MockCharFilter will
|
||||
* double the 'a', so ab -> aab, and aba -> aabaa
|
||||
*
|
||||
* We run the test twice to make sure reuse is working
|
||||
*/
|
||||
public void testIndexAnalysis() {
|
||||
assertQ("Index analysis: ",
|
||||
req("fl", "id", "q", "content2:aab", "sort", "id asc"),
|
||||
"//*[@numFound='1']",
|
||||
"//result/doc[1]/int[@name='id'][.=3]"
|
||||
);
|
||||
assertQ("Index analysis: ",
|
||||
req("fl", "id", "q", "content2:aabaa", "sort", "id asc"),
|
||||
"//*[@numFound='1']",
|
||||
"//result/doc[1]/int[@name='id'][.=4]"
|
||||
);
|
||||
}
|
||||
}
|
@ -0,0 +1,46 @@
|
||||
package org.apache.solr.analysis;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.CharStream;
|
||||
import org.apache.lucene.analysis.MockCharFilter;
|
||||
import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link MockCharFilter} for testing purposes.
|
||||
*/
|
||||
public class MockCharFilterFactory extends CharFilterFactory {
|
||||
int remainder;
|
||||
|
||||
@Override
|
||||
public void init(Map<String,String> args) {
|
||||
super.init(args);
|
||||
String sval = args.get("remainder");
|
||||
if (sval == null) {
|
||||
throw new IllegalArgumentException("remainder is mandatory");
|
||||
}
|
||||
remainder = Integer.parseInt(sval);
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharStream create(CharStream input) {
|
||||
return new MockCharFilter(input, remainder);
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user