mirror of https://github.com/apache/lucene.git
SOLR-3359: add analyzer attribute/property to SynonymFilterFactory
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1504037 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fefa4517c9
commit
ec803e133e
|
@ -57,6 +57,9 @@ New features
|
|||
* LUCENE-5098: New broadword utility methods in oal.util.BroadWord.
|
||||
(Paul Elschot via Adrien Grand, Dawid Weiss)
|
||||
|
||||
* SOLR-3359: Added analyzer attribute/property to SynonymFilterFactory.
|
||||
(Ryo Onodera via Koji Sekiguchi)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.
|
||||
|
|
|
@ -68,6 +68,7 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
|
|||
private final String synonyms;
|
||||
private final String format;
|
||||
private final boolean expand;
|
||||
private final String analyzerName;
|
||||
private final Map<String, String> tokArgs = new HashMap<String, String>();
|
||||
|
||||
private SynonymMap map;
|
||||
|
@ -79,7 +80,13 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
|
|||
format = get(args, "format");
|
||||
expand = getBoolean(args, "expand", true);
|
||||
|
||||
analyzerName = get(args, "analyzer");
|
||||
tokenizerFactory = get(args, "tokenizerFactory");
|
||||
if (analyzerName != null && tokenizerFactory != null) {
|
||||
throw new IllegalArgumentException("Analyzer and TokenizerFactory can't be specified both: " +
|
||||
analyzerName + " and " + tokenizerFactory);
|
||||
}
|
||||
|
||||
if (tokenizerFactory != null) {
|
||||
assureMatchVersion();
|
||||
tokArgs.put("luceneMatchVersion", getLuceneMatchVersion().toString());
|
||||
|
@ -104,15 +111,20 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
|
|||
@Override
|
||||
public void inform(ResourceLoader loader) throws IOException {
|
||||
final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);
|
||||
Analyzer analyzer;
|
||||
|
||||
Analyzer analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_50, reader) : factory.create(reader);
|
||||
TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_50, tokenizer) : tokenizer;
|
||||
return new TokenStreamComponents(tokenizer, stream);
|
||||
}
|
||||
};
|
||||
if (analyzerName != null) {
|
||||
analyzer = loadAnalyzer(loader, analyzerName);
|
||||
} else {
|
||||
analyzer = new Analyzer() {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
||||
Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_50, reader) : factory.create(reader);
|
||||
TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_50, tokenizer) : tokenizer;
|
||||
return new TokenStreamComponents(tokenizer, stream);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
try {
|
||||
if (format == null || format.equals("solr")) {
|
||||
|
@ -188,4 +200,17 @@ public class SynonymFilterFactory extends TokenFilterFactory implements Resource
|
|||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private Analyzer loadAnalyzer(ResourceLoader loader, String cname) throws IOException {
|
||||
Class<? extends Analyzer> clazz = loader.findClass(cname, Analyzer.class);
|
||||
try {
|
||||
Analyzer analyzer = clazz.getConstructor(Version.class).newInstance(Version.LUCENE_50);
|
||||
if (analyzer instanceof ResourceLoaderAware) {
|
||||
((ResourceLoaderAware) analyzer).inform(loader);
|
||||
}
|
||||
return analyzer;
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|||
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
|
||||
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
|
||||
import org.apache.lucene.analysis.util.StringMockResourceLoader;
|
||||
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
|
||||
|
||||
public class TestSynonymFilterFactory extends BaseTokenStreamFactoryTestCase {
|
||||
/** test that we can parse and use the solr syn file */
|
||||
|
@ -64,6 +65,28 @@ public class TestSynonymFilterFactory extends BaseTokenStreamFactoryTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
/** Test that analyzer and tokenizerFactory is both specified */
|
||||
public void testAnalyzer() throws Exception {
|
||||
final String analyzer = CJKAnalyzer.class.getName();
|
||||
final String tokenizerFactory = PatternTokenizerFactory.class.getName();
|
||||
TokenFilterFactory factory = null;
|
||||
|
||||
factory = tokenFilterFactory("Synonym",
|
||||
"synonyms", "synonyms2.txt",
|
||||
"analyzer", analyzer);
|
||||
assertNotNull(factory);
|
||||
|
||||
try {
|
||||
tokenFilterFactory("Synonym",
|
||||
"synonyms", "synonyms.txt",
|
||||
"analyzer", analyzer,
|
||||
"tokenizerFactory", tokenizerFactory);
|
||||
fail();
|
||||
} catch (IllegalArgumentException expected) {
|
||||
assertTrue(expected.getMessage().contains("Analyzer and TokenizerFactory can't be specified both"));
|
||||
}
|
||||
}
|
||||
|
||||
static final String TOK_SYN_ARG_VAL = "argument";
|
||||
static final String TOK_FOO_ARG_VAL = "foofoofoo";
|
||||
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
#-----------------------------------------------------------------------
|
||||
|
||||
蛙 => カエル
|
Loading…
Reference in New Issue