SOLR-3359: add analyzer attribute/property to SynonymFilterFactory

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1504037 13f79535-47bb-0310-9956-ffa450edef68
Koji Sekiguchi 2013-07-17 07:50:32 +00:00
parent fefa4517c9
commit ec803e133e
4 changed files with 74 additions and 8 deletions
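
For context, a minimal sketch (not part of the commit) of how the new "analyzer" argument could be used when constructing the factory programmatically. The class name, the "synonyms.txt" path, and the resource loader choice are illustrative assumptions:

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.synonym.SynonymFilterFactory;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;

public class AnalyzerArgSketch {
  public static void main(String[] args) throws Exception {
    Map<String, String> params = new HashMap<String, String>();
    params.put("synonyms", "synonyms.txt");              // hypothetical synonym file on the classpath
    params.put("analyzer", CJKAnalyzer.class.getName()); // new in SOLR-3359: parse the rules with CJKAnalyzer
    // params.put("tokenizerFactory", "...");            // specifying both now throws IllegalArgumentException

    SynonymFilterFactory factory = new SynonymFilterFactory(params);
    // The synonym map is built when the factory is handed its ResourceLoader.
    factory.inform(new ClasspathResourceLoader(AnalyzerArgSketch.class));
  }
}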

@@ -57,6 +57,9 @@ New features
* LUCENE-5098: New broadword utility methods in oal.util.BroadWord.
  (Paul Elschot via Adrien Grand, Dawid Weiss)

* SOLR-3359: Added analyzer attribute/property to SynonymFilterFactory.
  (Ryo Onodera via Koji Sekiguchi)

API Changes

* LUCENE-5094: Add ramBytesUsed() to MultiDocValues.OrdinalMap.

@@ -68,6 +68,7 @@ public class SynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware
  private final String synonyms;
  private final String format;
  private final boolean expand;
  private final String analyzerName;
  private final Map<String, String> tokArgs = new HashMap<String, String>();

  private SynonymMap map;
@@ -79,7 +80,13 @@ public class SynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware
    format = get(args, "format");
    expand = getBoolean(args, "expand", true);
    analyzerName = get(args, "analyzer");
    tokenizerFactory = get(args, "tokenizerFactory");

    if (analyzerName != null && tokenizerFactory != null) {
      throw new IllegalArgumentException("Analyzer and TokenizerFactory can't both be specified: " +
          analyzerName + " and " + tokenizerFactory);
    }

    if (tokenizerFactory != null) {
      assureMatchVersion();
      tokArgs.put("luceneMatchVersion", getLuceneMatchVersion().toString());
@@ -104,15 +111,20 @@ public class SynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware
  @Override
  public void inform(ResourceLoader loader) throws IOException {
    final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory);

    Analyzer analyzer;
    if (analyzerName != null) {
      analyzer = loadAnalyzer(loader, analyzerName);
    } else {
      analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
          Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_50, reader) : factory.create(reader);
          TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_50, tokenizer) : tokenizer;
          return new TokenStreamComponents(tokenizer, stream);
        }
      };
    }

    try {
      if (format == null || format.equals("solr")) {
@@ -188,4 +200,17 @@ public class SynonymFilterFactory extends TokenFilterFactory implements ResourceLoaderAware
      throw new RuntimeException(e);
    }
  }

  private Analyzer loadAnalyzer(ResourceLoader loader, String cname) throws IOException {
    Class<? extends Analyzer> clazz = loader.findClass(cname, Analyzer.class);
    try {
      Analyzer analyzer = clazz.getConstructor(Version.class).newInstance(Version.LUCENE_50);
      if (analyzer instanceof ResourceLoaderAware) {
        ((ResourceLoaderAware) analyzer).inform(loader);
      }
      return analyzer;
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
}
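
The reflective call in loadAnalyzer (clazz.getConstructor(Version.class)) implies a contract for any class referenced through the new attribute: it must be a public Analyzer subclass with a public constructor taking a Version. A minimal sketch of such an analyzer follows; the class itself is hypothetical and not part of the commit:

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.util.Version;

public class MyRuleParsingAnalyzer extends Analyzer {
  private final Version matchVersion;

  // Required by loadAnalyzer's getConstructor(Version.class).newInstance(...)
  public MyRuleParsingAnalyzer(Version matchVersion) {
    this.matchVersion = matchVersion;
  }

  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    // Arbitrary choice for the sketch: tokenize synonym rules on whitespace.
    Tokenizer tokenizer = new WhitespaceTokenizer(matchVersion, reader);
    return new TokenStreamComponents(tokenizer);
  }
}

If the referenced analyzer also implements ResourceLoaderAware, inform(loader) is called on it, as the code above shows.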

@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.StringMockResourceLoader;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
public class TestSynonymFilterFactory extends BaseTokenStreamFactoryTestCase {
  /** test that we can parse and use the solr syn file */
@@ -64,6 +65,28 @@ public class TestSynonymFilterFactory extends BaseTokenStreamFactoryTestCase {
}
}
  /** test the analyzer attribute, and that analyzer and tokenizerFactory may not both be specified */
  public void testAnalyzer() throws Exception {
    final String analyzer = CJKAnalyzer.class.getName();
    final String tokenizerFactory = PatternTokenizerFactory.class.getName();

    TokenFilterFactory factory = tokenFilterFactory("Synonym",
        "synonyms", "synonyms2.txt",
        "analyzer", analyzer);
    assertNotNull(factory);

    try {
      tokenFilterFactory("Synonym",
          "synonyms", "synonyms.txt",
          "analyzer", analyzer,
          "tokenizerFactory", tokenizerFactory);
      fail();
    } catch (IllegalArgumentException expected) {
      assertTrue(expected.getMessage().contains("Analyzer and TokenizerFactory can't both be specified"));
    }
  }

  static final String TOK_SYN_ARG_VAL = "argument";
  static final String TOK_FOO_ARG_VAL = "foofoofoo";
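
For completeness, a sketch of how an already-informed factory is typically consumed: its create(TokenStream) wraps a chain with a SynonymFilter backed by the map that was parsed using the configured analyzer. The wrapper class and the whitespace tokenizer below are illustrative assumptions, not part of the commit:

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SynonymFilterFactory;
import org.apache.lucene.util.Version;

public class SynonymChainSketch {
  /** Wraps an already-informed SynonymFilterFactory into a simple field Analyzer. */
  public static Analyzer wrap(final SynonymFilterFactory factory) {
    return new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_50, reader);
        // factory.create(...) appends the SynonymFilter built from the parsed map.
        return new TokenStreamComponents(tokenizer, factory.create(tokenizer));
      }
    };
  }
}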

@@ -0,0 +1,15 @@
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#-----------------------------------------------------------------------
蛙 => カエル