mirror of https://github.com/apache/lucene.git
SOLR-3054, LUCENE-3671: Add TypeTokenFilterFactory that creates TypeTokenFilter that filters tokens based on their TypeAttribute
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1234573 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a7cfee6b07
commit
366e1a2cd2
|
@ -450,6 +450,10 @@ New Features
|
|||
* SOLR-1709: Distributed support for Date and Numeric Range Faceting
|
||||
(Peter Sturge, David Smiley, hossman, Simon Willnauer)
|
||||
|
||||
* SOLR-3054, LUCENE-3671: Add TypeTokenFilterFactory that creates TypeTokenFilter
|
||||
that filters tokens based on their TypeAttribute. (Tommaso Teofili via
|
||||
Uwe Schindler)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
* SOLR-1931: Speedup for LukeRequestHandler and admin/schema browser. New parameter
|
||||
|
|
|
@ -0,0 +1,89 @@
|
|||
package org.apache.solr.analysis;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.core.TypeTokenFilter;
|
||||
import org.apache.solr.common.ResourceLoader;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.util.plugin.ResourceLoaderAware;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Factory class for {@link TypeTokenFilter}
|
||||
* <pre class="prettyprint" >
|
||||
* <fieldType name="chars" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.TypeTokenFilterFactory" types="stoptypes.txt" enablePositionIncrements="true"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class TypeTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
|
||||
|
||||
@Override
|
||||
public void init(Map<String, String> args) {
|
||||
super.init(args);
|
||||
assureMatchVersion();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void inform(ResourceLoader loader) {
|
||||
String stopTypesFiles = args.get("types");
|
||||
enablePositionIncrements = getBoolean("enablePositionIncrements", false);
|
||||
|
||||
if (stopTypesFiles != null) {
|
||||
try {
|
||||
List<String> files = StrUtils.splitFileNames(stopTypesFiles);
|
||||
if (files.size() > 0) {
|
||||
stopTypes = new HashSet<String>();
|
||||
for (String file : files) {
|
||||
List<String> typesLines = loader.getLines(file.trim());
|
||||
stopTypes.addAll(typesLines);
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
} else {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Missing required parameter: types.");
|
||||
}
|
||||
}
|
||||
|
||||
private Set<String> stopTypes;
|
||||
private boolean enablePositionIncrements;
|
||||
|
||||
public boolean isEnablePositionIncrements() {
|
||||
return enablePositionIncrements;
|
||||
}
|
||||
|
||||
public Set<String> getStopTypes() {
|
||||
return stopTypes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new TypeTokenFilter(enablePositionIncrements, input, stopTypes);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
<NUM>
|
||||
<EMAIL>
|
|
@ -0,0 +1,17 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
<HOST>
|
||||
<APOSTROPHE>
|
|
@ -0,0 +1,86 @@
|
|||
package org.apache.solr.analysis;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.NumericTokenStream;
|
||||
import org.apache.solr.common.ResourceLoader;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Testcase for {@link TypeTokenFilterFactory}
|
||||
*/
|
||||
public class TestTypeTokenFilterFactory extends BaseTokenTestCase {
|
||||
|
||||
@Test
|
||||
public void testInform() throws Exception {
|
||||
ResourceLoader loader = new SolrResourceLoader(null, null);
|
||||
TypeTokenFilterFactory factory = new TypeTokenFilterFactory();
|
||||
Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
|
||||
args.put("types", "stoptypes-1.txt");
|
||||
args.put("enablePositionIncrements", "true");
|
||||
factory.init(args);
|
||||
factory.inform(loader);
|
||||
Set<String> types = factory.getStopTypes();
|
||||
assertTrue("types is null and it shouldn't be", types != null);
|
||||
assertTrue("types Size: " + types.size() + " is not: " + 2, types.size() == 2);
|
||||
assertTrue("enablePositionIncrements was set to true but not correctly parsed", factory.isEnablePositionIncrements());
|
||||
|
||||
factory = new TypeTokenFilterFactory();
|
||||
args.put("types", "stoptypes-1.txt, stoptypes-2.txt");
|
||||
args.put("enablePositionIncrements", "false");
|
||||
factory.init(args);
|
||||
factory.inform(loader);
|
||||
types = factory.getStopTypes();
|
||||
assertTrue("types is null and it shouldn't be", types != null);
|
||||
assertTrue("types Size: " + types.size() + " is not: " + 4, types.size() == 4);
|
||||
assertTrue("enablePositionIncrements was set to false but not correctly parsed", !factory.isEnablePositionIncrements());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreation() throws Exception {
|
||||
TypeTokenFilterFactory typeTokenFilterFactory = new TypeTokenFilterFactory();
|
||||
Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
|
||||
args.put("types", "stoptypes-1.txt, stoptypes-2.txt");
|
||||
args.put("enablePositionIncrements", "false");
|
||||
typeTokenFilterFactory.init(args);
|
||||
NumericTokenStream input = new NumericTokenStream();
|
||||
input.setIntValue(123);
|
||||
typeTokenFilterFactory.create(input);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMissingTypesParameter() throws Exception {
|
||||
try {
|
||||
TypeTokenFilterFactory typeTokenFilterFactory = new TypeTokenFilterFactory();
|
||||
Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
|
||||
args.put("enablePositionIncrements", "false");
|
||||
typeTokenFilterFactory.init(args);
|
||||
typeTokenFilterFactory.inform(new SolrResourceLoader(null, null));
|
||||
fail("not supplying 'types' parameter should cause a SolrException");
|
||||
} catch (SolrException e) {
|
||||
// everything ok
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue