SOLR-3054, LUCENE-3671: Add TypeTokenFilterFactory that creates TypeTokenFilter that filters tokens based on their TypeAttribute

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1234573 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2012-01-22 18:50:10 +00:00
parent a7cfee6b07
commit 366e1a2cd2
5 changed files with 213 additions and 0 deletions

View File

@ -450,6 +450,10 @@ New Features
* SOLR-1709: Distributed support for Date and Numeric Range Faceting
(Peter Sturge, David Smiley, hossman, Simon Willnauer)
* SOLR-3054, LUCENE-3671: Add TypeTokenFilterFactory that creates TypeTokenFilter
that filters tokens based on their TypeAttribute. (Tommaso Teofili via
Uwe Schindler)
Optimizations
----------------------
* SOLR-1931: Speedup for LukeRequestHandler and admin/schema browser. New parameter

View File

@ -0,0 +1,89 @@
package org.apache.solr.analysis;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.TypeTokenFilter;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.util.plugin.ResourceLoaderAware;
import java.io.IOException;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * Factory class for {@link TypeTokenFilter}.
 * <p>
 * The required <code>types</code> argument is a comma-separated list of resource
 * names; the lines of every listed resource are merged into a single set of stop
 * types that is handed to each created filter. The optional
 * <code>enablePositionIncrements</code> argument defaults to <code>false</code>.
 * <pre class="prettyprint" >
 * &lt;fieldType name="chars" class="solr.TextField" positionIncrementGap="100"&gt;
 *   &lt;analyzer&gt;
 *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
 *     &lt;filter class="solr.TypeTokenFilterFactory" types="stoptypes.txt" enablePositionIncrements="true"/&gt;
 *   &lt;/analyzer&gt;
 * &lt;/fieldType&gt;</pre>
 */
public class TypeTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {

  /** Stop types loaded by {@link #inform(ResourceLoader)}; <code>null</code> until then. */
  private Set<String> stopTypes;

  /** Value of the <code>enablePositionIncrements</code> argument, read in {@link #inform(ResourceLoader)}. */
  private boolean enablePositionIncrements;

  /**
   * Stores the configuration arguments and checks that a match version was supplied.
   *
   * @param args the factory arguments from the schema
   */
  @Override
  public void init(Map<String, String> args) {
    super.init(args);
    assureMatchVersion();
  }

  /**
   * Reads the <code>types</code> and <code>enablePositionIncrements</code> arguments
   * and loads the stop types from the listed resources.
   *
   * @param loader the loader used to read each listed resource
   * @throws SolrException if the <code>types</code> argument is missing or one of the
   *         listed resources cannot be read
   */
  @Override
  public void inform(ResourceLoader loader) {
    String stopTypesFiles = args.get("types");
    enablePositionIncrements = getBoolean("enablePositionIncrements", false);
    if (stopTypesFiles == null) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Missing required parameter: types.");
    }

    // Collect into a local set and publish it only once every file has been read,
    // so a failure half way through never leaves a partially filled set behind and
    // the field is never left null after a successful call.
    Set<String> loaded = new HashSet<String>();
    try {
      for (String file : StrUtils.splitFileNames(stopTypesFiles)) {
        loaded.addAll(loader.getLines(file.trim()));
      }
    } catch (IOException e) {
      // Report through SolrException like the missing-parameter case above, keeping the cause.
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
          "IOException thrown while loading types from: " + stopTypesFiles, e);
    }
    stopTypes = loaded;
  }

  /**
   * @return the configured value of <code>enablePositionIncrements</code>
   */
  public boolean isEnablePositionIncrements() {
    return enablePositionIncrements;
  }

  /**
   * @return the stop types loaded by {@link #inform(ResourceLoader)}, or
   *         <code>null</code> if it has not been called yet
   */
  public Set<String> getStopTypes() {
    return stopTypes;
  }

  /**
   * Wraps the given stream in a {@link TypeTokenFilter} configured with the loaded
   * stop types and the position-increment setting.
   *
   * @param input the stream to filter
   * @return the filtering stream
   */
  @Override
  public TokenStream create(TokenStream input) {
    return new TypeTokenFilter(enablePositionIncrements, input, stopTypes);
  }
}

View File

@ -0,0 +1,17 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
<NUM>
<EMAIL>

View File

@ -0,0 +1,17 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
<HOST>
<APOSTROPHE>

View File

@ -0,0 +1,86 @@
package org.apache.solr.analysis;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.common.SolrException;
import org.apache.solr.core.SolrResourceLoader;
import org.junit.Test;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
/**
 * Testcase for {@link TypeTokenFilterFactory}
 */
public class TestTypeTokenFilterFactory extends BaseTokenTestCase {

  /**
   * Checks that {@code inform} loads the stop types from one and from several
   * resource files, and that {@code enablePositionIncrements} is parsed.
   */
  @Test
  public void testInform() throws Exception {
    ResourceLoader loader = new SolrResourceLoader(null, null);

    // A single types file.
    TypeTokenFilterFactory factory = new TypeTokenFilterFactory();
    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
    args.put("types", "stoptypes-1.txt");
    args.put("enablePositionIncrements", "true");
    factory.init(args);
    factory.inform(loader);
    Set<String> types = factory.getStopTypes();
    assertNotNull("types is null and it shouldn't be", types);
    assertEquals("unexpected number of stop types", 2, types.size());
    assertTrue("enablePositionIncrements was set to true but not correctly parsed", factory.isEnablePositionIncrements());

    // Two comma-separated types files on a fresh factory; the same map is reused
    // with both arguments overwritten.
    factory = new TypeTokenFilterFactory();
    args.put("types", "stoptypes-1.txt, stoptypes-2.txt");
    args.put("enablePositionIncrements", "false");
    factory.init(args);
    factory.inform(loader);
    types = factory.getStopTypes();
    assertNotNull("types is null and it shouldn't be", types);
    assertEquals("unexpected number of stop types", 4, types.size());
    assertFalse("enablePositionIncrements was set to false but not correctly parsed", factory.isEnablePositionIncrements());
  }

  /**
   * Checks that a fully configured factory produces a filter.
   */
  @Test
  public void testCreation() throws Exception {
    TypeTokenFilterFactory typeTokenFilterFactory = new TypeTokenFilterFactory();
    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
    args.put("types", "stoptypes-1.txt, stoptypes-2.txt");
    args.put("enablePositionIncrements", "false");
    typeTokenFilterFactory.init(args);
    // Load the stop types before creating the filter; without this the factory
    // would build the filter from stop types that were never read.
    typeTokenFilterFactory.inform(new SolrResourceLoader(null, null));
    NumericTokenStream input = new NumericTokenStream();
    input.setIntValue(123);
    assertNotNull("factory did not create a filter", typeTokenFilterFactory.create(input));
  }

  /**
   * Checks that {@code inform} rejects a configuration without the {@code types} argument.
   */
  @Test
  public void testMissingTypesParameter() throws Exception {
    TypeTokenFilterFactory typeTokenFilterFactory = new TypeTokenFilterFactory();
    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
    args.put("enablePositionIncrements", "false");
    typeTokenFilterFactory.init(args);
    // Only inform() is expected to fail, so only it sits inside the try block.
    try {
      typeTokenFilterFactory.inform(new SolrResourceLoader(null, null));
      fail("not supplying 'types' parameter should cause a SolrException");
    } catch (SolrException e) {
      // everything ok
    }
  }
}