add factories for 'new' Lucene analysis classes where it makes sense

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@676390 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2008-07-13 18:15:43 +00:00
parent 7de8ac0a90
commit 7ad6fc5bb0
7 changed files with 283 additions and 0 deletions

View File

@ -254,6 +254,18 @@
<!-- no way to leverage this in Solr -->
<regexp pattern="CachingTokenFilter"/>
</linecontainsregexp>
<linecontainsregexp negate="true">
<!-- no way to leverage this in Solr -->
<regexp pattern="HyphenationCompoundWordTokenFilter"/>
</linecontainsregexp>
<linecontainsregexp negate="true">
<!-- no way to leverage these in Solr (yet) -->
<regexp pattern="Sink\|Tee"/>
</linecontainsregexp>
<linecontainsregexp negate="true">
<!-- Solr already has a different impl for this -->
<regexp pattern="SynonymTokenFilter"/>
</linecontainsregexp>
<linecontainsregexp negate="true">
<!-- solr and lucene both have one? ? ? ? -->
<regexp pattern="LengthFilter"/>

View File

@ -0,0 +1,59 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.compound.*;
import org.apache.solr.util.plugin.ResourceLoaderAware;
import org.apache.solr.common.ResourceLoader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import java.util.List;
import java.util.Set;
import java.util.Map;
import java.io.IOException;
public class DictionaryCompoundWordTokenFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
private Set dictionary;
private String dictFile;
private int minWordSize;
private int minSubwordSize;
private int maxSubwordSize;
private boolean onlyLongestMatch;
public void init(Map<String, String> args) {
super.init(args);
dictFile = args.get("dictionary");
minWordSize= getInt("minWordSize",CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
minSubwordSize= getInt("minSubwordSize",CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
maxSubwordSize= getInt("maxSubwordSize",CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
onlyLongestMatch = getBoolean("onlyLongestMatch",true);
}
public void inform(ResourceLoader loader) {
try {
List<String> wlist = loader.getLines(dictFile);
dictionary = StopFilter.makeStopSet((String[])wlist.toArray(new String[0]), false);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
public DictionaryCompoundWordTokenFilter create(TokenStream input) {
return new DictionaryCompoundWordTokenFilter(input,dictionary,minWordSize,minSubwordSize,maxSubwordSize,onlyLongestMatch);
}
}

View File

@ -0,0 +1,62 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.util.plugin.ResourceLoaderAware;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.fr.*;
import java.io.IOException;
import java.util.Set;
import java.util.HashSet;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenFilter;
import java.util.Map;
import java.util.List;
import java.util.Set;
import java.io.IOException;
public class ElisionFilterFactory extends BaseTokenFilterFactory implements ResourceLoaderAware {
private Set articles;
public void inform(ResourceLoader loader) {
String articlesFile = args.get("articles");
if (articlesFile != null) {
try {
List<String> wlist = loader.getLines(articlesFile);
articles = StopFilter.makeStopSet((String[])wlist.toArray(new String[0]), false);
} catch (IOException e) {
throw new RuntimeException(e);
}
} else {
throw new RuntimeException("No articles specified for ElisionFilterFactory");
}
}
public ElisionFilter create(TokenStream input) {
return new ElisionFilter(input,articles);
}
}

View File

@ -0,0 +1,40 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.payloads.*;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Payload;
import java.io.IOException;
import java.util.Map;
public class NumericPayloadTokenFilterFactory extends BaseTokenFilterFactory {
private float payload;
private String typeMatch;
public void init(Map<String, String> args) {
super.init(args);
payload = Float.parseFloat(args.get("payload"));
typeMatch = args.get("typeMatch");
}
public NumericPayloadTokenFilter create(TokenStream input) {
return new NumericPayloadTokenFilter(input,payload,typeMatch);
}
}

View File

@ -0,0 +1,44 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.shingle.*;
import java.io.IOException;
import java.util.LinkedList;
import java.util.Iterator;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import java.util.Map;
public class ShingleFilterFactory extends BaseTokenFilterFactory {
private int maxShingleSize;
private boolean outputUnigrams;
public void init(Map<String, String> args) {
super.init(args);
maxShingleSize = getInt("maxShingleSize",
ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
outputUnigrams = getBoolean("outputUnigrams", true);
}
public ShingleFilter create(TokenStream input) {
ShingleFilter r = new ShingleFilter(input,maxShingleSize);
r.setOutputUnigrams(outputUnigrams);
return r;
}
}

View File

@ -0,0 +1,33 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.payloads.*;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Payload;
import java.io.IOException;
import java.util.Map;
public class TokenOffsetPayloadTokenFilterFactory extends BaseTokenFilterFactory {
public TokenOffsetPayloadTokenFilter create(TokenStream input) {
return new TokenOffsetPayloadTokenFilter(input);
}
}

View File

@ -0,0 +1,33 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.payloads.*;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.index.Payload;
import java.io.IOException;
import java.util.Map;
public class TypeAsPayloadTokenFilterFactory extends BaseTokenFilterFactory {
public TypeAsPayloadTokenFilter create(TokenStream input) {
return new TypeAsPayloadTokenFilter(input);
}
}