SOLR-396: new build system support for generating stub tokenizer/tokenfilter factories when lucene jars are updated. includes newly generated factories for all lucene-analyzers-2.2.0.jar langauges

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@593359 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2007-11-08 23:40:12 +00:00
parent 12a1302ee7
commit 99dc6e24eb
17 changed files with 925 additions and 0 deletions

View File

@ -227,6 +227,12 @@ Other Changes
provided by Solr now declare their specific return types instead of just provided by Solr now declare their specific return types instead of just
using "TokenStream" (hossman) using "TokenStream" (hossman)
6. SOLR-396: Hooks add to build system for automatic generation of (stub)
Tokenizer and TokenFilter Factories.
Also: new Factories for all Tokenizers and TokenFilters provided by the
lucene-analyzers-2.2.0.jar -- includes support for German, Chinese,
Russan, Dutch, Greek, Brazilian, Thai, and French. (hossman)
================== Release 1.2, 20070602 ================== ================== Release 1.2, 20070602 ==================

View File

@ -258,6 +258,79 @@
</javadoc> </javadoc>
</target> </target>
<target name="stub-factories" depends="dist-jar"
description="Generates stub factories as needed">
<path id="stub.jars">
<!-- this needs to be a list of all jars that might contain
classes we want to build factories for
-->
<fileset dir="${lib}">
<include name="lucene-*.jar"/>
</fileset>
<fileset dir="${dist}">
<include name="*.jar"/>
<exclude name="*solrj*.jar"/>
</fileset>
</path>
<pathconvert property="jar.list" pathsep=" " refid="stub.jars" />
<property name="stub.list" value="${dest}/need-stub-factories.txt" />
<java fork="false"
classname="org.apache.solr.util.SuggestMissingFactories"
logError="true"
failonerror="true"
classpathref="test.run.classpath"
output="${stub.list}">
<arg line="${jar.list}" />
</java>
<fail unless="stub.src.path">...
This task requires that the property 'stub.src.path' be set.
It must contain a "path" listing directories containing source
files that this task should use when looking for classes that
need factories created, the format is platform specific --
typically it is colon seperated in Unix, semi-colon seperated
on windows, ie:
ant stub-factories -Dstub.src.path="./src:../lucene/contrib:../lucene/src/java"
FYI: The file ${stub.list} contains a list of classes
that seem to need stub factories. (if java files can be found to
use as guides for creating them).
</fail>
<pathconvert pathsep=" " property="stub.src.dirs">
<path>
<pathelement path="${stub.src.path}"/>
</path>
</pathconvert>
<exec executable="${basedir}/src/dev-tools/stub-analysis-factory-maker.pl"
dir="src/java/org/apache/solr/analysis/"
failonerror="true">
<redirector input="${stub.list}">
<!-- place to put special case classes we want to ignore -->
<inputfilterchain>
<linecontainsregexp negate="true">
<!-- only for internal Solr highlighting purposes -->
<regexp pattern="TokenOrderingFilter"/>
</linecontainsregexp>
<linecontainsregexp negate="true">
<!-- no way to leverage this in Solr -->
<regexp pattern="CachingTokenFilter"/>
</linecontainsregexp>
<linecontainsregexp negate="true">
<!-- solr and lucene both have one? ? ? ? -->
<regexp pattern="LengthFilter"/>
</linecontainsregexp>
</inputfilterchain>
</redirector>
<arg line="${stub.src.dirs}"/>
</exec>
</target>
<!-- ========================================================================= --> <!-- ========================================================================= -->
<!-- ===================== CLIENT: solrj ============================= --> <!-- ===================== CLIENT: solrj ============================= -->
<!-- ========================================================================= --> <!-- ========================================================================= -->

View File

@ -0,0 +1,146 @@
#!/usr/bin/perl
my $ASL = q{
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
};
# $Id:$
# $URL:$
#
# What this script does...
#
# 1) reads a list of fully qualified package.ClassNames from STDIN
# 2) gets a list of src dirs from command line
# 3) crawl the source dirs, looking for .java files corrisponding to
# the ClassNames, if a match is found, creates a factory for it in
# the current working directory using info about the constructor
# in the orriginal .java file
#
# Note...
# * any ClassNames not found will be logged to stdout
# * script assumes generated factorories in "org.apache.solr.analysis package
# * factories will be compilable only if orriginal class had no special
# constructor args. otherwise it will have an abstract init method that
# needs filled in.
use strict;
use warnings;
use File::Find;
my %classes = ();
while (<STDIN>) {
chomp;
$classes{$_} = 1;
}
find({wanted => \&wanted,
no_chdir => 1,
},
@ARGV);
sub wanted {
my $file = $File::Find::name;
return unless $file =~ m{/([^/]*)\.java};
my $class = $1;
open(my $f, "<", $file) or die "can't open $file: $!";
my $data;
{
local $/; # slurp
$data = <$f>;
}
close $f;
# skip abstract classes
return if ($data =~ m{abstract\s+(\w+\s+)*class\s+$class});
my $pack = "EMPTYPACKAGE";
if ($data =~ m/package\s+(.*);/) {
$pack = $1;
}
my $fullname = "${pack}.${class}";
# only looking for certain classes
return unless $classes{$fullname};
my @imports = $data =~ m/import\s+.*;/g;
if ($data =~ m{public \s+ ((?:\w+\s+)*) $class \s*\(\s* ([^\)]*) \) }sx) {
my $modifiers = $1;
my $argline = $2;
my $mainArgType;
my $mainArg;
my @orderedArgs;
my %args = map { my ($v,$k) = split /\s+/;
push @orderedArgs, $k;
if ($v =~ m/^(Reader|TokenStream)/) {
$mainArgType=$v;
$mainArg=$k;
}
($k, $v)
} split /\s*,\s*/, $argline;
my $type = ("Reader" eq $mainArgType) ? "Tokenizer" : "TokenFilter";
my $facClass = "${class}Factory";
my $facFile = "${facClass}.java";
die "$facFile exists" if -e $facFile;
open my $o, ">", $facFile
or die "can't write to $facFile: $!";
print $o "$ASL\n";
print $o "package org.apache.solr.analysis;\n";
print $o "import ${pack}.*;\n";
print $o "$_\n" foreach @imports;
print $o "import java.util.Map;\n";
print $o "public class ${facClass} extends Base${type}Factory {\n";
foreach my $arg (@orderedArgs) {
print $o " private $args{$arg} $arg;\n" unless $arg eq $mainArg;
}
if (1 < @orderedArgs) {
# we need to init something, stub it out
print $o " public abstract void init(SolrConfig solrConfig, Map<String, String> args) {\n";
print $o " super.init(solrConfig, args);\n";
print $o " // ABSTRACT BECAUSE IT'S A STUB .. FILL IT IN\n";
print $o " }\n";
}
print $o " public $class create($mainArgType $mainArg) {\n";
print $o " return new $class(", join(",", @orderedArgs), ");\n";
print $o " }\n";
print $o "}\n\n";
close $o;
delete $classes{$fullname}; # we're done with this one
} else {
print STDERR "can't stub $class\n";
}
}
if (keys %classes) {
print STDERR "Can't find java files for...\n";
foreach (keys %classes) {
print STDERR "$_\n";
}
exit -1;
}

View File

@ -0,0 +1,35 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.br.*;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import java.io.IOException;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Set;
import java.util.Map;
public class BrazilianStemFilterFactory extends BaseTokenFilterFactory {
public BrazilianStemFilter create(TokenStream in) {
return new BrazilianStemFilter(in);
}
}

View File

@ -0,0 +1,31 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.cjk.*;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import java.io.Reader;
import java.util.Map;
public class CJKTokenizerFactory extends BaseTokenizerFactory {
public CJKTokenizer create(Reader in) {
return new CJKTokenizer(in);
}
}

View File

@ -0,0 +1,30 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.cn.*;
import java.util.Hashtable;
import org.apache.lucene.analysis.*;
import java.util.Map;
public class ChineseFilterFactory extends BaseTokenFilterFactory {
public ChineseFilter create(TokenStream in) {
return new ChineseFilter(in);
}
}

View File

@ -0,0 +1,30 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.cn.*;
import java.io.Reader;
import org.apache.lucene.analysis.*;
import java.util.Map;
public class ChineseTokenizerFactory extends BaseTokenizerFactory {
public ChineseTokenizer create(Reader in) {
return new ChineseTokenizer(in);
}
}

View File

@ -0,0 +1,36 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.nl.*;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import java.util.Map;
import java.util.Map;
public class DutchStemFilterFactory extends BaseTokenFilterFactory {
public DutchStemFilter create(TokenStream _in) {
return new DutchStemFilter(_in);
}
}

View File

@ -0,0 +1,35 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.fr.*;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import java.io.IOException;
import java.util.Hashtable;
import java.util.HashSet;
import java.util.Set;
import java.util.Map;
public class FrenchStemFilterFactory extends BaseTokenFilterFactory {
public FrenchStemFilter create(TokenStream in) {
return new FrenchStemFilter(in);
}
}

View File

@ -0,0 +1,33 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.de.*;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import java.io.IOException;
import java.util.Set;
import java.util.Map;
public class GermanStemFilterFactory extends BaseTokenFilterFactory {
public GermanStemFilter create(TokenStream in) {
return new GermanStemFilter(in);
}
}

View File

@ -0,0 +1,55 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.el.*;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import java.util.Map;
import java.util.HashMap;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
public class GreekLowerCaseFilterFactory extends BaseTokenFilterFactory {
private static Map<String,char[]> CHARSETS = new HashMap<String,char[]>();
static {
CHARSETS.put("UnicodeGreek",GreekCharsets.UnicodeGreek);
CHARSETS.put("ISO",GreekCharsets.ISO);
CHARSETS.put("CP1253",GreekCharsets.CP1253);
}
private char[] charset = GreekCharsets.UnicodeGreek;
public void init(SolrConfig solrConfig, Map<String, String> args) {
super.init(solrConfig, args);
String charsetName = args.get("charset");
if (null != charsetName) charset = CHARSETS.get(charsetName);
if (null == charset) {
throw new SolrException(ErrorCode.SERVER_ERROR,
"Don't understand charset: " + charsetName);
}
}
public GreekLowerCaseFilter create(TokenStream in) {
return new GreekLowerCaseFilter(in,charset);
}
}

View File

@ -0,0 +1,47 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.ru.*;
import java.util.Map;
import java.util.HashMap;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
public class RussianCommon {
private static Map<String,char[]> CHARSETS = new HashMap<String,char[]>();
static {
CHARSETS.put("UnicodeRussian",RussianCharsets.UnicodeRussian);
CHARSETS.put("KOI8",RussianCharsets.KOI8);
CHARSETS.put("CP1251",RussianCharsets.CP1251);
}
public static char[] getCharset(String name) {
if (null == name)
return RussianCharsets.UnicodeRussian;
char[] charset = CHARSETS.get(name);
if (null == charset) {
throw new SolrException(ErrorCode.SERVER_ERROR,
"Don't understand charset: " + name);
}
return charset;
}
}

View File

@ -0,0 +1,39 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.ru.*;
import java.io.Reader;
import org.apache.lucene.analysis.CharTokenizer;
import java.util.Map;
import org.apache.solr.core.SolrConfig;
public class RussianLetterTokenizerFactory extends BaseTokenizerFactory {
private char[] charset;
public void init(SolrConfig solrConfig, Map<String, String> args) {
super.init(solrConfig, args);
charset = RussianCommon.getCharset(args.get("charset"));
}
public RussianLetterTokenizer create(Reader in) {
return new RussianLetterTokenizer(in,charset);
}
}

View File

@ -0,0 +1,40 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.ru.*;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import java.util.Map;
import org.apache.solr.core.SolrConfig;
public class RussianLowerCaseFilterFactory extends BaseTokenFilterFactory {
private char[] charset;
public void init(SolrConfig solrConfig, Map<String, String> args) {
super.init(solrConfig, args);
charset = RussianCommon.getCharset(args.get("charset"));
}
public RussianLowerCaseFilter create(TokenStream in) {
return new RussianLowerCaseFilter(in,charset);
}
}

View File

@ -0,0 +1,41 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.ru.*;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import java.io.IOException;
import java.util.Map;
import org.apache.solr.core.SolrConfig;
public class RussianStemFilterFactory extends BaseTokenFilterFactory {
private char[] charset;
public void init(SolrConfig solrConfig, Map<String, String> args) {
super.init(solrConfig, args);
charset = RussianCommon.getCharset(args.get("charset"));
}
public RussianStemFilter create(TokenStream in) {
return new RussianStemFilter(in,charset);
}
}

View File

@ -0,0 +1,35 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.th.*;
import java.io.IOException;
import java.util.Locale;
import java.lang.Character.UnicodeBlock;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import java.text.BreakIterator;
import java.util.Map;
public class ThaiWordFilterFactory extends BaseTokenFilterFactory {
public ThaiWordFilter create(TokenStream input) {
return new ThaiWordFilter(input);
}
}

View File

@ -0,0 +1,213 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.util;
import java.io.Reader;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.net.URL;
import java.net.URLClassLoader;
import java.net.MalformedURLException;
import java.util.Collection;
import java.util.Map;
import java.util.HashSet;
import java.util.HashMap;
import java.util.Enumeration;
import java.util.jar.*;
/**
* Given a list of Jar files, suggest missing analysis factories.
*
* @version $Id$
*/
public class SuggestMissingFactories {
public static void main(String[] args) throws ClassNotFoundException, IOException, NoSuchMethodException {
final File[] files = new File[args.length];
for (int i = 0; i < args.length; i++) {
files[i] = new File(args[i]);
}
final FindClasses finder = new FindClasses(files);
final ClassLoader cl = finder.getClassLoader();
final Class TOKENSTREAM
= cl.loadClass("org.apache.lucene.analysis.TokenStream");
final Class TOKENIZER
= cl.loadClass("org.apache.lucene.analysis.Tokenizer");
final Class TOKENFILTER
= cl.loadClass("org.apache.lucene.analysis.TokenFilter");
final Class TOKENIZERFACTORY
= cl.loadClass("org.apache.solr.analysis.TokenizerFactory");
final Class TOKENFILTERFACTORY
= cl.loadClass("org.apache.solr.analysis.TokenFilterFactory");
final HashSet<Class> result
= new HashSet<Class>(finder.findExtends(TOKENIZER));
result.addAll(finder.findExtends(TOKENFILTER));
result.removeAll(finder.findMethodReturns
(finder.findExtends(TOKENIZERFACTORY),
"create",
Reader.class).values());
result.removeAll(finder.findMethodReturns
(finder.findExtends(TOKENFILTERFACTORY),
"create",
TOKENSTREAM).values());
for (final Class c : result) {
System.out.println(c.getName());
}
}
}
/**
* Takes in a clazz name and a jar and finds
* all classes in that jar that extend clazz.
*/
class FindClasses {
/**
* Simple command line test method
*/
public static void main(String[] args)
throws ClassNotFoundException, IOException, NoSuchMethodException {
FindClasses finder = new FindClasses(new File(args[1]));
ClassLoader cl = finder.getClassLoader();
Class clazz = cl.loadClass(args[0]);
if (args.length == 2) {
System.out.println("Finding all extenders of " + clazz.getName());
for (Class c : finder.findExtends(clazz)) {
System.out.println(c.getName());
}
} else {
String methName = args[2];
System.out.println("Finding all extenders of " + clazz.getName() +
" with method: " + methName);
Class[] methArgs = new Class[args.length-3];
for (int i = 3; i < args.length; i++) {
methArgs[i-3] = cl.loadClass(args[i]);
}
Map<Class,Class> map = finder.findMethodReturns
(finder.findExtends(clazz),methName, methArgs);
for (Class key : map.keySet()) {
System.out.println(key.getName() + " => " + map.get(key).getName());
}
}
}
private JarFile[] jarFiles;
private ClassLoader cl;
public FindClasses(File... jars) throws IOException {
jarFiles = new JarFile[jars.length];
URL[] urls = new URL[jars.length];
try {
for (int i =0; i < jars.length; i++) {
jarFiles[i] = new JarFile(jars[i]);
urls[i] = jars[i].toURL();
}
} catch (MalformedURLException e) {
throw new RuntimeException
("WTF, how can JarFile.toURL() be malformed?", e);
}
this.cl = new URLClassLoader(urls, this.getClass().getClassLoader());
}
/**
* returns a class loader that includes the jar used to
* construct this instance
*/
public ClassLoader getClassLoader() {
return this.cl;
}
/**
* Find useful concrete (ie: not anonymous, not abstract, not an interface)
* classes that extend clazz
*/
public Collection<Class> findExtends(Class<?> clazz)
throws ClassNotFoundException {
HashSet<Class> results = new HashSet<Class>();
for (JarFile jarFile : jarFiles) {
for (Enumeration<JarEntry> e = jarFile.entries();
e.hasMoreElements() ;) {
String n = e.nextElement().getName();
if (n.endsWith(".class")) {
String cn = n.replace("/",".").substring(0,n.length()-6);
Class<?> target;
try {
target = cl.loadClass(cn);
} catch (NoClassDefFoundError e1) {
throw new ClassNotFoundException
("Can't load: " + cn, e1);
}
if (clazz.isAssignableFrom(target)
&& !target.isAnonymousClass()) {
int mods = target.getModifiers();
if (!(Modifier.isAbstract(mods) ||
Modifier.isInterface(mods))) {
results.add(target);
}
}
}
}
}
return results;
}
/**
* Given a collection of classes, returns a Map containing the
* subset of those classes that impliment the method specified,
* where the value in the map is the return type of the method
*/
public Map<Class,Class> findMethodReturns(Collection<Class> clazzes,
String methodName,
Class... parameterTypes)
throws NoSuchMethodException{
HashMap<Class,Class> results = new HashMap<Class,Class>();
for (Class clazz : clazzes) {
try {
Method m = clazz.getMethod(methodName, parameterTypes);
results.put(clazz, m.getReturnType());
} catch (NoSuchMethodException e) {
/* :NOOP: we expect this and skip clazz */
}
}
return results;
}
}