diff --git a/CHANGES.txt b/CHANGES.txt
index 8514ebfe355..56ab3581124 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -226,6 +226,12 @@ Other Changes
5. SOLR-367: The create method in all TokenFilter and Tokenizer Factories
provided by Solr now declare their specific return types instead of just
using "TokenStream" (hossman)
+
+ 6. SOLR-396: Hooks add to build system for automatic generation of (stub)
+ Tokenizer and TokenFilter Factories.
+ Also: new Factories for all Tokenizers and TokenFilters provided by the
+ lucene-analyzers-2.2.0.jar -- includes support for German, Chinese,
+ Russan, Dutch, Greek, Brazilian, Thai, and French. (hossman)
================== Release 1.2, 20070602 ==================
diff --git a/build.xml b/build.xml
index 93eea298f5a..77edb1f9485 100644
--- a/build.xml
+++ b/build.xml
@@ -258,6 +258,79 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ...
+
+ This task requires that the property 'stub.src.path' be set.
+
+ It must contain a "path" listing directories containing source
+ files that this task should use when looking for classes that
+ need factories created, the format is platform specific --
+ typically it is colon seperated in Unix, semi-colon seperated
+ on windows, ie:
+
+ ant stub-factories -Dstub.src.path="./src:../lucene/contrib:../lucene/src/java"
+
+ FYI: The file ${stub.list} contains a list of classes
+ that seem to need stub factories. (if java files can be found to
+ use as guides for creating them).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/dev-tools/stub-analysis-factory-maker.pl b/src/dev-tools/stub-analysis-factory-maker.pl
new file mode 100755
index 00000000000..114d8f611b2
--- /dev/null
+++ b/src/dev-tools/stub-analysis-factory-maker.pl
@@ -0,0 +1,146 @@
+#!/usr/bin/perl
+my $ASL = q{
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+};
+
+# $Id:$
+# $URL:$
+#
+# What this script does...
+#
+# 1) reads a list of fully qualified package.ClassNames from STDIN
+# 2) gets a list of src dirs from command line
+# 3) crawl the source dirs, looking for .java files corrisponding to
+# the ClassNames, if a match is found, creates a factory for it in
+# the current working directory using info about the constructor
+# in the orriginal .java file
+#
+# Note...
+# * any ClassNames not found will be logged to stdout
+# * script assumes generated factorories in "org.apache.solr.analysis package
+# * factories will be compilable only if orriginal class had no special
+# constructor args. otherwise it will have an abstract init method that
+# needs filled in.
+
+use strict;
+use warnings;
+use File::Find;
+
+
+my %classes = ();
+while () {
+ chomp;
+ $classes{$_} = 1;
+}
+
+find({wanted => \&wanted,
+ no_chdir => 1,
+ },
+ @ARGV);
+
+sub wanted {
+
+ my $file = $File::Find::name;
+
+ return unless $file =~ m{/([^/]*)\.java};
+ my $class = $1;
+
+ open(my $f, "<", $file) or die "can't open $file: $!";
+ my $data;
+ {
+ local $/; # slurp
+ $data = <$f>;
+ }
+ close $f;
+
+ # skip abstract classes
+ return if ($data =~ m{abstract\s+(\w+\s+)*class\s+$class});
+
+ my $pack = "EMPTYPACKAGE";
+ if ($data =~ m/package\s+(.*);/) {
+ $pack = $1;
+ }
+
+ my $fullname = "${pack}.${class}";
+ # only looking for certain classes
+ return unless $classes{$fullname};
+
+ my @imports = $data =~ m/import\s+.*;/g;
+
+ if ($data =~ m{public \s+ ((?:\w+\s+)*) $class \s*\(\s* ([^\)]*) \) }sx) {
+ my $modifiers = $1;
+ my $argline = $2;
+
+ my $mainArgType;
+ my $mainArg;
+ my @orderedArgs;
+
+ my %args = map { my ($v,$k) = split /\s+/;
+ push @orderedArgs, $k;
+ if ($v =~ m/^(Reader|TokenStream)/) {
+ $mainArgType=$v;
+ $mainArg=$k;
+ }
+ ($k, $v)
+ } split /\s*,\s*/, $argline;
+
+ my $type = ("Reader" eq $mainArgType) ? "Tokenizer" : "TokenFilter";
+
+ my $facClass = "${class}Factory";
+ my $facFile = "${facClass}.java";
+ die "$facFile exists" if -e $facFile;
+ open my $o, ">", $facFile
+ or die "can't write to $facFile: $!";
+
+ print $o "$ASL\n";
+ print $o "package org.apache.solr.analysis;\n";
+ print $o "import ${pack}.*;\n";
+ print $o "$_\n" foreach @imports;
+ print $o "import java.util.Map;\n";
+ print $o "public class ${facClass} extends Base${type}Factory {\n";
+ foreach my $arg (@orderedArgs) {
+ print $o " private $args{$arg} $arg;\n" unless $arg eq $mainArg;
+ }
+ if (1 < @orderedArgs) {
+ # we need to init something, stub it out
+ print $o " public abstract void init(SolrConfig solrConfig, Map args) {\n";
+ print $o " super.init(solrConfig, args);\n";
+ print $o " // ABSTRACT BECAUSE IT'S A STUB .. FILL IT IN\n";
+ print $o " }\n";
+ }
+ print $o " public $class create($mainArgType $mainArg) {\n";
+ print $o " return new $class(", join(",", @orderedArgs), ");\n";
+ print $o " }\n";
+ print $o "}\n\n";
+ close $o;
+
+ delete $classes{$fullname}; # we're done with this one
+ } else {
+ print STDERR "can't stub $class\n";
+ }
+}
+
+if (keys %classes) {
+ print STDERR "Can't find java files for...\n";
+ foreach (keys %classes) {
+ print STDERR "$_\n";
+ }
+ exit -1;
+}
+
diff --git a/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java b/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java
new file mode 100644
index 00000000000..1045ca2c466
--- /dev/null
+++ b/src/java/org/apache/solr/analysis/BrazilianStemFilterFactory.java
@@ -0,0 +1,35 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.br.*;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.Set;
+import java.util.Map;
+public class BrazilianStemFilterFactory extends BaseTokenFilterFactory {
+ public BrazilianStemFilter create(TokenStream in) {
+ return new BrazilianStemFilter(in);
+ }
+}
+
diff --git a/src/java/org/apache/solr/analysis/CJKTokenizerFactory.java b/src/java/org/apache/solr/analysis/CJKTokenizerFactory.java
new file mode 100644
index 00000000000..e68265c4ce9
--- /dev/null
+++ b/src/java/org/apache/solr/analysis/CJKTokenizerFactory.java
@@ -0,0 +1,31 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.cjk.*;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.Tokenizer;
+import java.io.Reader;
+import java.util.Map;
+public class CJKTokenizerFactory extends BaseTokenizerFactory {
+ public CJKTokenizer create(Reader in) {
+ return new CJKTokenizer(in);
+ }
+}
+
diff --git a/src/java/org/apache/solr/analysis/ChineseFilterFactory.java b/src/java/org/apache/solr/analysis/ChineseFilterFactory.java
new file mode 100644
index 00000000000..0076220bf38
--- /dev/null
+++ b/src/java/org/apache/solr/analysis/ChineseFilterFactory.java
@@ -0,0 +1,30 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.cn.*;
+import java.util.Hashtable;
+import org.apache.lucene.analysis.*;
+import java.util.Map;
+public class ChineseFilterFactory extends BaseTokenFilterFactory {
+ public ChineseFilter create(TokenStream in) {
+ return new ChineseFilter(in);
+ }
+}
+
diff --git a/src/java/org/apache/solr/analysis/ChineseTokenizerFactory.java b/src/java/org/apache/solr/analysis/ChineseTokenizerFactory.java
new file mode 100644
index 00000000000..a817ce000a5
--- /dev/null
+++ b/src/java/org/apache/solr/analysis/ChineseTokenizerFactory.java
@@ -0,0 +1,30 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.cn.*;
+import java.io.Reader;
+import org.apache.lucene.analysis.*;
+import java.util.Map;
+public class ChineseTokenizerFactory extends BaseTokenizerFactory {
+ public ChineseTokenizer create(Reader in) {
+ return new ChineseTokenizer(in);
+ }
+}
+
diff --git a/src/java/org/apache/solr/analysis/DutchStemFilterFactory.java b/src/java/org/apache/solr/analysis/DutchStemFilterFactory.java
new file mode 100644
index 00000000000..8dfb8bbab5f
--- /dev/null
+++ b/src/java/org/apache/solr/analysis/DutchStemFilterFactory.java
@@ -0,0 +1,36 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.nl.*;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.Map;
+import java.util.Map;
+public class DutchStemFilterFactory extends BaseTokenFilterFactory {
+ public DutchStemFilter create(TokenStream _in) {
+ return new DutchStemFilter(_in);
+ }
+}
+
diff --git a/src/java/org/apache/solr/analysis/FrenchStemFilterFactory.java b/src/java/org/apache/solr/analysis/FrenchStemFilterFactory.java
new file mode 100644
index 00000000000..42a92d73dfb
--- /dev/null
+++ b/src/java/org/apache/solr/analysis/FrenchStemFilterFactory.java
@@ -0,0 +1,35 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.fr.*;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import java.io.IOException;
+import java.util.Hashtable;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.Map;
+public class FrenchStemFilterFactory extends BaseTokenFilterFactory {
+ public FrenchStemFilter create(TokenStream in) {
+ return new FrenchStemFilter(in);
+ }
+}
+
diff --git a/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java b/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java
new file mode 100644
index 00000000000..109c1030354
--- /dev/null
+++ b/src/java/org/apache/solr/analysis/GermanStemFilterFactory.java
@@ -0,0 +1,33 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.de.*;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import java.io.IOException;
+import java.util.Set;
+import java.util.Map;
+public class GermanStemFilterFactory extends BaseTokenFilterFactory {
+ public GermanStemFilter create(TokenStream in) {
+ return new GermanStemFilter(in);
+ }
+}
+
diff --git a/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java b/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java
new file mode 100644
index 00000000000..5086a409315
--- /dev/null
+++ b/src/java/org/apache/solr/analysis/GreekLowerCaseFilterFactory.java
@@ -0,0 +1,55 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.el.*;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import java.util.Map;
+import java.util.HashMap;
+import org.apache.solr.core.SolrConfig;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
+public class GreekLowerCaseFilterFactory extends BaseTokenFilterFactory {
+
+ private static Map CHARSETS = new HashMap();
+ static {
+ CHARSETS.put("UnicodeGreek",GreekCharsets.UnicodeGreek);
+ CHARSETS.put("ISO",GreekCharsets.ISO);
+ CHARSETS.put("CP1253",GreekCharsets.CP1253);
+ }
+
+ private char[] charset = GreekCharsets.UnicodeGreek;
+
+
+ public void init(SolrConfig solrConfig, Map args) {
+ super.init(solrConfig, args);
+ String charsetName = args.get("charset");
+ if (null != charsetName) charset = CHARSETS.get(charsetName);
+ if (null == charset) {
+ throw new SolrException(ErrorCode.SERVER_ERROR,
+ "Don't understand charset: " + charsetName);
+ }
+ }
+ public GreekLowerCaseFilter create(TokenStream in) {
+ return new GreekLowerCaseFilter(in,charset);
+ }
+}
+
diff --git a/src/java/org/apache/solr/analysis/RussianCommon.java b/src/java/org/apache/solr/analysis/RussianCommon.java
new file mode 100644
index 00000000000..839211e0d3c
--- /dev/null
+++ b/src/java/org/apache/solr/analysis/RussianCommon.java
@@ -0,0 +1,47 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.ru.*;
+import java.util.Map;
+import java.util.HashMap;
+import org.apache.solr.core.SolrConfig;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
+public class RussianCommon {
+
+ private static Map CHARSETS = new HashMap();
+ static {
+ CHARSETS.put("UnicodeRussian",RussianCharsets.UnicodeRussian);
+ CHARSETS.put("KOI8",RussianCharsets.KOI8);
+ CHARSETS.put("CP1251",RussianCharsets.CP1251);
+ }
+
+ public static char[] getCharset(String name) {
+ if (null == name)
+ return RussianCharsets.UnicodeRussian;
+
+ char[] charset = CHARSETS.get(name);
+ if (null == charset) {
+ throw new SolrException(ErrorCode.SERVER_ERROR,
+ "Don't understand charset: " + name);
+ }
+ return charset;
+ }
+}
+
diff --git a/src/java/org/apache/solr/analysis/RussianLetterTokenizerFactory.java b/src/java/org/apache/solr/analysis/RussianLetterTokenizerFactory.java
new file mode 100644
index 00000000000..a7f9d7810d2
--- /dev/null
+++ b/src/java/org/apache/solr/analysis/RussianLetterTokenizerFactory.java
@@ -0,0 +1,39 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.ru.*;
+import java.io.Reader;
+import org.apache.lucene.analysis.CharTokenizer;
+import java.util.Map;
+import org.apache.solr.core.SolrConfig;
+public class RussianLetterTokenizerFactory extends BaseTokenizerFactory {
+
+ private char[] charset;
+
+ public void init(SolrConfig solrConfig, Map args) {
+ super.init(solrConfig, args);
+ charset = RussianCommon.getCharset(args.get("charset"));
+ }
+
+ public RussianLetterTokenizer create(Reader in) {
+ return new RussianLetterTokenizer(in,charset);
+ }
+}
+
diff --git a/src/java/org/apache/solr/analysis/RussianLowerCaseFilterFactory.java b/src/java/org/apache/solr/analysis/RussianLowerCaseFilterFactory.java
new file mode 100644
index 00000000000..72865eaf8a4
--- /dev/null
+++ b/src/java/org/apache/solr/analysis/RussianLowerCaseFilterFactory.java
@@ -0,0 +1,40 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.ru.*;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import java.util.Map;
+import org.apache.solr.core.SolrConfig;
+public class RussianLowerCaseFilterFactory extends BaseTokenFilterFactory {
+
+ private char[] charset;
+
+ public void init(SolrConfig solrConfig, Map args) {
+ super.init(solrConfig, args);
+ charset = RussianCommon.getCharset(args.get("charset"));
+ }
+
+ public RussianLowerCaseFilter create(TokenStream in) {
+ return new RussianLowerCaseFilter(in,charset);
+ }
+}
+
diff --git a/src/java/org/apache/solr/analysis/RussianStemFilterFactory.java b/src/java/org/apache/solr/analysis/RussianStemFilterFactory.java
new file mode 100644
index 00000000000..944eea305cc
--- /dev/null
+++ b/src/java/org/apache/solr/analysis/RussianStemFilterFactory.java
@@ -0,0 +1,41 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.ru.*;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import java.io.IOException;
+import java.util.Map;
+import org.apache.solr.core.SolrConfig;
+public class RussianStemFilterFactory extends BaseTokenFilterFactory {
+
+ private char[] charset;
+
+ public void init(SolrConfig solrConfig, Map args) {
+ super.init(solrConfig, args);
+ charset = RussianCommon.getCharset(args.get("charset"));
+ }
+
+ public RussianStemFilter create(TokenStream in) {
+ return new RussianStemFilter(in,charset);
+ }
+}
+
diff --git a/src/java/org/apache/solr/analysis/ThaiWordFilterFactory.java b/src/java/org/apache/solr/analysis/ThaiWordFilterFactory.java
new file mode 100644
index 00000000000..670f2dbde53
--- /dev/null
+++ b/src/java/org/apache/solr/analysis/ThaiWordFilterFactory.java
@@ -0,0 +1,35 @@
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+package org.apache.solr.analysis;
+import org.apache.lucene.analysis.th.*;
+import java.io.IOException;
+import java.util.Locale;
+import java.lang.Character.UnicodeBlock;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import java.text.BreakIterator;
+import java.util.Map;
+public class ThaiWordFilterFactory extends BaseTokenFilterFactory {
+ public ThaiWordFilter create(TokenStream input) {
+ return new ThaiWordFilter(input);
+ }
+}
+
diff --git a/src/java/org/apache/solr/util/SuggestMissingFactories.java b/src/java/org/apache/solr/util/SuggestMissingFactories.java
new file mode 100644
index 00000000000..9556c1e8cbe
--- /dev/null
+++ b/src/java/org/apache/solr/util/SuggestMissingFactories.java
@@ -0,0 +1,213 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.util;
+
+import java.io.Reader;
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.Method;
+import java.lang.reflect.Modifier;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.net.MalformedURLException;
+import java.util.Collection;
+import java.util.Map;
+import java.util.HashSet;
+import java.util.HashMap;
+import java.util.Enumeration;
+import java.util.jar.*;
+
+/**
+ * Given a list of Jar files, suggest missing analysis factories.
+ *
+ * @version $Id$
+ */
+public class SuggestMissingFactories {
+
+ public static void main(String[] args) throws ClassNotFoundException, IOException, NoSuchMethodException {
+
+ final File[] files = new File[args.length];
+ for (int i = 0; i < args.length; i++) {
+ files[i] = new File(args[i]);
+ }
+ final FindClasses finder = new FindClasses(files);
+ final ClassLoader cl = finder.getClassLoader();
+
+ final Class TOKENSTREAM
+ = cl.loadClass("org.apache.lucene.analysis.TokenStream");
+ final Class TOKENIZER
+ = cl.loadClass("org.apache.lucene.analysis.Tokenizer");
+ final Class TOKENFILTER
+ = cl.loadClass("org.apache.lucene.analysis.TokenFilter");
+ final Class TOKENIZERFACTORY
+ = cl.loadClass("org.apache.solr.analysis.TokenizerFactory");
+ final Class TOKENFILTERFACTORY
+ = cl.loadClass("org.apache.solr.analysis.TokenFilterFactory");
+
+
+ final HashSet result
+ = new HashSet(finder.findExtends(TOKENIZER));
+ result.addAll(finder.findExtends(TOKENFILTER));
+
+ result.removeAll(finder.findMethodReturns
+ (finder.findExtends(TOKENIZERFACTORY),
+ "create",
+ Reader.class).values());
+ result.removeAll(finder.findMethodReturns
+ (finder.findExtends(TOKENFILTERFACTORY),
+ "create",
+ TOKENSTREAM).values());
+
+ for (final Class c : result) {
+ System.out.println(c.getName());
+ }
+ }
+
+}
+
+/**
+ * Takes in a clazz name and a jar and finds
+ * all classes in that jar that extend clazz.
+ */
+class FindClasses {
+
+ /**
+ * Simple command line test method
+ */
+ public static void main(String[] args)
+ throws ClassNotFoundException, IOException, NoSuchMethodException {
+
+ FindClasses finder = new FindClasses(new File(args[1]));
+ ClassLoader cl = finder.getClassLoader();
+ Class clazz = cl.loadClass(args[0]);
+ if (args.length == 2) {
+
+ System.out.println("Finding all extenders of " + clazz.getName());
+ for (Class c : finder.findExtends(clazz)) {
+ System.out.println(c.getName());
+ }
+ } else {
+ String methName = args[2];
+ System.out.println("Finding all extenders of " + clazz.getName() +
+ " with method: " + methName);
+
+ Class[] methArgs = new Class[args.length-3];
+ for (int i = 3; i < args.length; i++) {
+ methArgs[i-3] = cl.loadClass(args[i]);
+ }
+ Map map = finder.findMethodReturns
+ (finder.findExtends(clazz),methName, methArgs);
+
+ for (Class key : map.keySet()) {
+ System.out.println(key.getName() + " => " + map.get(key).getName());
+ }
+
+
+ }
+ }
+
+ private JarFile[] jarFiles;
+ private ClassLoader cl;
+ public FindClasses(File... jars) throws IOException {
+
+
+ jarFiles = new JarFile[jars.length];
+ URL[] urls = new URL[jars.length];
+ try {
+ for (int i =0; i < jars.length; i++) {
+ jarFiles[i] = new JarFile(jars[i]);
+ urls[i] = jars[i].toURL();
+ }
+ } catch (MalformedURLException e) {
+ throw new RuntimeException
+ ("WTF, how can JarFile.toURL() be malformed?", e);
+ }
+
+ this.cl = new URLClassLoader(urls, this.getClass().getClassLoader());
+ }
+
+ /**
+ * returns a class loader that includes the jar used to
+ * construct this instance
+ */
+ public ClassLoader getClassLoader() {
+ return this.cl;
+ }
+
+ /**
+ * Find useful concrete (ie: not anonymous, not abstract, not an interface)
+ * classes that extend clazz
+ */
+ public Collection findExtends(Class> clazz)
+ throws ClassNotFoundException {
+
+ HashSet results = new HashSet();
+
+ for (JarFile jarFile : jarFiles) {
+ for (Enumeration e = jarFile.entries();
+ e.hasMoreElements() ;) {
+
+ String n = e.nextElement().getName();
+ if (n.endsWith(".class")) {
+ String cn = n.replace("/",".").substring(0,n.length()-6);
+ Class> target;
+ try {
+ target = cl.loadClass(cn);
+ } catch (NoClassDefFoundError e1) {
+ throw new ClassNotFoundException
+ ("Can't load: " + cn, e1);
+ }
+
+ if (clazz.isAssignableFrom(target)
+ && !target.isAnonymousClass()) {
+
+ int mods = target.getModifiers();
+ if (!(Modifier.isAbstract(mods) ||
+ Modifier.isInterface(mods))) {
+ results.add(target);
+ }
+ }
+ }
+ }
+ }
+ return results;
+ }
+
+ /**
+ * Given a collection of classes, returns a Map containing the
+ * subset of those classes that impliment the method specified,
+ * where the value in the map is the return type of the method
+ */
+ public Map findMethodReturns(Collection clazzes,
+ String methodName,
+ Class... parameterTypes)
+ throws NoSuchMethodException{
+
+ HashMap results = new HashMap();
+ for (Class clazz : clazzes) {
+ try {
+ Method m = clazz.getMethod(methodName, parameterTypes);
+ results.put(clazz, m.getReturnType());
+ } catch (NoSuchMethodException e) {
+ /* :NOOP: we expect this and skip clazz */
+ }
+ }
+ return results;
+ }
+}
+