mirror of https://github.com/apache/lucene.git
SOLR-396: new build system support for generating stub tokenizer/tokenfilter factories when lucene jars are updated. includes newly generated factories for all lucene-analyzers-2.2.0.jar langauges
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@593359 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
12a1302ee7
commit
99dc6e24eb
|
@ -226,6 +226,12 @@ Other Changes
|
||||||
5. SOLR-367: The create method in all TokenFilter and Tokenizer Factories
|
5. SOLR-367: The create method in all TokenFilter and Tokenizer Factories
|
||||||
provided by Solr now declare their specific return types instead of just
|
provided by Solr now declare their specific return types instead of just
|
||||||
using "TokenStream" (hossman)
|
using "TokenStream" (hossman)
|
||||||
|
|
||||||
|
6. SOLR-396: Hooks add to build system for automatic generation of (stub)
|
||||||
|
Tokenizer and TokenFilter Factories.
|
||||||
|
Also: new Factories for all Tokenizers and TokenFilters provided by the
|
||||||
|
lucene-analyzers-2.2.0.jar -- includes support for German, Chinese,
|
||||||
|
Russan, Dutch, Greek, Brazilian, Thai, and French. (hossman)
|
||||||
|
|
||||||
|
|
||||||
================== Release 1.2, 20070602 ==================
|
================== Release 1.2, 20070602 ==================
|
||||||
|
|
73
build.xml
73
build.xml
|
@ -258,6 +258,79 @@
|
||||||
</javadoc>
|
</javadoc>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
|
<target name="stub-factories" depends="dist-jar"
|
||||||
|
description="Generates stub factories as needed">
|
||||||
|
|
||||||
|
<path id="stub.jars">
|
||||||
|
<!-- this needs to be a list of all jars that might contain
|
||||||
|
classes we want to build factories for
|
||||||
|
-->
|
||||||
|
<fileset dir="${lib}">
|
||||||
|
<include name="lucene-*.jar"/>
|
||||||
|
</fileset>
|
||||||
|
<fileset dir="${dist}">
|
||||||
|
<include name="*.jar"/>
|
||||||
|
<exclude name="*solrj*.jar"/>
|
||||||
|
</fileset>
|
||||||
|
</path>
|
||||||
|
<pathconvert property="jar.list" pathsep=" " refid="stub.jars" />
|
||||||
|
<property name="stub.list" value="${dest}/need-stub-factories.txt" />
|
||||||
|
<java fork="false"
|
||||||
|
classname="org.apache.solr.util.SuggestMissingFactories"
|
||||||
|
logError="true"
|
||||||
|
failonerror="true"
|
||||||
|
classpathref="test.run.classpath"
|
||||||
|
output="${stub.list}">
|
||||||
|
<arg line="${jar.list}" />
|
||||||
|
</java>
|
||||||
|
<fail unless="stub.src.path">...
|
||||||
|
|
||||||
|
This task requires that the property 'stub.src.path' be set.
|
||||||
|
|
||||||
|
It must contain a "path" listing directories containing source
|
||||||
|
files that this task should use when looking for classes that
|
||||||
|
need factories created, the format is platform specific --
|
||||||
|
typically it is colon seperated in Unix, semi-colon seperated
|
||||||
|
on windows, ie:
|
||||||
|
|
||||||
|
ant stub-factories -Dstub.src.path="./src:../lucene/contrib:../lucene/src/java"
|
||||||
|
|
||||||
|
FYI: The file ${stub.list} contains a list of classes
|
||||||
|
that seem to need stub factories. (if java files can be found to
|
||||||
|
use as guides for creating them).
|
||||||
|
</fail>
|
||||||
|
|
||||||
|
<pathconvert pathsep=" " property="stub.src.dirs">
|
||||||
|
<path>
|
||||||
|
<pathelement path="${stub.src.path}"/>
|
||||||
|
</path>
|
||||||
|
</pathconvert>
|
||||||
|
<exec executable="${basedir}/src/dev-tools/stub-analysis-factory-maker.pl"
|
||||||
|
dir="src/java/org/apache/solr/analysis/"
|
||||||
|
failonerror="true">
|
||||||
|
<redirector input="${stub.list}">
|
||||||
|
<!-- place to put special case classes we want to ignore -->
|
||||||
|
<inputfilterchain>
|
||||||
|
<linecontainsregexp negate="true">
|
||||||
|
<!-- only for internal Solr highlighting purposes -->
|
||||||
|
<regexp pattern="TokenOrderingFilter"/>
|
||||||
|
</linecontainsregexp>
|
||||||
|
<linecontainsregexp negate="true">
|
||||||
|
<!-- no way to leverage this in Solr -->
|
||||||
|
<regexp pattern="CachingTokenFilter"/>
|
||||||
|
</linecontainsregexp>
|
||||||
|
<linecontainsregexp negate="true">
|
||||||
|
<!-- solr and lucene both have one? ? ? ? -->
|
||||||
|
<regexp pattern="LengthFilter"/>
|
||||||
|
</linecontainsregexp>
|
||||||
|
</inputfilterchain>
|
||||||
|
</redirector>
|
||||||
|
<arg line="${stub.src.dirs}"/>
|
||||||
|
</exec>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<!-- ========================================================================= -->
|
<!-- ========================================================================= -->
|
||||||
<!-- ===================== CLIENT: solrj ============================= -->
|
<!-- ===================== CLIENT: solrj ============================= -->
|
||||||
<!-- ========================================================================= -->
|
<!-- ========================================================================= -->
|
||||||
|
|
|
@ -0,0 +1,146 @@
|
||||||
|
#!/usr/bin/perl
|
||||||
|
my $ASL = q{
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
# $Id:$
|
||||||
|
# $URL:$
|
||||||
|
#
|
||||||
|
# What this script does...
|
||||||
|
#
|
||||||
|
# 1) reads a list of fully qualified package.ClassNames from STDIN
|
||||||
|
# 2) gets a list of src dirs from command line
|
||||||
|
# 3) crawl the source dirs, looking for .java files corrisponding to
|
||||||
|
# the ClassNames, if a match is found, creates a factory for it in
|
||||||
|
# the current working directory using info about the constructor
|
||||||
|
# in the orriginal .java file
|
||||||
|
#
|
||||||
|
# Note...
|
||||||
|
# * any ClassNames not found will be logged to stdout
|
||||||
|
# * script assumes generated factorories in "org.apache.solr.analysis package
|
||||||
|
# * factories will be compilable only if orriginal class had no special
|
||||||
|
# constructor args. otherwise it will have an abstract init method that
|
||||||
|
# needs filled in.
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
use File::Find;
|
||||||
|
|
||||||
|
|
||||||
|
my %classes = ();
|
||||||
|
while (<STDIN>) {
|
||||||
|
chomp;
|
||||||
|
$classes{$_} = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
find({wanted => \&wanted,
|
||||||
|
no_chdir => 1,
|
||||||
|
},
|
||||||
|
@ARGV);
|
||||||
|
|
||||||
|
sub wanted {
|
||||||
|
|
||||||
|
my $file = $File::Find::name;
|
||||||
|
|
||||||
|
return unless $file =~ m{/([^/]*)\.java};
|
||||||
|
my $class = $1;
|
||||||
|
|
||||||
|
open(my $f, "<", $file) or die "can't open $file: $!";
|
||||||
|
my $data;
|
||||||
|
{
|
||||||
|
local $/; # slurp
|
||||||
|
$data = <$f>;
|
||||||
|
}
|
||||||
|
close $f;
|
||||||
|
|
||||||
|
# skip abstract classes
|
||||||
|
return if ($data =~ m{abstract\s+(\w+\s+)*class\s+$class});
|
||||||
|
|
||||||
|
my $pack = "EMPTYPACKAGE";
|
||||||
|
if ($data =~ m/package\s+(.*);/) {
|
||||||
|
$pack = $1;
|
||||||
|
}
|
||||||
|
|
||||||
|
my $fullname = "${pack}.${class}";
|
||||||
|
# only looking for certain classes
|
||||||
|
return unless $classes{$fullname};
|
||||||
|
|
||||||
|
my @imports = $data =~ m/import\s+.*;/g;
|
||||||
|
|
||||||
|
if ($data =~ m{public \s+ ((?:\w+\s+)*) $class \s*\(\s* ([^\)]*) \) }sx) {
|
||||||
|
my $modifiers = $1;
|
||||||
|
my $argline = $2;
|
||||||
|
|
||||||
|
my $mainArgType;
|
||||||
|
my $mainArg;
|
||||||
|
my @orderedArgs;
|
||||||
|
|
||||||
|
my %args = map { my ($v,$k) = split /\s+/;
|
||||||
|
push @orderedArgs, $k;
|
||||||
|
if ($v =~ m/^(Reader|TokenStream)/) {
|
||||||
|
$mainArgType=$v;
|
||||||
|
$mainArg=$k;
|
||||||
|
}
|
||||||
|
($k, $v)
|
||||||
|
} split /\s*,\s*/, $argline;
|
||||||
|
|
||||||
|
my $type = ("Reader" eq $mainArgType) ? "Tokenizer" : "TokenFilter";
|
||||||
|
|
||||||
|
my $facClass = "${class}Factory";
|
||||||
|
my $facFile = "${facClass}.java";
|
||||||
|
die "$facFile exists" if -e $facFile;
|
||||||
|
open my $o, ">", $facFile
|
||||||
|
or die "can't write to $facFile: $!";
|
||||||
|
|
||||||
|
print $o "$ASL\n";
|
||||||
|
print $o "package org.apache.solr.analysis;\n";
|
||||||
|
print $o "import ${pack}.*;\n";
|
||||||
|
print $o "$_\n" foreach @imports;
|
||||||
|
print $o "import java.util.Map;\n";
|
||||||
|
print $o "public class ${facClass} extends Base${type}Factory {\n";
|
||||||
|
foreach my $arg (@orderedArgs) {
|
||||||
|
print $o " private $args{$arg} $arg;\n" unless $arg eq $mainArg;
|
||||||
|
}
|
||||||
|
if (1 < @orderedArgs) {
|
||||||
|
# we need to init something, stub it out
|
||||||
|
print $o " public abstract void init(SolrConfig solrConfig, Map<String, String> args) {\n";
|
||||||
|
print $o " super.init(solrConfig, args);\n";
|
||||||
|
print $o " // ABSTRACT BECAUSE IT'S A STUB .. FILL IT IN\n";
|
||||||
|
print $o " }\n";
|
||||||
|
}
|
||||||
|
print $o " public $class create($mainArgType $mainArg) {\n";
|
||||||
|
print $o " return new $class(", join(",", @orderedArgs), ");\n";
|
||||||
|
print $o " }\n";
|
||||||
|
print $o "}\n\n";
|
||||||
|
close $o;
|
||||||
|
|
||||||
|
delete $classes{$fullname}; # we're done with this one
|
||||||
|
} else {
|
||||||
|
print STDERR "can't stub $class\n";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (keys %classes) {
|
||||||
|
print STDERR "Can't find java files for...\n";
|
||||||
|
foreach (keys %classes) {
|
||||||
|
print STDERR "$_\n";
|
||||||
|
}
|
||||||
|
exit -1;
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
import org.apache.lucene.analysis.br.*;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Hashtable;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.Map;
|
||||||
|
public class BrazilianStemFilterFactory extends BaseTokenFilterFactory {
|
||||||
|
public BrazilianStemFilter create(TokenStream in) {
|
||||||
|
return new BrazilianStemFilter(in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
import org.apache.lucene.analysis.cjk.*;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.util.Map;
|
||||||
|
public class CJKTokenizerFactory extends BaseTokenizerFactory {
|
||||||
|
public CJKTokenizer create(Reader in) {
|
||||||
|
return new CJKTokenizer(in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
import org.apache.lucene.analysis.cn.*;
|
||||||
|
import java.util.Hashtable;
|
||||||
|
import org.apache.lucene.analysis.*;
|
||||||
|
import java.util.Map;
|
||||||
|
public class ChineseFilterFactory extends BaseTokenFilterFactory {
|
||||||
|
public ChineseFilter create(TokenStream in) {
|
||||||
|
return new ChineseFilter(in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
import org.apache.lucene.analysis.cn.*;
|
||||||
|
import java.io.Reader;
|
||||||
|
import org.apache.lucene.analysis.*;
|
||||||
|
import java.util.Map;
|
||||||
|
public class ChineseTokenizerFactory extends BaseTokenizerFactory {
|
||||||
|
public ChineseTokenizer create(Reader in) {
|
||||||
|
return new ChineseTokenizer(in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,36 @@
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
import org.apache.lucene.analysis.nl.*;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Map;
|
||||||
|
public class DutchStemFilterFactory extends BaseTokenFilterFactory {
|
||||||
|
public DutchStemFilter create(TokenStream _in) {
|
||||||
|
return new DutchStemFilter(_in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
import org.apache.lucene.analysis.fr.*;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Hashtable;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.Map;
|
||||||
|
public class FrenchStemFilterFactory extends BaseTokenFilterFactory {
|
||||||
|
public FrenchStemFilter create(TokenStream in) {
|
||||||
|
return new FrenchStemFilter(in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,33 @@
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
import org.apache.lucene.analysis.de.*;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.Map;
|
||||||
|
public class GermanStemFilterFactory extends BaseTokenFilterFactory {
|
||||||
|
public GermanStemFilter create(TokenStream in) {
|
||||||
|
return new GermanStemFilter(in);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,55 @@
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
import org.apache.lucene.analysis.el.*;
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import org.apache.solr.core.SolrConfig;
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
|
import org.apache.solr.common.SolrException.ErrorCode;
|
||||||
|
public class GreekLowerCaseFilterFactory extends BaseTokenFilterFactory {
|
||||||
|
|
||||||
|
private static Map<String,char[]> CHARSETS = new HashMap<String,char[]>();
|
||||||
|
static {
|
||||||
|
CHARSETS.put("UnicodeGreek",GreekCharsets.UnicodeGreek);
|
||||||
|
CHARSETS.put("ISO",GreekCharsets.ISO);
|
||||||
|
CHARSETS.put("CP1253",GreekCharsets.CP1253);
|
||||||
|
}
|
||||||
|
|
||||||
|
private char[] charset = GreekCharsets.UnicodeGreek;
|
||||||
|
|
||||||
|
|
||||||
|
public void init(SolrConfig solrConfig, Map<String, String> args) {
|
||||||
|
super.init(solrConfig, args);
|
||||||
|
String charsetName = args.get("charset");
|
||||||
|
if (null != charsetName) charset = CHARSETS.get(charsetName);
|
||||||
|
if (null == charset) {
|
||||||
|
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||||
|
"Don't understand charset: " + charsetName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
public GreekLowerCaseFilter create(TokenStream in) {
|
||||||
|
return new GreekLowerCaseFilter(in,charset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,47 @@
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
import org.apache.lucene.analysis.ru.*;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import org.apache.solr.core.SolrConfig;
|
||||||
|
import org.apache.solr.common.SolrException;
|
||||||
|
import org.apache.solr.common.SolrException.ErrorCode;
|
||||||
|
public class RussianCommon {
|
||||||
|
|
||||||
|
private static Map<String,char[]> CHARSETS = new HashMap<String,char[]>();
|
||||||
|
static {
|
||||||
|
CHARSETS.put("UnicodeRussian",RussianCharsets.UnicodeRussian);
|
||||||
|
CHARSETS.put("KOI8",RussianCharsets.KOI8);
|
||||||
|
CHARSETS.put("CP1251",RussianCharsets.CP1251);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static char[] getCharset(String name) {
|
||||||
|
if (null == name)
|
||||||
|
return RussianCharsets.UnicodeRussian;
|
||||||
|
|
||||||
|
char[] charset = CHARSETS.get(name);
|
||||||
|
if (null == charset) {
|
||||||
|
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||||
|
"Don't understand charset: " + name);
|
||||||
|
}
|
||||||
|
return charset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
import org.apache.lucene.analysis.ru.*;
|
||||||
|
import java.io.Reader;
|
||||||
|
import org.apache.lucene.analysis.CharTokenizer;
|
||||||
|
import java.util.Map;
|
||||||
|
import org.apache.solr.core.SolrConfig;
|
||||||
|
public class RussianLetterTokenizerFactory extends BaseTokenizerFactory {
|
||||||
|
|
||||||
|
private char[] charset;
|
||||||
|
|
||||||
|
public void init(SolrConfig solrConfig, Map<String, String> args) {
|
||||||
|
super.init(solrConfig, args);
|
||||||
|
charset = RussianCommon.getCharset(args.get("charset"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public RussianLetterTokenizer create(Reader in) {
|
||||||
|
return new RussianLetterTokenizer(in,charset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
import org.apache.lucene.analysis.ru.*;
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import java.util.Map;
|
||||||
|
import org.apache.solr.core.SolrConfig;
|
||||||
|
public class RussianLowerCaseFilterFactory extends BaseTokenFilterFactory {
|
||||||
|
|
||||||
|
private char[] charset;
|
||||||
|
|
||||||
|
public void init(SolrConfig solrConfig, Map<String, String> args) {
|
||||||
|
super.init(solrConfig, args);
|
||||||
|
charset = RussianCommon.getCharset(args.get("charset"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public RussianLowerCaseFilter create(TokenStream in) {
|
||||||
|
return new RussianLowerCaseFilter(in,charset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
import org.apache.lucene.analysis.ru.*;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Map;
|
||||||
|
import org.apache.solr.core.SolrConfig;
|
||||||
|
public class RussianStemFilterFactory extends BaseTokenFilterFactory {
|
||||||
|
|
||||||
|
private char[] charset;
|
||||||
|
|
||||||
|
public void init(SolrConfig solrConfig, Map<String, String> args) {
|
||||||
|
super.init(solrConfig, args);
|
||||||
|
charset = RussianCommon.getCharset(args.get("charset"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public RussianStemFilter create(TokenStream in) {
|
||||||
|
return new RussianStemFilter(in,charset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
package org.apache.solr.analysis;
|
||||||
|
import org.apache.lucene.analysis.th.*;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.lang.Character.UnicodeBlock;
|
||||||
|
import org.apache.lucene.analysis.Token;
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import java.text.BreakIterator;
|
||||||
|
import java.util.Map;
|
||||||
|
public class ThaiWordFilterFactory extends BaseTokenFilterFactory {
|
||||||
|
public ThaiWordFilter create(TokenStream input) {
|
||||||
|
return new ThaiWordFilter(input);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,213 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.util;
|
||||||
|
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.lang.reflect.Method;
|
||||||
|
import java.lang.reflect.Modifier;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.net.URLClassLoader;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Enumeration;
|
||||||
|
import java.util.jar.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a list of Jar files, suggest missing analysis factories.
|
||||||
|
*
|
||||||
|
* @version $Id$
|
||||||
|
*/
|
||||||
|
public class SuggestMissingFactories {
|
||||||
|
|
||||||
|
public static void main(String[] args) throws ClassNotFoundException, IOException, NoSuchMethodException {
|
||||||
|
|
||||||
|
final File[] files = new File[args.length];
|
||||||
|
for (int i = 0; i < args.length; i++) {
|
||||||
|
files[i] = new File(args[i]);
|
||||||
|
}
|
||||||
|
final FindClasses finder = new FindClasses(files);
|
||||||
|
final ClassLoader cl = finder.getClassLoader();
|
||||||
|
|
||||||
|
final Class TOKENSTREAM
|
||||||
|
= cl.loadClass("org.apache.lucene.analysis.TokenStream");
|
||||||
|
final Class TOKENIZER
|
||||||
|
= cl.loadClass("org.apache.lucene.analysis.Tokenizer");
|
||||||
|
final Class TOKENFILTER
|
||||||
|
= cl.loadClass("org.apache.lucene.analysis.TokenFilter");
|
||||||
|
final Class TOKENIZERFACTORY
|
||||||
|
= cl.loadClass("org.apache.solr.analysis.TokenizerFactory");
|
||||||
|
final Class TOKENFILTERFACTORY
|
||||||
|
= cl.loadClass("org.apache.solr.analysis.TokenFilterFactory");
|
||||||
|
|
||||||
|
|
||||||
|
final HashSet<Class> result
|
||||||
|
= new HashSet<Class>(finder.findExtends(TOKENIZER));
|
||||||
|
result.addAll(finder.findExtends(TOKENFILTER));
|
||||||
|
|
||||||
|
result.removeAll(finder.findMethodReturns
|
||||||
|
(finder.findExtends(TOKENIZERFACTORY),
|
||||||
|
"create",
|
||||||
|
Reader.class).values());
|
||||||
|
result.removeAll(finder.findMethodReturns
|
||||||
|
(finder.findExtends(TOKENFILTERFACTORY),
|
||||||
|
"create",
|
||||||
|
TOKENSTREAM).values());
|
||||||
|
|
||||||
|
for (final Class c : result) {
|
||||||
|
System.out.println(c.getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Takes in a clazz name and a jar and finds
|
||||||
|
* all classes in that jar that extend clazz.
|
||||||
|
*/
|
||||||
|
class FindClasses {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple command line test method
|
||||||
|
*/
|
||||||
|
public static void main(String[] args)
|
||||||
|
throws ClassNotFoundException, IOException, NoSuchMethodException {
|
||||||
|
|
||||||
|
FindClasses finder = new FindClasses(new File(args[1]));
|
||||||
|
ClassLoader cl = finder.getClassLoader();
|
||||||
|
Class clazz = cl.loadClass(args[0]);
|
||||||
|
if (args.length == 2) {
|
||||||
|
|
||||||
|
System.out.println("Finding all extenders of " + clazz.getName());
|
||||||
|
for (Class c : finder.findExtends(clazz)) {
|
||||||
|
System.out.println(c.getName());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
String methName = args[2];
|
||||||
|
System.out.println("Finding all extenders of " + clazz.getName() +
|
||||||
|
" with method: " + methName);
|
||||||
|
|
||||||
|
Class[] methArgs = new Class[args.length-3];
|
||||||
|
for (int i = 3; i < args.length; i++) {
|
||||||
|
methArgs[i-3] = cl.loadClass(args[i]);
|
||||||
|
}
|
||||||
|
Map<Class,Class> map = finder.findMethodReturns
|
||||||
|
(finder.findExtends(clazz),methName, methArgs);
|
||||||
|
|
||||||
|
for (Class key : map.keySet()) {
|
||||||
|
System.out.println(key.getName() + " => " + map.get(key).getName());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private JarFile[] jarFiles;
|
||||||
|
private ClassLoader cl;
|
||||||
|
public FindClasses(File... jars) throws IOException {
|
||||||
|
|
||||||
|
|
||||||
|
jarFiles = new JarFile[jars.length];
|
||||||
|
URL[] urls = new URL[jars.length];
|
||||||
|
try {
|
||||||
|
for (int i =0; i < jars.length; i++) {
|
||||||
|
jarFiles[i] = new JarFile(jars[i]);
|
||||||
|
urls[i] = jars[i].toURL();
|
||||||
|
}
|
||||||
|
} catch (MalformedURLException e) {
|
||||||
|
throw new RuntimeException
|
||||||
|
("WTF, how can JarFile.toURL() be malformed?", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
this.cl = new URLClassLoader(urls, this.getClass().getClassLoader());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* returns a class loader that includes the jar used to
|
||||||
|
* construct this instance
|
||||||
|
*/
|
||||||
|
public ClassLoader getClassLoader() {
|
||||||
|
return this.cl;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find useful concrete (ie: not anonymous, not abstract, not an interface)
|
||||||
|
* classes that extend clazz
|
||||||
|
*/
|
||||||
|
public Collection<Class> findExtends(Class<?> clazz)
|
||||||
|
throws ClassNotFoundException {
|
||||||
|
|
||||||
|
HashSet<Class> results = new HashSet<Class>();
|
||||||
|
|
||||||
|
for (JarFile jarFile : jarFiles) {
|
||||||
|
for (Enumeration<JarEntry> e = jarFile.entries();
|
||||||
|
e.hasMoreElements() ;) {
|
||||||
|
|
||||||
|
String n = e.nextElement().getName();
|
||||||
|
if (n.endsWith(".class")) {
|
||||||
|
String cn = n.replace("/",".").substring(0,n.length()-6);
|
||||||
|
Class<?> target;
|
||||||
|
try {
|
||||||
|
target = cl.loadClass(cn);
|
||||||
|
} catch (NoClassDefFoundError e1) {
|
||||||
|
throw new ClassNotFoundException
|
||||||
|
("Can't load: " + cn, e1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (clazz.isAssignableFrom(target)
|
||||||
|
&& !target.isAnonymousClass()) {
|
||||||
|
|
||||||
|
int mods = target.getModifiers();
|
||||||
|
if (!(Modifier.isAbstract(mods) ||
|
||||||
|
Modifier.isInterface(mods))) {
|
||||||
|
results.add(target);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a collection of classes, returns a Map containing the
|
||||||
|
* subset of those classes that impliment the method specified,
|
||||||
|
* where the value in the map is the return type of the method
|
||||||
|
*/
|
||||||
|
public Map<Class,Class> findMethodReturns(Collection<Class> clazzes,
|
||||||
|
String methodName,
|
||||||
|
Class... parameterTypes)
|
||||||
|
throws NoSuchMethodException{
|
||||||
|
|
||||||
|
HashMap<Class,Class> results = new HashMap<Class,Class>();
|
||||||
|
for (Class clazz : clazzes) {
|
||||||
|
try {
|
||||||
|
Method m = clazz.getMethod(methodName, parameterTypes);
|
||||||
|
results.put(clazz, m.getReturnType());
|
||||||
|
} catch (NoSuchMethodException e) {
|
||||||
|
/* :NOOP: we expect this and skip clazz */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue