mirror of https://github.com/apache/lucene.git
SOLR-396: new build system support for generating stub tokenizer/tokenfilter factories when lucene jars are updated. includes newly generated factories for all lucene-analyzers-2.2.0.jar langauges
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@593359 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
12a1302ee7
commit
99dc6e24eb
|
@ -227,6 +227,12 @@ Other Changes
|
|||
provided by Solr now declare their specific return types instead of just
|
||||
using "TokenStream" (hossman)
|
||||
|
||||
6. SOLR-396: Hooks add to build system for automatic generation of (stub)
|
||||
Tokenizer and TokenFilter Factories.
|
||||
Also: new Factories for all Tokenizers and TokenFilters provided by the
|
||||
lucene-analyzers-2.2.0.jar -- includes support for German, Chinese,
|
||||
Russan, Dutch, Greek, Brazilian, Thai, and French. (hossman)
|
||||
|
||||
|
||||
================== Release 1.2, 20070602 ==================
|
||||
|
||||
|
|
73
build.xml
73
build.xml
|
@ -258,6 +258,79 @@
|
|||
</javadoc>
|
||||
</target>
|
||||
|
||||
<target name="stub-factories" depends="dist-jar"
|
||||
description="Generates stub factories as needed">
|
||||
|
||||
<path id="stub.jars">
|
||||
<!-- this needs to be a list of all jars that might contain
|
||||
classes we want to build factories for
|
||||
-->
|
||||
<fileset dir="${lib}">
|
||||
<include name="lucene-*.jar"/>
|
||||
</fileset>
|
||||
<fileset dir="${dist}">
|
||||
<include name="*.jar"/>
|
||||
<exclude name="*solrj*.jar"/>
|
||||
</fileset>
|
||||
</path>
|
||||
<pathconvert property="jar.list" pathsep=" " refid="stub.jars" />
|
||||
<property name="stub.list" value="${dest}/need-stub-factories.txt" />
|
||||
<java fork="false"
|
||||
classname="org.apache.solr.util.SuggestMissingFactories"
|
||||
logError="true"
|
||||
failonerror="true"
|
||||
classpathref="test.run.classpath"
|
||||
output="${stub.list}">
|
||||
<arg line="${jar.list}" />
|
||||
</java>
|
||||
<fail unless="stub.src.path">...
|
||||
|
||||
This task requires that the property 'stub.src.path' be set.
|
||||
|
||||
It must contain a "path" listing directories containing source
|
||||
files that this task should use when looking for classes that
|
||||
need factories created, the format is platform specific --
|
||||
typically it is colon seperated in Unix, semi-colon seperated
|
||||
on windows, ie:
|
||||
|
||||
ant stub-factories -Dstub.src.path="./src:../lucene/contrib:../lucene/src/java"
|
||||
|
||||
FYI: The file ${stub.list} contains a list of classes
|
||||
that seem to need stub factories. (if java files can be found to
|
||||
use as guides for creating them).
|
||||
</fail>
|
||||
|
||||
<pathconvert pathsep=" " property="stub.src.dirs">
|
||||
<path>
|
||||
<pathelement path="${stub.src.path}"/>
|
||||
</path>
|
||||
</pathconvert>
|
||||
<exec executable="${basedir}/src/dev-tools/stub-analysis-factory-maker.pl"
|
||||
dir="src/java/org/apache/solr/analysis/"
|
||||
failonerror="true">
|
||||
<redirector input="${stub.list}">
|
||||
<!-- place to put special case classes we want to ignore -->
|
||||
<inputfilterchain>
|
||||
<linecontainsregexp negate="true">
|
||||
<!-- only for internal Solr highlighting purposes -->
|
||||
<regexp pattern="TokenOrderingFilter"/>
|
||||
</linecontainsregexp>
|
||||
<linecontainsregexp negate="true">
|
||||
<!-- no way to leverage this in Solr -->
|
||||
<regexp pattern="CachingTokenFilter"/>
|
||||
</linecontainsregexp>
|
||||
<linecontainsregexp negate="true">
|
||||
<!-- solr and lucene both have one? ? ? ? -->
|
||||
<regexp pattern="LengthFilter"/>
|
||||
</linecontainsregexp>
|
||||
</inputfilterchain>
|
||||
</redirector>
|
||||
<arg line="${stub.src.dirs}"/>
|
||||
</exec>
|
||||
</target>
|
||||
|
||||
|
||||
|
||||
<!-- ========================================================================= -->
|
||||
<!-- ===================== CLIENT: solrj ============================= -->
|
||||
<!-- ========================================================================= -->
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
#!/usr/bin/perl
|
||||
my $ASL = q{
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
};
|
||||
|
||||
# $Id:$
|
||||
# $URL:$
|
||||
#
|
||||
# What this script does...
|
||||
#
|
||||
# 1) reads a list of fully qualified package.ClassNames from STDIN
|
||||
# 2) gets a list of src dirs from command line
|
||||
# 3) crawl the source dirs, looking for .java files corrisponding to
|
||||
# the ClassNames, if a match is found, creates a factory for it in
|
||||
# the current working directory using info about the constructor
|
||||
# in the orriginal .java file
|
||||
#
|
||||
# Note...
|
||||
# * any ClassNames not found will be logged to stdout
|
||||
# * script assumes generated factorories in "org.apache.solr.analysis package
|
||||
# * factories will be compilable only if orriginal class had no special
|
||||
# constructor args. otherwise it will have an abstract init method that
|
||||
# needs filled in.
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use File::Find;
|
||||
|
||||
|
||||
my %classes = ();
|
||||
while (<STDIN>) {
|
||||
chomp;
|
||||
$classes{$_} = 1;
|
||||
}
|
||||
|
||||
find({wanted => \&wanted,
|
||||
no_chdir => 1,
|
||||
},
|
||||
@ARGV);
|
||||
|
||||
sub wanted {
|
||||
|
||||
my $file = $File::Find::name;
|
||||
|
||||
return unless $file =~ m{/([^/]*)\.java};
|
||||
my $class = $1;
|
||||
|
||||
open(my $f, "<", $file) or die "can't open $file: $!";
|
||||
my $data;
|
||||
{
|
||||
local $/; # slurp
|
||||
$data = <$f>;
|
||||
}
|
||||
close $f;
|
||||
|
||||
# skip abstract classes
|
||||
return if ($data =~ m{abstract\s+(\w+\s+)*class\s+$class});
|
||||
|
||||
my $pack = "EMPTYPACKAGE";
|
||||
if ($data =~ m/package\s+(.*);/) {
|
||||
$pack = $1;
|
||||
}
|
||||
|
||||
my $fullname = "${pack}.${class}";
|
||||
# only looking for certain classes
|
||||
return unless $classes{$fullname};
|
||||
|
||||
my @imports = $data =~ m/import\s+.*;/g;
|
||||
|
||||
if ($data =~ m{public \s+ ((?:\w+\s+)*) $class \s*\(\s* ([^\)]*) \) }sx) {
|
||||
my $modifiers = $1;
|
||||
my $argline = $2;
|
||||
|
||||
my $mainArgType;
|
||||
my $mainArg;
|
||||
my @orderedArgs;
|
||||
|
||||
my %args = map { my ($v,$k) = split /\s+/;
|
||||
push @orderedArgs, $k;
|
||||
if ($v =~ m/^(Reader|TokenStream)/) {
|
||||
$mainArgType=$v;
|
||||
$mainArg=$k;
|
||||
}
|
||||
($k, $v)
|
||||
} split /\s*,\s*/, $argline;
|
||||
|
||||
my $type = ("Reader" eq $mainArgType) ? "Tokenizer" : "TokenFilter";
|
||||
|
||||
my $facClass = "${class}Factory";
|
||||
my $facFile = "${facClass}.java";
|
||||
die "$facFile exists" if -e $facFile;
|
||||
open my $o, ">", $facFile
|
||||
or die "can't write to $facFile: $!";
|
||||
|
||||
print $o "$ASL\n";
|
||||
print $o "package org.apache.solr.analysis;\n";
|
||||
print $o "import ${pack}.*;\n";
|
||||
print $o "$_\n" foreach @imports;
|
||||
print $o "import java.util.Map;\n";
|
||||
print $o "public class ${facClass} extends Base${type}Factory {\n";
|
||||
foreach my $arg (@orderedArgs) {
|
||||
print $o " private $args{$arg} $arg;\n" unless $arg eq $mainArg;
|
||||
}
|
||||
if (1 < @orderedArgs) {
|
||||
# we need to init something, stub it out
|
||||
print $o " public abstract void init(SolrConfig solrConfig, Map<String, String> args) {\n";
|
||||
print $o " super.init(solrConfig, args);\n";
|
||||
print $o " // ABSTRACT BECAUSE IT'S A STUB .. FILL IT IN\n";
|
||||
print $o " }\n";
|
||||
}
|
||||
print $o " public $class create($mainArgType $mainArg) {\n";
|
||||
print $o " return new $class(", join(",", @orderedArgs), ");\n";
|
||||
print $o " }\n";
|
||||
print $o "}\n\n";
|
||||
close $o;
|
||||
|
||||
delete $classes{$fullname}; # we're done with this one
|
||||
} else {
|
||||
print STDERR "can't stub $class\n";
|
||||
}
|
||||
}
|
||||
|
||||
if (keys %classes) {
|
||||
print STDERR "Can't find java files for...\n";
|
||||
foreach (keys %classes) {
|
||||
print STDERR "$_\n";
|
||||
}
|
||||
exit -1;
|
||||
}
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
import org.apache.lucene.analysis.br.*;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Hashtable;
|
||||
import java.util.Set;
|
||||
import java.util.Map;
|
||||
public class BrazilianStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public BrazilianStemFilter create(TokenStream in) {
|
||||
return new BrazilianStemFilter(in);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
import org.apache.lucene.analysis.cjk.*;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import java.io.Reader;
|
||||
import java.util.Map;
|
||||
public class CJKTokenizerFactory extends BaseTokenizerFactory {
|
||||
public CJKTokenizer create(Reader in) {
|
||||
return new CJKTokenizer(in);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
import org.apache.lucene.analysis.cn.*;
|
||||
import java.util.Hashtable;
|
||||
import org.apache.lucene.analysis.*;
|
||||
import java.util.Map;
|
||||
public class ChineseFilterFactory extends BaseTokenFilterFactory {
|
||||
public ChineseFilter create(TokenStream in) {
|
||||
return new ChineseFilter(in);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
import org.apache.lucene.analysis.cn.*;
|
||||
import java.io.Reader;
|
||||
import org.apache.lucene.analysis.*;
|
||||
import java.util.Map;
|
||||
public class ChineseTokenizerFactory extends BaseTokenizerFactory {
|
||||
public ChineseTokenizer create(Reader in) {
|
||||
return new ChineseTokenizer(in);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
import org.apache.lucene.analysis.nl.*;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.Map;
|
||||
import java.util.Map;
|
||||
public class DutchStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public DutchStemFilter create(TokenStream _in) {
|
||||
return new DutchStemFilter(_in);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
import org.apache.lucene.analysis.fr.*;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Hashtable;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.Map;
|
||||
public class FrenchStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public FrenchStemFilter create(TokenStream in) {
|
||||
return new FrenchStemFilter(in);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
import org.apache.lucene.analysis.de.*;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
import java.util.Map;
|
||||
public class GermanStemFilterFactory extends BaseTokenFilterFactory {
|
||||
public GermanStemFilter create(TokenStream in) {
|
||||
return new GermanStemFilter(in);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
import org.apache.lucene.analysis.el.*;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import org.apache.solr.core.SolrConfig;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
public class GreekLowerCaseFilterFactory extends BaseTokenFilterFactory {
|
||||
|
||||
private static Map<String,char[]> CHARSETS = new HashMap<String,char[]>();
|
||||
static {
|
||||
CHARSETS.put("UnicodeGreek",GreekCharsets.UnicodeGreek);
|
||||
CHARSETS.put("ISO",GreekCharsets.ISO);
|
||||
CHARSETS.put("CP1253",GreekCharsets.CP1253);
|
||||
}
|
||||
|
||||
private char[] charset = GreekCharsets.UnicodeGreek;
|
||||
|
||||
|
||||
public void init(SolrConfig solrConfig, Map<String, String> args) {
|
||||
super.init(solrConfig, args);
|
||||
String charsetName = args.get("charset");
|
||||
if (null != charsetName) charset = CHARSETS.get(charsetName);
|
||||
if (null == charset) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
"Don't understand charset: " + charsetName);
|
||||
}
|
||||
}
|
||||
public GreekLowerCaseFilter create(TokenStream in) {
|
||||
return new GreekLowerCaseFilter(in,charset);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
import org.apache.lucene.analysis.ru.*;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import org.apache.solr.core.SolrConfig;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
public class RussianCommon {
|
||||
|
||||
private static Map<String,char[]> CHARSETS = new HashMap<String,char[]>();
|
||||
static {
|
||||
CHARSETS.put("UnicodeRussian",RussianCharsets.UnicodeRussian);
|
||||
CHARSETS.put("KOI8",RussianCharsets.KOI8);
|
||||
CHARSETS.put("CP1251",RussianCharsets.CP1251);
|
||||
}
|
||||
|
||||
public static char[] getCharset(String name) {
|
||||
if (null == name)
|
||||
return RussianCharsets.UnicodeRussian;
|
||||
|
||||
char[] charset = CHARSETS.get(name);
|
||||
if (null == charset) {
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR,
|
||||
"Don't understand charset: " + name);
|
||||
}
|
||||
return charset;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
import org.apache.lucene.analysis.ru.*;
|
||||
import java.io.Reader;
|
||||
import org.apache.lucene.analysis.CharTokenizer;
|
||||
import java.util.Map;
|
||||
import org.apache.solr.core.SolrConfig;
|
||||
public class RussianLetterTokenizerFactory extends BaseTokenizerFactory {
|
||||
|
||||
private char[] charset;
|
||||
|
||||
public void init(SolrConfig solrConfig, Map<String, String> args) {
|
||||
super.init(solrConfig, args);
|
||||
charset = RussianCommon.getCharset(args.get("charset"));
|
||||
}
|
||||
|
||||
public RussianLetterTokenizer create(Reader in) {
|
||||
return new RussianLetterTokenizer(in,charset);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
import org.apache.lucene.analysis.ru.*;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import java.util.Map;
|
||||
import org.apache.solr.core.SolrConfig;
|
||||
public class RussianLowerCaseFilterFactory extends BaseTokenFilterFactory {
|
||||
|
||||
private char[] charset;
|
||||
|
||||
public void init(SolrConfig solrConfig, Map<String, String> args) {
|
||||
super.init(solrConfig, args);
|
||||
charset = RussianCommon.getCharset(args.get("charset"));
|
||||
}
|
||||
|
||||
public RussianLowerCaseFilter create(TokenStream in) {
|
||||
return new RussianLowerCaseFilter(in,charset);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
import org.apache.lucene.analysis.ru.*;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import org.apache.solr.core.SolrConfig;
|
||||
public class RussianStemFilterFactory extends BaseTokenFilterFactory {
|
||||
|
||||
private char[] charset;
|
||||
|
||||
public void init(SolrConfig solrConfig, Map<String, String> args) {
|
||||
super.init(solrConfig, args);
|
||||
charset = RussianCommon.getCharset(args.get("charset"));
|
||||
}
|
||||
|
||||
public RussianStemFilter create(TokenStream in) {
|
||||
return new RussianStemFilter(in,charset);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
package org.apache.solr.analysis;
|
||||
import org.apache.lucene.analysis.th.*;
|
||||
import java.io.IOException;
|
||||
import java.util.Locale;
|
||||
import java.lang.Character.UnicodeBlock;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import java.text.BreakIterator;
|
||||
import java.util.Map;
|
||||
public class ThaiWordFilterFactory extends BaseTokenFilterFactory {
|
||||
public ThaiWordFilter create(TokenStream input) {
|
||||
return new ThaiWordFilter(input);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,213 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.util;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.lang.reflect.Modifier;
|
||||
import java.net.URL;
|
||||
import java.net.URLClassLoader;
|
||||
import java.net.MalformedURLException;
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
import java.util.HashSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.Enumeration;
|
||||
import java.util.jar.*;
|
||||
|
||||
/**
|
||||
* Given a list of Jar files, suggest missing analysis factories.
|
||||
*
|
||||
* @version $Id$
|
||||
*/
|
||||
public class SuggestMissingFactories {
|
||||
|
||||
public static void main(String[] args) throws ClassNotFoundException, IOException, NoSuchMethodException {
|
||||
|
||||
final File[] files = new File[args.length];
|
||||
for (int i = 0; i < args.length; i++) {
|
||||
files[i] = new File(args[i]);
|
||||
}
|
||||
final FindClasses finder = new FindClasses(files);
|
||||
final ClassLoader cl = finder.getClassLoader();
|
||||
|
||||
final Class TOKENSTREAM
|
||||
= cl.loadClass("org.apache.lucene.analysis.TokenStream");
|
||||
final Class TOKENIZER
|
||||
= cl.loadClass("org.apache.lucene.analysis.Tokenizer");
|
||||
final Class TOKENFILTER
|
||||
= cl.loadClass("org.apache.lucene.analysis.TokenFilter");
|
||||
final Class TOKENIZERFACTORY
|
||||
= cl.loadClass("org.apache.solr.analysis.TokenizerFactory");
|
||||
final Class TOKENFILTERFACTORY
|
||||
= cl.loadClass("org.apache.solr.analysis.TokenFilterFactory");
|
||||
|
||||
|
||||
final HashSet<Class> result
|
||||
= new HashSet<Class>(finder.findExtends(TOKENIZER));
|
||||
result.addAll(finder.findExtends(TOKENFILTER));
|
||||
|
||||
result.removeAll(finder.findMethodReturns
|
||||
(finder.findExtends(TOKENIZERFACTORY),
|
||||
"create",
|
||||
Reader.class).values());
|
||||
result.removeAll(finder.findMethodReturns
|
||||
(finder.findExtends(TOKENFILTERFACTORY),
|
||||
"create",
|
||||
TOKENSTREAM).values());
|
||||
|
||||
for (final Class c : result) {
|
||||
System.out.println(c.getName());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes in a clazz name and a jar and finds
|
||||
* all classes in that jar that extend clazz.
|
||||
*/
|
||||
class FindClasses {
|
||||
|
||||
/**
|
||||
* Simple command line test method
|
||||
*/
|
||||
public static void main(String[] args)
|
||||
throws ClassNotFoundException, IOException, NoSuchMethodException {
|
||||
|
||||
FindClasses finder = new FindClasses(new File(args[1]));
|
||||
ClassLoader cl = finder.getClassLoader();
|
||||
Class clazz = cl.loadClass(args[0]);
|
||||
if (args.length == 2) {
|
||||
|
||||
System.out.println("Finding all extenders of " + clazz.getName());
|
||||
for (Class c : finder.findExtends(clazz)) {
|
||||
System.out.println(c.getName());
|
||||
}
|
||||
} else {
|
||||
String methName = args[2];
|
||||
System.out.println("Finding all extenders of " + clazz.getName() +
|
||||
" with method: " + methName);
|
||||
|
||||
Class[] methArgs = new Class[args.length-3];
|
||||
for (int i = 3; i < args.length; i++) {
|
||||
methArgs[i-3] = cl.loadClass(args[i]);
|
||||
}
|
||||
Map<Class,Class> map = finder.findMethodReturns
|
||||
(finder.findExtends(clazz),methName, methArgs);
|
||||
|
||||
for (Class key : map.keySet()) {
|
||||
System.out.println(key.getName() + " => " + map.get(key).getName());
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private JarFile[] jarFiles;
|
||||
private ClassLoader cl;
|
||||
public FindClasses(File... jars) throws IOException {
|
||||
|
||||
|
||||
jarFiles = new JarFile[jars.length];
|
||||
URL[] urls = new URL[jars.length];
|
||||
try {
|
||||
for (int i =0; i < jars.length; i++) {
|
||||
jarFiles[i] = new JarFile(jars[i]);
|
||||
urls[i] = jars[i].toURL();
|
||||
}
|
||||
} catch (MalformedURLException e) {
|
||||
throw new RuntimeException
|
||||
("WTF, how can JarFile.toURL() be malformed?", e);
|
||||
}
|
||||
|
||||
this.cl = new URLClassLoader(urls, this.getClass().getClassLoader());
|
||||
}
|
||||
|
||||
/**
|
||||
* returns a class loader that includes the jar used to
|
||||
* construct this instance
|
||||
*/
|
||||
public ClassLoader getClassLoader() {
|
||||
return this.cl;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find useful concrete (ie: not anonymous, not abstract, not an interface)
|
||||
* classes that extend clazz
|
||||
*/
|
||||
public Collection<Class> findExtends(Class<?> clazz)
|
||||
throws ClassNotFoundException {
|
||||
|
||||
HashSet<Class> results = new HashSet<Class>();
|
||||
|
||||
for (JarFile jarFile : jarFiles) {
|
||||
for (Enumeration<JarEntry> e = jarFile.entries();
|
||||
e.hasMoreElements() ;) {
|
||||
|
||||
String n = e.nextElement().getName();
|
||||
if (n.endsWith(".class")) {
|
||||
String cn = n.replace("/",".").substring(0,n.length()-6);
|
||||
Class<?> target;
|
||||
try {
|
||||
target = cl.loadClass(cn);
|
||||
} catch (NoClassDefFoundError e1) {
|
||||
throw new ClassNotFoundException
|
||||
("Can't load: " + cn, e1);
|
||||
}
|
||||
|
||||
if (clazz.isAssignableFrom(target)
|
||||
&& !target.isAnonymousClass()) {
|
||||
|
||||
int mods = target.getModifiers();
|
||||
if (!(Modifier.isAbstract(mods) ||
|
||||
Modifier.isInterface(mods))) {
|
||||
results.add(target);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a collection of classes, returns a Map containing the
|
||||
* subset of those classes that impliment the method specified,
|
||||
* where the value in the map is the return type of the method
|
||||
*/
|
||||
public Map<Class,Class> findMethodReturns(Collection<Class> clazzes,
|
||||
String methodName,
|
||||
Class... parameterTypes)
|
||||
throws NoSuchMethodException{
|
||||
|
||||
HashMap<Class,Class> results = new HashMap<Class,Class>();
|
||||
for (Class clazz : clazzes) {
|
||||
try {
|
||||
Method m = clazz.getMethod(methodName, parameterTypes);
|
||||
results.put(clazz, m.getReturnType());
|
||||
} catch (NoSuchMethodException e) {
|
||||
/* :NOOP: we expect this and skip clazz */
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue