generate analysis factories for trunk's new lang support

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/branches/solr@924164 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2010-03-17 05:38:14 +00:00
parent b44d8b0763
commit e55919d7ea
12 changed files with 412 additions and 1 deletions

View File

@ -1,2 +1,2 @@
AnyObjectId[f54b406771bab8fd720fb27279c9a6fca94a9331] was removed in git history.
AnyObjectId[b0cbcc9f623660e1a8ccdf2d22e0f9847687b260] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,28 @@
package org.apache.solr.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.bg.BulgarianStemFilter;
/** Factory for BulgarianStemFilter */
public class BulgarianStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new BulgarianStemFilter(input);
}
}

View File

@ -0,0 +1,28 @@
package org.apache.solr.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cz.CzechStemFilter;
/** Factory for CzechStemFilter */
public class CzechStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new CzechStemFilter(input);
}
}

View File

@ -0,0 +1,28 @@
package org.apache.solr.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
/** Factory for HindiNormalizationFilter */
public class HindiNormalizationFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new HindiNormalizationFilter(input);
}
}

View File

@ -0,0 +1,28 @@
package org.apache.solr.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hi.HindiStemFilter;
/** Factory for HindiStemFilter */
public class HindiStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new HindiStemFilter(input);
}
}

View File

@ -0,0 +1,28 @@
package org.apache.solr.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
/** Factory for IndicNormalizationFilter */
public class IndicNormalizationFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new IndicNormalizationFilter(input);
}
}

View File

@ -0,0 +1,31 @@
package org.apache.solr.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.in.IndicTokenizer;
/** Factory for IndicTokenizer */
public class IndicTokenizerFactory extends BaseTokenizerFactory {
public Tokenizer create(Reader input) {
assureMatchVersion();
return new IndicTokenizer(luceneMatchVersion, input);
}
}

View File

@ -0,0 +1,28 @@
package org.apache.solr.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
/** Factory for TurkishLowerCaseFilter */
public class TurkishLowerCaseFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new TurkishLowerCaseFilter(input);
}
}

View File

@ -0,0 +1,41 @@
package org.apache.solr.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
/**
* Simple tests to ensure the Bulgarian stem filter factory is working.
*/
public class TestBulgarianStemFilterFactory extends BaseTokenTestCase {
/**
* Ensure the filter actually stems text.
*/
public void testStemming() throws Exception {
Reader reader = new StringReader("компютри");
Tokenizer tokenizer = new WhitespaceTokenizer(reader);
BulgarianStemFilterFactory factory = new BulgarianStemFilterFactory();
TokenStream stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "компютр" });
}
}

View File

@ -0,0 +1,41 @@
package org.apache.solr.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
/**
* Simple tests to ensure the Czech stem filter factory is working.
*/
public class TestCzechStemFilterFactory extends BaseTokenTestCase {
/**
* Ensure the filter actually stems text.
*/
public void testStemming() throws Exception {
Reader reader = new StringReader("angličtí");
Tokenizer tokenizer = new WhitespaceTokenizer(reader);
CzechStemFilterFactory factory = new CzechStemFilterFactory();
TokenStream stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "anglick" });
}
}

View File

@ -0,0 +1,89 @@
package org.apache.solr.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
/**
* Simple tests to ensure the Hindi filter Factories are working.
*/
public class TestHindiFilters extends BaseTokenTestCase {
/**
* Test IndicTokenizerFactory
*/
public void testTokenizer() throws Exception {
Reader reader = new StringReader("मुझे हिंदी का और अभ्यास करना होगा ।");
IndicTokenizerFactory factory = new IndicTokenizerFactory();
factory.init(DEFAULT_VERSION_PARAM);
Tokenizer stream = factory.create(reader);
assertTokenStreamContents(stream,
new String[] { "मुझे", "हिंदी", "का", "और", "अभ्यास", "करना", "होगा" });
}
/**
* Test IndicNormalizationFilterFactory
*/
public void testIndicNormalizer() throws Exception {
Reader reader = new StringReader("ত্‍ अाैर");
IndicTokenizerFactory factory = new IndicTokenizerFactory();
IndicNormalizationFilterFactory filterFactory = new IndicNormalizationFilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
filterFactory.init(DEFAULT_VERSION_PARAM);
Tokenizer tokenizer = factory.create(reader);
TokenStream stream = filterFactory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "", "और" });
}
/**
* Test HindiNormalizationFilterFactory
*/
public void testHindiNormalizer() throws Exception {
Reader reader = new StringReader("क़िताब");
IndicTokenizerFactory factory = new IndicTokenizerFactory();
IndicNormalizationFilterFactory indicFilterFactory = new IndicNormalizationFilterFactory();
HindiNormalizationFilterFactory hindiFilterFactory = new HindiNormalizationFilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
hindiFilterFactory.init(DEFAULT_VERSION_PARAM);
Tokenizer tokenizer = factory.create(reader);
TokenStream stream = indicFilterFactory.create(tokenizer);
stream = hindiFilterFactory.create(stream);
assertTokenStreamContents(stream, new String[] {"किताब"});
}
/**
* Test HindiStemFilterFactory
*/
public void testStemmer() throws Exception {
Reader reader = new StringReader("किताबें");
IndicTokenizerFactory factory = new IndicTokenizerFactory();
IndicNormalizationFilterFactory indicFilterFactory = new IndicNormalizationFilterFactory();
HindiNormalizationFilterFactory hindiFilterFactory = new HindiNormalizationFilterFactory();
HindiStemFilterFactory stemFactory = new HindiStemFilterFactory();
factory.init(DEFAULT_VERSION_PARAM);
stemFactory.init(DEFAULT_VERSION_PARAM);
Tokenizer tokenizer = factory.create(reader);
TokenStream stream = indicFilterFactory.create(tokenizer);
stream = hindiFilterFactory.create(stream);
stream = stemFactory.create(stream);
assertTokenStreamContents(stream, new String[] {"किताब"});
}
}

View File

@ -0,0 +1,41 @@
package org.apache.solr.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
/**
* Simple tests to ensure the Turkish lowercase filter factory is working.
*/
public class TestTurkishLowerCaseFilterFactory extends BaseTokenTestCase {
/**
* Ensure the filter actually lowercases text.
*/
public void testCasing() throws Exception {
Reader reader = new StringReader("AĞACI");
Tokenizer tokenizer = new WhitespaceTokenizer(reader);
TurkishLowerCaseFilterFactory factory = new TurkishLowerCaseFilterFactory();
TokenStream stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "ağacı" });
}
}