mirror of https://github.com/apache/lucene.git
LUCENE-4044: get analysis/common tests passing
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene2510@1364887 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
134a4a400a
commit
5cc38490d0
|
@ -17,12 +17,12 @@ package org.apache.lucene.analysis.compound;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.compound.CompoundWordTokenFilterBase;
|
||||
import org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilter;
|
||||
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
|
||||
import org.apache.lucene.analysis.util.*;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
import java.util.Map;
|
||||
import java.io.InputStream;
|
||||
|
@ -99,7 +99,7 @@ public class HyphenationCompoundWordTokenFilterFactory extends TokenFilterFactor
|
|||
} catch (Exception e) { // TODO: getHyphenationTree really shouldn't throw "Exception"
|
||||
throw new InitializationException("Exception thrown while loading dictionary and hyphenation file", e);
|
||||
} finally {
|
||||
IOUtils.closeQuietly(stream);
|
||||
IOUtils.closeWhileHandlingException(stream);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -26,8 +26,8 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
|||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
|
||||
/**
|
||||
* Simple tests to ensure the Dictionary compound filter factory is working.
|
||||
|
@ -40,7 +40,7 @@ public class TestDictionaryCompoundWordTokenFilterFactory extends BaseTokenStrea
|
|||
Reader reader = new StringReader("I like to play softball");
|
||||
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
DictionaryCompoundWordTokenFilterFactory factory = new DictionaryCompoundWordTokenFilterFactory();
|
||||
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
|
||||
ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
|
||||
Map<String,String> args = new HashMap<String,String>();
|
||||
args.put("dictionary", "compoundDictionary.txt");
|
||||
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
|
||||
|
|
|
@ -26,8 +26,8 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
|||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
|
||||
/**
|
||||
* Simple tests to ensure the Hyphenation compound filter factory is working.
|
||||
|
@ -40,7 +40,7 @@ public class TestHyphenationCompoundWordTokenFilterFactory extends BaseTokenStre
|
|||
Reader reader = new StringReader("min veninde som er lidt af en læsehest");
|
||||
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
HyphenationCompoundWordTokenFilterFactory factory = new HyphenationCompoundWordTokenFilterFactory();
|
||||
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
|
||||
ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
|
||||
Map<String,String> args = new HashMap<String,String>();
|
||||
args.put("hyphenator", "da_UTF8.xml");
|
||||
args.put("dictionary", "da_compoundDictionary.txt");
|
||||
|
@ -64,7 +64,7 @@ public class TestHyphenationCompoundWordTokenFilterFactory extends BaseTokenStre
|
|||
Reader reader = new StringReader("basketballkurv");
|
||||
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
HyphenationCompoundWordTokenFilterFactory factory = new HyphenationCompoundWordTokenFilterFactory();
|
||||
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
|
||||
ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
|
||||
Map<String,String> args = new HashMap<String,String>();
|
||||
args.put("hyphenator", "da_UTF8.xml");
|
||||
args.put("minSubwordSize", "2");
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# A set of words for testing the DictionaryCompound factory
|
||||
soft
|
||||
ball
|
||||
team
|
|
@ -0,0 +1,19 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# A set of words for testing the HyphenationCompound factory,
|
||||
# in conjunction with the danish hyphenation grammar.
|
||||
læse
|
||||
hest
|
|
@ -25,7 +25,7 @@ import java.util.Map;
|
|||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
|
||||
|
||||
/**
|
||||
* Simple tests to ensure the Hunspell stemmer loads from factory
|
||||
|
@ -34,11 +34,11 @@ public class TestHunspellStemFilterFactory extends BaseTokenStreamTestCase {
|
|||
public void testStemming() throws Exception {
|
||||
HunspellStemFilterFactory factory = new HunspellStemFilterFactory();
|
||||
Map<String,String> args = new HashMap<String,String>();
|
||||
args.put("dictionary", "hunspell-test.dic");
|
||||
args.put("affix", "hunspell-test.aff");
|
||||
args.put("dictionary", "test.dic");
|
||||
args.put("affix", "test.aff");
|
||||
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
|
||||
factory.init(args);
|
||||
factory.inform(new SolrResourceLoader("solr/collection1"));
|
||||
factory.inform(new ResourceAsStreamResourceLoader(getClass()));
|
||||
|
||||
Reader reader = new StringReader("abc");
|
||||
TokenStream stream = factory.create(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
|
||||
|
|
|
@ -19,8 +19,8 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.util.CharArraySet;
|
||||
import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
@ -32,7 +32,7 @@ import java.util.HashMap;
|
|||
public class TestKeepFilterFactory extends BaseTokenStreamTestCase {
|
||||
|
||||
public void testInform() throws Exception {
|
||||
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
|
||||
ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
|
||||
assertTrue("loader is null and it shouldn't be", loader != null);
|
||||
KeepWordFilterFactory factory = new KeepWordFilterFactory();
|
||||
Map<String, String> args = new HashMap<String, String>();
|
||||
|
|
|
@ -29,7 +29,7 @@ import org.apache.lucene.analysis.MockTokenizer;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.lucene.analysis.util.StringMockResourceLoader;
|
||||
|
||||
/**
|
||||
* Simple tests to ensure the keyword marker filter factory is working.
|
||||
|
@ -40,7 +40,7 @@ public class TestKeywordMarkerFilterFactory extends BaseTokenStreamTestCase {
|
|||
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
KeywordMarkerFilterFactory factory = new KeywordMarkerFilterFactory();
|
||||
Map<String,String> args = new HashMap<String,String>();
|
||||
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
|
||||
ResourceLoader loader = new StringMockResourceLoader("cats");
|
||||
args.put("protected", "protwords.txt");
|
||||
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
|
||||
factory.init(args);
|
||||
|
@ -55,7 +55,7 @@ public class TestKeywordMarkerFilterFactory extends BaseTokenStreamTestCase {
|
|||
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
KeywordMarkerFilterFactory factory = new KeywordMarkerFilterFactory();
|
||||
Map<String,String> args = new HashMap<String,String>();
|
||||
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
|
||||
ResourceLoader loader = new StringMockResourceLoader("cats");
|
||||
args.put("protected", "protwords.txt");
|
||||
args.put("ignoreCase", "true");
|
||||
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
|
||||
|
|
|
@ -29,7 +29,7 @@ import org.apache.lucene.analysis.MockTokenizer;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.lucene.analysis.util.StringMockResourceLoader;
|
||||
|
||||
/**
|
||||
* Simple tests to ensure the stemmer override filter factory is working.
|
||||
|
@ -41,7 +41,7 @@ public class TestStemmerOverrideFilterFactory extends BaseTokenStreamTestCase {
|
|||
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
StemmerOverrideFilterFactory factory = new StemmerOverrideFilterFactory();
|
||||
Map<String,String> args = new HashMap<String,String>();
|
||||
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
|
||||
ResourceLoader loader = new StringMockResourceLoader("dogs\tcat");
|
||||
args.put("dictionary", "stemdict.txt");
|
||||
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
|
||||
factory.init(args);
|
||||
|
@ -56,7 +56,7 @@ public class TestStemmerOverrideFilterFactory extends BaseTokenStreamTestCase {
|
|||
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
StemmerOverrideFilterFactory factory = new StemmerOverrideFilterFactory();
|
||||
Map<String,String> args = new HashMap<String,String>();
|
||||
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
|
||||
ResourceLoader loader = new StringMockResourceLoader("dogs\tcat");
|
||||
args.put("dictionary", "stemdict.txt");
|
||||
args.put("ignoreCase", "true");
|
||||
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
foo
|
||||
bar
|
|
@ -0,0 +1,17 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
junk
|
||||
more
|
|
@ -21,19 +21,16 @@ import org.apache.lucene.analysis.MockTokenizer;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.lucene.analysis.util.StringMockResourceLoader;
|
||||
import org.tartarus.snowball.ext.EnglishStemmer;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Reader;
|
||||
import java.io.StringReader;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class TestSnowballPorterFilterFactory extends BaseTokenStreamTestCase {
|
||||
|
||||
|
@ -53,12 +50,23 @@ public class TestSnowballPorterFilterFactory extends BaseTokenStreamTestCase {
|
|||
|
||||
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
|
||||
factory.init(args);
|
||||
factory.inform(new LinesMockSolrResourceLoader(new ArrayList<String>()));
|
||||
factory.inform(new StringMockResourceLoader(""));
|
||||
Tokenizer tokenizer = new MockTokenizer(
|
||||
new StringReader(StrUtils.join(Arrays.asList(test), ' ')), MockTokenizer.WHITESPACE, false);
|
||||
new StringReader(join(test, ' ')), MockTokenizer.WHITESPACE, false);
|
||||
TokenStream stream = factory.create(tokenizer);
|
||||
assertTokenStreamContents(stream, gold);
|
||||
}
|
||||
|
||||
String join(String[] stuff, char sep) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 0; i < stuff.length; i++) {
|
||||
if (i > 0) {
|
||||
sb.append(sep);
|
||||
}
|
||||
sb.append(stuff[i]);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
class LinesMockSolrResourceLoader implements ResourceLoader {
|
||||
List<String> lines;
|
||||
|
@ -85,7 +93,7 @@ public class TestSnowballPorterFilterFactory extends BaseTokenStreamTestCase {
|
|||
*/
|
||||
public void testProtected() throws Exception {
|
||||
SnowballPorterFilterFactory factory = new SnowballPorterFilterFactory();
|
||||
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
|
||||
ResourceLoader loader = new StringMockResourceLoader("ridding");
|
||||
Map<String,String> args = new HashMap<String,String>();
|
||||
args.put("protected", "protwords.txt");
|
||||
args.put("language", "English");
|
||||
|
|
Loading…
Reference in New Issue