LUCENE-4044: fix some more tests

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene2510@1364881 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-07-24 04:36:52 +00:00
parent ca658fd75d
commit 134a4a400a
15 changed files with 260 additions and 19 deletions

View File

@ -21,9 +21,10 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.TestStopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.solr.core.SolrResourceLoader;
import java.io.StringReader;
import java.util.Collections;
@ -38,7 +39,7 @@ import java.util.HashMap;
public class TestCommonGramsFilterFactory extends BaseTokenStreamTestCase {
public void testInform() throws Exception {
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
assertTrue("loader is null and it shouldn't be", loader != null);
CommonGramsFilterFactory factory = new CommonGramsFilterFactory();
Map<String, String> args = new HashMap<String, String>();
@ -88,7 +89,7 @@ public class TestCommonGramsFilterFactory extends BaseTokenStreamTestCase {
* If no words are provided, then a set of english default stopwords is used.
*/
public void testDefaults() throws Exception {
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
assertTrue("loader is null and it shouldn't be", loader != null);
CommonGramsFilterFactory factory = new CommonGramsFilterFactory();
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);

View File

@ -21,9 +21,10 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.TestStopFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.solr.core.SolrResourceLoader;
import java.io.StringReader;
import java.util.Collections;
@ -38,7 +39,7 @@ import java.util.HashMap;
public class TestCommonGramsQueryFilterFactory extends BaseTokenStreamTestCase {
public void testInform() throws Exception {
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
assertTrue("loader is null and it shouldn't be", loader != null);
CommonGramsQueryFilterFactory factory = new CommonGramsQueryFilterFactory();
Map<String, String> args = new HashMap<String, String>();
@ -88,7 +89,7 @@ public class TestCommonGramsQueryFilterFactory extends BaseTokenStreamTestCase {
* If no words are provided, then a set of english default stopwords is used.
*/
public void testDefaults() throws Exception {
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
ResourceLoader loader = new ResourceAsStreamResourceLoader(TestStopFilter.class);
assertTrue("loader is null and it shouldn't be", loader != null);
CommonGramsQueryFilterFactory factory = new CommonGramsQueryFilterFactory();
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);

View File

@ -19,8 +19,8 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.solr.core.SolrResourceLoader;
import java.util.Map;
import java.util.HashMap;
@ -32,7 +32,7 @@ import java.util.HashMap;
public class TestStopFilterFactory extends BaseTokenStreamTestCase {
public void testInform() throws Exception {
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
assertTrue("loader is null and it shouldn't be", loader != null);
StopFilterFactory factory = new StopFilterFactory();
Map<String, String> args = new HashMap<String, String>();

View File

@ -20,8 +20,8 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.analysis.util.InitializationException;
import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.solr.core.SolrResourceLoader;
import org.junit.Test;
import java.util.HashMap;
@ -35,7 +35,7 @@ public class TestTypeTokenFilterFactory extends BaseTokenStreamTestCase {
@Test
public void testInform() throws Exception {
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
TypeTokenFilterFactory factory = new TypeTokenFilterFactory();
Map<String, String> args = new HashMap<String, String>();
args.put("types", "stoptypes-1.txt");
@ -95,7 +95,7 @@ public class TestTypeTokenFilterFactory extends BaseTokenStreamTestCase {
args.put("enablePositionIncrements", "false");
typeTokenFilterFactory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
typeTokenFilterFactory.init(args);
typeTokenFilterFactory.inform(new SolrResourceLoader(null, null));
typeTokenFilterFactory.inform(new ResourceAsStreamResourceLoader(getClass()));
fail("not supplying 'types' parameter should cause an InitializationException");
} catch (InitializationException e) {
// everything ok

View File

@ -0,0 +1,17 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
foo
bar

View File

@ -0,0 +1,17 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
junk
more

View File

@ -0,0 +1,10 @@
| This is a file in snowball format, empty lines are ignored, '|' is a comment
| Additionally, multiple words can be on the same line, allowing stopwords to be
| arranged in tables (useful in some languages where they might inflect)
| fictitious table below
|third person singular
|Subject Object Possessive Reflexive
he him his himself| masculine
she her hers herself| feminine

View File

@ -0,0 +1,17 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
<NUM>
<EMAIL>

View File

@ -0,0 +1,17 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
<HOST>
<APOSTROPHE>

View File

@ -27,8 +27,8 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.solr.core.SolrResourceLoader;
/**
* Simple tests to ensure the French elision filter factory is working.
@ -42,7 +42,7 @@ public class TestElisionFilterFactory extends BaseTokenStreamTestCase {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
ElisionFilterFactory factory = new ElisionFilterFactory();
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
Map<String,String> args = new HashMap<String,String>();
args.put("articles", "frenchArticles.txt");
factory.init(args);
@ -61,7 +61,7 @@ public class TestElisionFilterFactory extends BaseTokenStreamTestCase {
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
Map<String, String> args = Collections.emptyMap();
factory.init(args);
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
factory.inform(loader);
TokenStream stream = factory.create(tokenizer);
assertTokenStreamContents(stream, new String[] { "avion" });
@ -75,7 +75,7 @@ public class TestElisionFilterFactory extends BaseTokenStreamTestCase {
Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
ElisionFilterFactory factory = new ElisionFilterFactory();
factory.setLuceneMatchVersion(TEST_VERSION_CURRENT);
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
ResourceLoader loader = new ResourceAsStreamResourceLoader(getClass());
Map<String,String> args = new HashMap<String,String>();
args.put("articles", "frenchArticles.txt");
args.put("ignoreCase", "true");

View File

@ -0,0 +1,24 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# A set of articles for testing the French Elision filter.
# Requiring a text file is a bit weird here...
l
m
t
qu
n
s
j

View File

@ -29,7 +29,7 @@ import org.apache.lucene.analysis.payloads.FloatEncoder;
import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.lucene.analysis.util.StringMockResourceLoader;
public class TestDelimitedPayloadTokenFilterFactory extends BaseTokenStreamTestCase {
@ -38,7 +38,7 @@ public class TestDelimitedPayloadTokenFilterFactory extends BaseTokenStreamTestC
args.put(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR, "float");
DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
factory.init(args);
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
ResourceLoader loader = new StringMockResourceLoader("solr/collection1");
factory.inform(loader);
TokenStream input = new MockTokenizer(new StringReader("the|0.1 quick|0.1 red|0.1"), MockTokenizer.WHITESPACE, false);
@ -61,7 +61,7 @@ public class TestDelimitedPayloadTokenFilterFactory extends BaseTokenStreamTestC
args.put(DelimitedPayloadTokenFilterFactory.DELIMITER_ATTR, "*");
DelimitedPayloadTokenFilterFactory factory = new DelimitedPayloadTokenFilterFactory();
factory.init(args);
ResourceLoader loader = new SolrResourceLoader("solr/collection1");
ResourceLoader loader = new StringMockResourceLoader("solr/collection1");
factory.inform(loader);
TokenStream input = new MockTokenizer(new StringReader("the*0.1 quick*0.1 red*0.1"), MockTokenizer.WHITESPACE, false);

View File

@ -0,0 +1,84 @@
package org.apache.lucene.analysis.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CodingErrorAction;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.IOUtils;
/**
 * {@link ResourceLoader} that resolves resource names through
 * {@link Class#getResourceAsStream(String)} of the class given at construction.
 * Resource names therefore follow the lookup rules of that method.
 */
public class ResourceAsStreamResourceLoader implements ResourceLoader {
  /** Class whose {@code getResourceAsStream} is used to locate resources. */
  final Class<?> clazz;

  /**
   * Creates a loader that looks resources up via the given class.
   *
   * @param clazz class used for resource lookup
   */
  public ResourceAsStreamResourceLoader(Class<?> clazz) {
    this.clazz = clazz;
  }

  /**
   * Opens the named resource.
   *
   * @param resource resource name, passed to {@link Class#getResourceAsStream(String)}
   * @return an open stream on the resource; never {@code null}
   * @throws IOException if the resource cannot be found
   */
  @Override
  public InputStream openResource(String resource) throws IOException {
    InputStream stream = clazz.getResourceAsStream(resource);
    if (stream == null) {
      // getResourceAsStream signals "not found" with null; surface that as a
      // descriptive IOException instead of a NullPointerException further down.
      throw new IOException("Resource not found: " + resource);
    }
    return stream;
  }

  /**
   * Reads the named resource as strict UTF-8 and returns its content lines.
   * Lines starting with {@code #} and lines that are blank after trimming are
   * dropped; the remaining lines are returned trimmed.
   *
   * @param resource resource name, resolved via {@link #openResource(String)}
   * @return the non-comment, non-blank lines, trimmed, in file order
   * @throws IOException if the resource cannot be found or read
   * @throws RuntimeException if the content is not valid UTF-8
   */
  @Override
  public List<String> getLines(String resource) throws IOException {
    BufferedReader input = null;
    List<String> lines;
    try {
      // REPORT makes the decoder fail on bad input instead of silently
      // substituting replacement characters.
      input = new BufferedReader(new InputStreamReader(openResource(resource),
          IOUtils.CHARSET_UTF_8.newDecoder()
              .onMalformedInput(CodingErrorAction.REPORT)
              .onUnmappableCharacter(CodingErrorAction.REPORT)));

      lines = new ArrayList<String>();
      for (String word = null; (word = input.readLine()) != null;) {
        // skip initial bom marker
        if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF') {
          word = word.substring(1);
        }
        // skip comments (checked before trimming, so an indented '#' is kept)
        if (word.startsWith("#")) {
          continue;
        }
        word = word.trim();
        // skip blank lines
        if (word.length() == 0) {
          continue;
        }
        lines.add(word);
      }
    } catch (CharacterCodingException ex) {
      throw new RuntimeException("Error loading resource (wrong encoding?): " + resource, ex);
    } finally {
      if (input != null) {
        input.close();
      }
    }
    return lines;
  }

  /**
   * Instantiates the named class through its no-argument constructor.
   *
   * <p>TODO: {@code subpackages} is currently ignored, so {@code cname} must be
   * a name that {@link Class#forName(String)} can resolve directly.
   *
   * @param cname name of the class to instantiate
   * @param expectedType type the class must be assignable to
   * @param subpackages unused
   * @return a new instance of the class
   * @throws RuntimeException wrapping any failure to load, cast or instantiate
   */
  @Override
  public <T> T newInstance(String cname, Class<T> expectedType, String... subpackages) {
    try {
      // Named 'impl' so it does not shadow the 'clazz' field.
      Class<? extends T> impl = Class.forName(cname).asSubclass(expectedType);
      return impl.newInstance();
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
}

View File

@ -0,0 +1,51 @@
package org.apache.lucene.analysis.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;
/**
 * Test-only {@link ResourceLoader} backed by one in-memory string. Every
 * resource name resolves to that same text, so it can stand in for exactly
 * one file.
 */
public class StringMockResourceLoader implements ResourceLoader {
  String text;

  /**
   * @param text content served for every resource name
   */
  public StringMockResourceLoader(String text) {
    this.text = text;
  }

  /** Returns the backing text as a UTF-8 byte stream; {@code resource} is ignored. */
  public InputStream openResource(String resource) throws IOException {
    byte[] utf8 = text.getBytes("UTF-8");
    return new ByteArrayInputStream(utf8);
  }

  /** Returns the backing text split on newlines; {@code resource} is ignored. */
  public List<String> getLines(String resource) throws IOException {
    String[] pieces = text.split("\n");
    return Arrays.asList(pieces);
  }

  /**
   * Loads {@code cname} with {@link Class#forName(String)} and instantiates it
   * through its no-argument constructor. Any failure is rethrown wrapped in a
   * {@link RuntimeException}.
   */
  // TODO: subpackages is not consulted yet
  public <T> T newInstance(String cname, Class<T> expectedType, String... subpackages) {
    try {
      Class<?> loaded = Class.forName(cname);
      Class<? extends T> impl = loaded.asSubclass(expectedType);
      return impl.newInstance();
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }
}

View File

@ -1,4 +1,4 @@
package org.apache.lucene.analysis.miscellaneous;
package org.apache.solr.analysis;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -25,6 +25,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.solr.core.SolrResourceLoader;
import org.junit.BeforeClass;
@ -33,6 +34,7 @@ import org.junit.Test;
/**
* New WordDelimiterFilter tests... most of the tests are in ConvertedLegacyTest
*/
// TODO: add a low-level test for this factory
public class TestWordDelimiterFilterFactory extends SolrTestCaseJ4 {
@BeforeClass