diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilterFactory.java index 6a617493259..c0d8d5b2ef0 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/MappingCharFilterFactory.java @@ -57,12 +57,12 @@ public class MappingCharFilterFactory extends CharFilterFactory implements List wlist = null; File mappingFile = new File(mapping); if (mappingFile.exists()) { - wlist = loader.getLines(mapping); + wlist = getLines(loader, mapping); } else { List files = splitFileNames(mapping); wlist = new ArrayList(); for (String file : files) { - List lines = loader.getLines(file.trim()); + List lines = getLines(loader, file.trim()); wlist.addAll(lines); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java index 37f349da6fb..1411fdb1bb2 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilterFactory.java @@ -51,7 +51,7 @@ public class TypeTokenFilterFactory extends TokenFilterFactory implements Resour if (files.size() > 0) { stopTypes = new HashSet(); for (String file : files) { - List typesLines = loader.getLines(file.trim()); + List typesLines = getLines(loader, file.trim()); stopTypes.addAll(typesLines); } } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java index 4a2b0866389..1270b2ecff3 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilterFactory.java @@ -49,7 +49,7 @@ public class StemmerOverrideFilterFactory extends TokenFilterFactory implements dictionary = new CharArrayMap(luceneMatchVersion, files.size() * 10, ignoreCase); for (String file : files) { - List list = loader.getLines(file.trim()); + List list = getLines(loader, file.trim()); for (String line : list) { String[] mapping = line.split("\t", 2); dictionary.put(mapping[0], mapping[1]); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java index 056f2f6f441..0b4601de116 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java @@ -63,7 +63,7 @@ public class WordDelimiterFilterFactory extends TokenFilterFactory implements Re List files = splitFileNames( types ); List wlist = new ArrayList(); for( String file : files ){ - List lines = loader.getLines( file.trim() ); + List lines = getLines(loader, file.trim()); wlist.addAll( lines ); } typeTable = parseTypes(wlist); diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java index aa26da9cbf4..e1933169410 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java @@ -129,13 +129,17 @@ public abstract class AbstractAnalysisFactory { words = new CharArraySet(luceneMatchVersion, files.size() * 10, ignoreCase); for (String file : files) { - List wlist = loader.getLines(file.trim()); + List wlist = getLines(loader, file.trim()); words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist, ignoreCase)); } } return words; } + + protected List getLines(ResourceLoader loader, String resource) throws IOException { + return WordlistLoader.getLines(loader.openResource(resource), IOUtils.CHARSET_UTF_8); + } /** same as {@link #getWordSet(ResourceLoader, String, boolean)}, * except the input is in snowball format. */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/ResourceAsStreamResourceLoader.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceAsStreamResourceLoader.java similarity index 54% rename from lucene/analysis/common/src/test/org/apache/lucene/analysis/util/ResourceAsStreamResourceLoader.java rename to lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceAsStreamResourceLoader.java index 3d06aa46997..103b0140429 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/ResourceAsStreamResourceLoader.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceAsStreamResourceLoader.java @@ -17,17 +17,13 @@ package org.apache.lucene.analysis.util; * limitations under the License. */ -import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; -import java.nio.charset.CharacterCodingException; -import java.nio.charset.CodingErrorAction; -import java.util.ArrayList; -import java.util.List; - -import org.apache.lucene.util.IOUtils; +/** + * Simple ResourceLoader that uses Class.getResourceAsStream + * and Class.forName to open resources and classes, respectively. + */ public class ResourceAsStreamResourceLoader implements ResourceLoader { Class clazz; @@ -40,37 +36,6 @@ public class ResourceAsStreamResourceLoader implements ResourceLoader { return clazz.getResourceAsStream(resource); } - @Override - public List getLines(String resource) throws IOException { - BufferedReader input = null; - ArrayList lines; - try { - input = new BufferedReader(new InputStreamReader(openResource(resource), - IOUtils.CHARSET_UTF_8.newDecoder() - .onMalformedInput(CodingErrorAction.REPORT) - .onUnmappableCharacter(CodingErrorAction.REPORT))); - - lines = new ArrayList(); - for (String word=null; (word=input.readLine())!=null;) { - // skip initial bom marker - if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF') - word = word.substring(1); - // skip comments - if (word.startsWith("#")) continue; - word=word.trim(); - // skip blank lines - if (word.length()==0) continue; - lines.add(word); - } - } catch (CharacterCodingException ex) { - throw new RuntimeException("Error loading resource (wrong encoding?): " + resource, ex); - } finally { - if (input != null) - input.close(); - } - return lines; - } - // TODO: do this subpackages thing... wtf is that? @Override public T newInstance(String cname, Class expectedType, String... subpackages) { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java index 042379b2c31..1de426de58b 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java @@ -19,29 +19,19 @@ package org.apache.lucene.analysis.util; import java.io.IOException; import java.io.InputStream; -import java.util.List; /** * Abstraction for loading resources (streams, files, and classes). */ public interface ResourceLoader { + /** + * Opens a named resource + */ public InputStream openResource(String resource) throws IOException; /** - * Accesses a resource by name and returns the (non comment) lines - * containing data. - * - *

- * A comment line is any line that starts with the character "#" - *

- * - * @param resource - * @return a list of non-blank non-comment lines with whitespace trimmed - * from front and back. - * @throws IOException + * Creates a class of the name and expected type */ - public List getLines(String resource) throws IOException; - public T newInstance(String cname, Class expectedType, String ... subpackages); } \ No newline at end of file diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java index 12631219547..8c8866f871c 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java @@ -19,7 +19,11 @@ package org.apache.lucene.analysis.util; import java.io.BufferedReader; import java.io.IOException; +import java.io.InputStream; import java.io.Reader; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.List; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.Version; @@ -194,6 +198,47 @@ public class WordlistLoader { return result; } + /** + * Accesses a resource by name and returns the (non comment) lines containing + * data using the given character encoding. + * + *

+ * A comment line is any line that starts with the character "#" + *

+ * + * @return a list of non-blank non-comment lines with whitespace trimmed + * @throws IOException + */ + public static List getLines(InputStream stream, Charset charset) throws IOException{ + BufferedReader input = null; + ArrayList lines; + boolean success = false; + try { + input = getBufferedReader(IOUtils.getDecodingReader(stream, charset)); + + lines = new ArrayList(); + for (String word=null; (word=input.readLine())!=null;) { + // skip initial bom marker + if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF') + word = word.substring(1); + // skip comments + if (word.startsWith("#")) continue; + word=word.trim(); + // skip blank lines + if (word.length()==0) continue; + lines.add(word); + } + success = true; + return lines; + } finally { + if (success) { + IOUtils.close(input); + } else { + IOUtils.closeWhileHandlingException(input); + } + } + } + private static BufferedReader getBufferedReader(Reader reader) { return (reader instanceof BufferedReader) ? (BufferedReader) reader : new BufferedReader(reader); diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java index 2a73121c5c2..067925f70a0 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/StringMockResourceLoader.java @@ -20,8 +20,6 @@ package org.apache.lucene.analysis.util; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; -import java.util.Arrays; -import java.util.List; /** Fake resource loader for tests: works if you want to fake reading a single file */ public class StringMockResourceLoader implements ResourceLoader { @@ -31,10 +29,6 @@ public class StringMockResourceLoader implements ResourceLoader { this.text = text; } - public List getLines(String resource) throws IOException { - return Arrays.asList(text.split("\n")); - } - // TODO: do this subpackages thing... wtf is that? public T newInstance(String cname, Class expectedType, String... subpackages) { try { diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java index 1a5d98d8073..c2b2a75f2c1 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/StringMockResourceLoader.java @@ -33,10 +33,6 @@ class StringMockResourceLoader implements ResourceLoader { this.text = text; } - public List getLines(String resource) throws IOException { - return Arrays.asList(text.split("\n")); - } - // TODO: do this subpackages thing... wtf is that? public T newInstance(String cname, Class expectedType, String... subpackages) { try { diff --git a/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/ResourceAsStreamResourceLoader.java b/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/ResourceAsStreamResourceLoader.java deleted file mode 100644 index d3cf2df1353..00000000000 --- a/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/ResourceAsStreamResourceLoader.java +++ /dev/null @@ -1,85 +0,0 @@ -package org.apache.lucene.analysis.stempel; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.nio.charset.CharacterCodingException; -import java.nio.charset.CodingErrorAction; -import java.util.ArrayList; -import java.util.List; - -import org.apache.lucene.analysis.util.ResourceLoader; -import org.apache.lucene.util.IOUtils; - -public class ResourceAsStreamResourceLoader implements ResourceLoader { - Class clazz; - - public ResourceAsStreamResourceLoader(Class clazz) { - this.clazz = clazz; - } - - @Override - public InputStream openResource(String resource) throws IOException { - return clazz.getResourceAsStream(resource); - } - - @Override - public List getLines(String resource) throws IOException { - BufferedReader input = null; - ArrayList lines; - try { - input = new BufferedReader(new InputStreamReader(openResource(resource), - IOUtils.CHARSET_UTF_8.newDecoder() - .onMalformedInput(CodingErrorAction.REPORT) - .onUnmappableCharacter(CodingErrorAction.REPORT))); - - lines = new ArrayList(); - for (String word=null; (word=input.readLine())!=null;) { - // skip initial bom marker - if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF') - word = word.substring(1); - // skip comments - if (word.startsWith("#")) continue; - word=word.trim(); - // skip blank lines - if (word.length()==0) continue; - lines.add(word); - } - } catch (CharacterCodingException ex) { - throw new RuntimeException("Error loading resource (wrong encoding?): " + resource, ex); - } finally { - if (input != null) - input.close(); - } - return lines; - } - - // TODO: do this subpackages thing... wtf is that? - @Override - public T newInstance(String cname, Class expectedType, String... subpackages) { - try { - Class clazz = Class.forName(cname).asSubclass(expectedType); - return clazz.newInstance(); - } catch (Exception e) { - throw new RuntimeException(e); - } - } -} diff --git a/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java b/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java index f37a7bf0e80..a3cc40b77e7 100644 --- a/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java +++ b/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/stempel/TestStempelPolishStemFilterFactory.java @@ -22,6 +22,7 @@ import java.io.StringReader; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.util.ResourceAsStreamResourceLoader; /** * Tests for {@link StempelPolishStemFilterFactory} diff --git a/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java b/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java index 5d0f58549f4..51c05c26c2f 100644 --- a/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java +++ b/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java @@ -37,6 +37,7 @@ import org.apache.lucene.analysis.util.ResourceLoaderAware; import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.analysis.util.TokenizerFactory; import org.apache.lucene.analysis.util.AnalysisSPILoader; +import org.apache.lucene.analysis.util.WordlistLoader; import org.apache.lucene.util.WeakIdentityMap; import org.apache.solr.common.ResourceLoader; import org.apache.solr.handler.admin.CoreAdminHandler; @@ -337,34 +338,12 @@ public class SolrResourceLoader implements ResourceLoader public List getLines(String resource, Charset charset) throws IOException{ - BufferedReader input = null; - ArrayList lines; try { - input = new BufferedReader(new InputStreamReader(openResource(resource), - charset.newDecoder() - .onMalformedInput(CodingErrorAction.REPORT) - .onUnmappableCharacter(CodingErrorAction.REPORT))); - - lines = new ArrayList(); - for (String word=null; (word=input.readLine())!=null;) { - // skip initial bom marker - if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF') - word = word.substring(1); - // skip comments - if (word.startsWith("#")) continue; - word=word.trim(); - // skip blank lines - if (word.length()==0) continue; - lines.add(word); - } + return WordlistLoader.getLines(openResource(resource), charset); } catch (CharacterCodingException ex) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, - "Error loading resource (wrong encoding?): " + resource, ex); - } finally { - if (input != null) - input.close(); + "Error loading resource (wrong encoding?): " + resource, ex); } - return lines; } /* diff --git a/solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java b/solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java index c9885ca2a0e..167cb1765a1 100644 --- a/solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java +++ b/solr/core/src/test/org/apache/solr/analysis/TestMultiWordSynonyms.java @@ -55,10 +55,6 @@ public class TestMultiWordSynonyms extends BaseTokenStreamTestCase { this.text = text; } - public List getLines(String resource) throws IOException { - return null; - } - public T newInstance(String cname, Class expectedType, String... subpackages) { return null; } diff --git a/solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java b/solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java index 8c2187e540a..a161bf5c2fb 100644 --- a/solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java +++ b/solr/test-framework/src/java/org/apache/solr/analysis/StringMockSolrResourceLoader.java @@ -32,10 +32,6 @@ class StringMockSolrResourceLoader implements ResourceLoader { this.text = text; } - public List getLines(String resource) throws IOException { - return Arrays.asList(text.split("\n")); - } - public T newInstance(String cname, Class expectedType, String... subpackages) { return null; }