mirror of https://github.com/apache/lucene.git
SOLR-2003: report (throw exception) rather than replace charset errors in SolrResourceLoader.getLines
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@964430 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
278448babc
commit
67d2e87fee
|
@ -467,6 +467,9 @@ Other Changes
|
|||
* SOLR-1946: Misc improvements to the SystemInfoHandler: /admin/system
|
||||
(hossman)
|
||||
|
||||
* SOLR-2003: SolrResourceLoader will report any encoding errors, rather than
|
||||
silently using replacement characters for invalid inputs (blargy via rmuir)
|
||||
|
||||
Build
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ import java.util.concurrent.ConcurrentHashMap;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CodingErrorAction;
|
||||
import java.lang.reflect.Constructor;
|
||||
|
||||
import javax.naming.Context;
|
||||
|
@ -316,7 +317,9 @@ public class SolrResourceLoader implements ResourceLoader
|
|||
ArrayList<String> lines;
|
||||
try {
|
||||
input = new BufferedReader(new InputStreamReader(openResource(resource),
|
||||
charset));
|
||||
charset.newDecoder()
|
||||
.onMalformedInput(CodingErrorAction.REPORT)
|
||||
.onUnmappableCharacter(CodingErrorAction.REPORT)));
|
||||
|
||||
lines = new ArrayList<String>();
|
||||
for (String word=null; (word=input.readLine())!=null;) {
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.solr.util.plugin.SolrCoreAware;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.MalformedInputException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -117,4 +118,14 @@ public class ResourceLoaderTest extends TestCase
|
|||
assertEquals(1, lines.size());
|
||||
assertEquals("BOMsAreEvil", lines.get(0));
|
||||
}
|
||||
|
||||
/**
 * Checks that {@code SolrResourceLoader.getLines} reports a wrongly encoded
 * resource by throwing {@link MalformedInputException} instead of returning lines.
 */
public void testWrongEncoding() throws Exception {
|
||||
// NOTE(review): presumably the fixture whose header says it holds stopwords in the wrong encoding (ISO-8859-1) -- confirm
String wrongEncoding = "stopwordsWrongEncoding.txt";
|
||||
// NOTE(review): meaning of the null constructor argument is not visible here -- confirm against SolrResourceLoader
SolrResourceLoader loader = new SolrResourceLoader(null);
|
||||
// getLines must throw MalformedInputException for this resource; returning normally is a test failure
|
||||
try {
|
||||
// the returned list is never read; only the exception matters
List<String> lines = loader.getLines(wrongEncoding);
|
||||
// reached only if getLines returned without throwing
fail();
|
||||
} catch (MalformedInputException expected) {} // empty on purpose: this exception is the expected outcome
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# stopwords in the wrong encoding (ISO-8859-1).
|
||||
# tests resourceloader's ability to report wrongly encoded files.
|
||||
bañadores
|
Loading…
Reference in New Issue