SOLR-2003: report (throw exception) rather than replace charset errors in SolrResourceLoader.getLines

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@964430 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2010-07-15 13:50:48 +00:00
parent 278448babc
commit 67d2e87fee
4 changed files with 36 additions and 1 deletions

View File

@ -467,6 +467,9 @@ Other Changes
* SOLR-1946: Misc improvements to the SystemInfoHandler: /admin/system
(hossman)
* SOLR-2003: SolrResourceLoader will report any encoding errors, rather than
silently using replacement characters for invalid inputs (blargy via rmuir)
Build
----------------------

View File

@ -33,6 +33,7 @@ import java.util.concurrent.ConcurrentHashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.charset.Charset;
import java.nio.charset.CodingErrorAction;
import java.lang.reflect.Constructor;
import javax.naming.Context;
@ -316,7 +317,9 @@ public class SolrResourceLoader implements ResourceLoader
ArrayList<String> lines;
try {
input = new BufferedReader(new InputStreamReader(openResource(resource),
charset));
charset.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT)));
lines = new ArrayList<String>();
for (String word=null; (word=input.readLine())!=null;) {

View File

@ -31,6 +31,7 @@ import org.apache.solr.util.plugin.SolrCoreAware;
import java.io.File;
import java.io.InputStream;
import java.nio.charset.MalformedInputException;
import java.util.Arrays;
import java.util.List;
@ -117,4 +118,14 @@ public class ResourceLoaderTest extends TestCase
assertEquals(1, lines.size());
assertEquals("BOMsAreEvil", lines.get(0));
}
/**
 * Verifies that {@code SolrResourceLoader.getLines} reports a wrongly
 * encoded resource by throwing {@link MalformedInputException} instead of
 * returning lines for it.
 *
 * NOTE(review): the fixture "stopwordsWrongEncoding.txt" is presumably the
 * ISO-8859-1 stopwords file added alongside this test, and the loader is
 * presumably decoding it with a different charset -- confirm against
 * getLines.
 *
 * @throws Exception if the loader fails for any reason other than the
 *         expected decoding error
 */
public void testWrongEncoding() throws Exception {
  String wrongEncoding = "stopwordsWrongEncoding.txt";
  SolrResourceLoader loader = new SolrResourceLoader(null);
  try {
    // The returned lines are irrelevant here; only the exception matters.
    loader.getLines(wrongEncoding);
    fail("expected MalformedInputException while reading " + wrongEncoding);
  } catch (MalformedInputException expected) {
    // Intentionally empty: reaching this handler is the passing outcome.
  }
}
}

View File

@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# stopwords in the wrong encoding (ISO-8859-1).
# tests resourceloader's ability to report wrongly encoded files.
bañadores