From 1f47a886a37c0676d9874ec71b601bda1a1a56e2 Mon Sep 17 00:00:00 2001 From: James Dyer Date: Tue, 20 Nov 2012 19:38:52 +0000 Subject: [PATCH] SOLR-4096: FileDataSource & FieldReaderDataSource to default to UTF-8 charset git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1411812 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 6 +++++- .../dataimport/FieldReaderDataSource.java | 2 +- .../handler/dataimport/FileDataSource.java | 2 +- .../TestFileListEntityProcessor.java | 20 +++++++++---------- .../TestFileListWithLineEntityProcessor.java | 6 +++--- 5 files changed, 20 insertions(+), 16 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 8ed90d9b454..655cd8bb454 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -258,9 +258,13 @@ Other Changes * SOLR-3602: Update ZooKeeper to 3.4.5 (Mark Miller) -* SOLR-4095 DIH NumberFormatTransformer & DateFormatTransformer default to the +* SOLR-4095: DIH NumberFormatTransformer & DateFormatTransformer default to the ROOT Locale if none is specified. These previously used the machine's default. (James Dyer) + +* SOLR-4096: DIH FileDataSource & FieldReaderDataSource default to UTF-8 encoding + if none is specified. These previously used the machine's default. + (James Dyer) ================== 4.0.0 ================== diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java index bbe9bdcb8d9..3ba608c4990 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FieldReaderDataSource.java @@ -106,7 +106,7 @@ public class FieldReaderDataSource extends DataSource { private Reader getReader(Blob blob) throws SQLException, UnsupportedEncodingException { if (encoding == null) { - return (new InputStreamReader(blob.getBinaryStream())); + return (new InputStreamReader(blob.getBinaryStream(), "UTF-8")); } else { return (new InputStreamReader(blob.getBinaryStream(), encoding)); } diff --git a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileDataSource.java b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileDataSource.java index a15346935a5..d9892186bb0 100644 --- a/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileDataSource.java +++ b/solr/contrib/dataimporthandler/src/java/org/apache/solr/handler/dataimport/FileDataSource.java @@ -125,7 +125,7 @@ public class FileDataSource extends DataSource { protected Reader openStream(File file) throws FileNotFoundException, UnsupportedEncodingException { if (encoding == null) { - return new InputStreamReader(new FileInputStream(file)); + return new InputStreamReader(new FileInputStream(file), "UTF-8"); } else { return new InputStreamReader(new FileInputStream(file), encoding); } diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java index 51cc9457b3b..3efd942b9cb 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestFileListEntityProcessor.java @@ -41,9 +41,9 @@ public class TestFileListEntityProcessor extends AbstractDataImportHandlerTestCa tmpdir.delete(); tmpdir.mkdir(); tmpdir.deleteOnExit(); - createFile(tmpdir, "a.xml", "a.xml".getBytes(), false); - createFile(tmpdir, "b.xml", "b.xml".getBytes(), false); - createFile(tmpdir, "c.props", "c.props".getBytes(), false); + createFile(tmpdir, "a.xml", "a.xml".getBytes("UTF-8"), false); + createFile(tmpdir, "b.xml", "b.xml".getBytes("UTF-8"), false); + createFile(tmpdir, "c.props", "c.props".getBytes("UTF-8"), false); Map attrs = createMap( FileListEntityProcessor.FILE_NAME, "xml$", FileListEntityProcessor.BASE_DIR, tmpdir.getAbsolutePath()); @@ -138,9 +138,9 @@ public class TestFileListEntityProcessor extends AbstractDataImportHandlerTestCa tmpdir.delete(); tmpdir.mkdir(); tmpdir.deleteOnExit(); - createFile(tmpdir, "a.xml", "a.xml".getBytes(), true); - createFile(tmpdir, "b.xml", "b.xml".getBytes(), true); - createFile(tmpdir, "c.props", "c.props".getBytes(), true); + createFile(tmpdir, "a.xml", "a.xml".getBytes("UTF-8"), true); + createFile(tmpdir, "b.xml", "b.xml".getBytes("UTF-8"), true); + createFile(tmpdir, "c.props", "c.props".getBytes("UTF-8"), true); Map attrs = createMap( FileListEntityProcessor.FILE_NAME, "xml$", FileListEntityProcessor.BASE_DIR, tmpdir.getAbsolutePath(), @@ -162,7 +162,7 @@ public class TestFileListEntityProcessor extends AbstractDataImportHandlerTestCa VariableResolver resolver = new VariableResolver(); String lastMod = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ROOT).format(new Date(System.currentTimeMillis() - 50000)); resolver.addNamespace("a", createMap("x", lastMod)); - createFile(tmpdir, "t.xml", "t.xml".getBytes(), false); + createFile(tmpdir, "t.xml", "t.xml".getBytes("UTF-8"), false); fList = getFiles(resolver, attrs); assertEquals(1, fList.size()); assertEquals("File name must be t.xml", new File(tmpdir, "t.xml").getAbsolutePath(), fList.get(0)); @@ -177,9 +177,9 @@ public class TestFileListEntityProcessor extends AbstractDataImportHandlerTestCa File childdir = new File(tmpdir + "/child" ); childdir.mkdirs(); childdir.deleteOnExit(); - createFile(childdir, "a.xml", "a.xml".getBytes(), true); - createFile(childdir, "b.xml", "b.xml".getBytes(), true); - createFile(childdir, "c.props", "c.props".getBytes(), true); + createFile(childdir, "a.xml", "a.xml".getBytes("UTF-8"), true); + createFile(childdir, "b.xml", "b.xml".getBytes("UTF-8"), true); + createFile(childdir, "c.props", "c.props".getBytes("UTF-8"), true); Map attrs = createMap( FileListEntityProcessor.FILE_NAME, "^.*\\.xml$", FileListEntityProcessor.BASE_DIR, childdir.getAbsolutePath(), diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestFileListWithLineEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestFileListWithLineEntityProcessor.java index 6800ba83e4a..31e9216367a 100644 --- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestFileListWithLineEntityProcessor.java +++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestFileListWithLineEntityProcessor.java @@ -33,9 +33,9 @@ public class TestFileListWithLineEntityProcessor extends AbstractDataImportHandl tmpdir.delete(); tmpdir.mkdir(); tmpdir.deleteOnExit(); - createFile(tmpdir, "a.txt", "a line one\na line two\na line three".getBytes(), false); - createFile(tmpdir, "b.txt", "b line one\nb line two".getBytes(), false); - createFile(tmpdir, "c.txt", "c line one\nc line two\nc line three\nc line four".getBytes(), false); + createFile(tmpdir, "a.txt", "a line one\na line two\na line three".getBytes("UTF-8"), false); + createFile(tmpdir, "b.txt", "b line one\nb line two".getBytes("UTF-8"), false); + createFile(tmpdir, "c.txt", "c line one\nc line two\nc line three\nc line four".getBytes("UTF-8"), false); String config = generateConfig(tmpdir); LocalSolrQueryRequest request = lrf.makeRequest(