From 8a0e3198ca553688cdfab25823c60a67c4d2f0db Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Thu, 5 Jul 2012 14:41:03 +0000 Subject: [PATCH] LUCENE-4194: more encoding fixes. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1357636 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/lucene/benchmark/byTask/TestPerfTasksParse.java | 5 ++++- .../org/apache/lucene/search/highlight/HighlighterTest.java | 2 +- .../test/org/apache/lucene/queryparser/xml/TestParser.java | 3 ++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java index a06efa59064..36734bd5c85 100755 --- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java +++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksParse.java @@ -19,9 +19,12 @@ package org.apache.lucene.benchmark.byTask; import java.io.File; import java.io.FileFilter; +import java.io.FileInputStream; import java.io.FileReader; import java.io.IOException; +import java.io.InputStreamReader; import java.io.StringReader; +import java.nio.charset.Charset; import java.util.ArrayList; import org.apache.lucene.benchmark.byTask.feeds.AbstractQueryMaker; @@ -113,7 +116,7 @@ public class TestPerfTasksParse extends LuceneTestCase { public boolean accept(File pathname) { return pathname.isFile() && pathname.getName().endsWith(".alg"); } })) { try { - Config config = new Config(new FileReader(algFile)); + Config config = new Config(new InputStreamReader(new FileInputStream(algFile), "UTF-8")); String contentSource = config.get("content.source", null); if (contentSource != null) { Class.forName(contentSource); } config.set("work.dir", new File(TEMP_DIR,"work").getAbsolutePath()); diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java index 4f0fddbafa8..67ea75f940c 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java @@ -1371,7 +1371,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte // now an ugly built of XML parsing to test the snippet is encoded OK DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder db = dbf.newDocumentBuilder(); - org.w3c.dom.Document doc = db.parse(new ByteArrayInputStream(xhtml.getBytes())); + org.w3c.dom.Document doc = db.parse(new ByteArrayInputStream(xhtml.getBytes("UTF-8"))); Element root = doc.getDocumentElement(); NodeList nodes = root.getElementsByTagName("body"); Element body = (Element) nodes.item(0); diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestParser.java index 4f7e4380633..e087f644a54 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestParser.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestParser.java @@ -61,7 +61,8 @@ public class TestParser extends LuceneTestCase { //initialize the parser builder = new CorePlusExtensionsParser("contents", analyzer); - BufferedReader d = new BufferedReader(new InputStreamReader(TestParser.class.getResourceAsStream("reuters21578.txt"))); + BufferedReader d = new BufferedReader(new InputStreamReader( + TestParser.class.getResourceAsStream("reuters21578.txt"), "US-ASCII")); dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(Version.LUCENE_40, analyzer)); String line = d.readLine();