From 73a7da17abca06e1aad51a94dcba557d03c793d8 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sat, 7 Jul 2012 20:18:56 +0000 Subject: [PATCH] LUCENE-4199: more default locale/encoding fixes git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4199@1358635 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/analysis/hunspell/HunspellStemmer.java | 3 ++- .../src/test/org/egothor/stemmer/TestCompile.java | 5 +++-- .../benchmark/byTask/utils/StreamUtilsTest.java | 7 ++++--- .../lucene/benchmark/quality/TestQualityRun.java | 4 +++- .../apache/lucene/codecs/BlockTreeTermsReader.java | 14 ++++++++++++-- .../demo/xmlparser/FormBasedXmlQueryDemo.java | 3 ++- .../apache/lucene/search/join/TestBlockJoin.java | 3 ++- 7 files changed, 28 insertions(+), 11 deletions(-) diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java index 6dd4aa19266..fe2b7571fb2 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.hunspell; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.Charset; import java.text.ParseException; import java.util.ArrayList; import java.util.Arrays; @@ -330,7 +331,7 @@ public class HunspellStemmer { HunspellStemmer stemmer = new HunspellStemmer(dictionary); - Scanner scanner = new Scanner(System.in); + Scanner scanner = new Scanner(System.in, Charset.defaultCharset().name()); System.out.print("> "); while (scanner.hasNextLine()) { diff --git a/lucene/analysis/stempel/src/test/org/egothor/stemmer/TestCompile.java b/lucene/analysis/stempel/src/test/org/egothor/stemmer/TestCompile.java index 054365c7dee..ba2dd729a8c 100644 --- a/lucene/analysis/stempel/src/test/org/egothor/stemmer/TestCompile.java +++ b/lucene/analysis/stempel/src/test/org/egothor/stemmer/TestCompile.java @@ -60,13 +60,14 @@ import java.io.BufferedReader; import java.io.DataInputStream; import java.io.File; import java.io.FileInputStream; -import java.io.FileReader; import java.io.IOException; +import java.io.InputStreamReader; import java.io.LineNumberReader; import java.net.URI; import java.util.Locale; import java.util.StringTokenizer; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; public class TestCompile extends LuceneTestCase { @@ -121,7 +122,7 @@ public class TestCompile extends LuceneTestCase { private static void assertTrie(Trie trie, String file, boolean usefull, boolean storeorig) throws Exception { LineNumberReader in = new LineNumberReader(new BufferedReader( - new FileReader(file))); + new InputStreamReader(new FileInputStream(file), IOUtils.CHARSET_UTF_8))); for (String line = in.readLine(); line != null; line = in.readLine()) { try { diff --git a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java index bf8f04cfa7c..cb15be2feb3 100644 --- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java +++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java @@ -31,6 +31,7 @@ import java.io.OutputStreamWriter; import org.apache.commons.compress.compressors.CompressorStreamFactory; import org.apache.lucene.benchmark.BenchmarkTestCase; import org.apache.lucene.benchmark.byTask.utils.StreamUtils; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util._TestUtil; import org.junit.After; import org.junit.Before; @@ -88,7 +89,7 @@ public class StreamUtilsTest extends BenchmarkTestCase { private File rawTextFile(String ext) throws Exception { File f = new File(testDir,"testfile." + ext); - BufferedWriter w = new BufferedWriter(new FileWriter(f)); + BufferedWriter w = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), IOUtils.CHARSET_UTF_8)); w.write(TEXT); w.newLine(); w.close(); @@ -117,7 +118,7 @@ public class StreamUtilsTest extends BenchmarkTestCase { } private void writeText(OutputStream os) throws IOException { - BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os)); + BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os, IOUtils.CHARSET_UTF_8)); w.write(TEXT); w.newLine(); w.close(); @@ -125,7 +126,7 @@ public class StreamUtilsTest extends BenchmarkTestCase { private void assertReadText(File f) throws Exception { InputStream ir = StreamUtils.inputStream(f); - InputStreamReader in = new InputStreamReader(ir); + InputStreamReader in = new InputStreamReader(ir, IOUtils.CHARSET_UTF_8); BufferedReader r = new BufferedReader(in); String line = r.readLine(); assertEquals("Wrong text found in "+f.getName(), TEXT, line); diff --git a/lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java b/lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java index e12950b41e1..09502a10140 100644 --- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java +++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java @@ -31,7 +31,9 @@ import java.io.BufferedReader; import java.io.File; import java.io.InputStream; import java.io.InputStreamReader; +import java.io.OutputStreamWriter; import java.io.PrintWriter; +import java.nio.charset.Charset; /** * Test that quality run does its job. @@ -55,7 +57,7 @@ public class TestQualityRun extends BenchmarkTestCase { int maxResults = 1000; String docNameField = "doctitle"; // orig docID is in the linedoc format title - PrintWriter logger = VERBOSE ? new PrintWriter(System.out,true) : null; + PrintWriter logger = VERBOSE ? new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()),true) : null; // prepare topics InputStream topics = getClass().getResourceAsStream("trecTopics.txt"); diff --git a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java index dae492c30a1..b0c9bb7cb89 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java @@ -20,6 +20,7 @@ package org.apache.lucene.codecs; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.PrintStream; +import java.io.UnsupportedEncodingException; import java.util.Comparator; import java.util.Iterator; import java.util.Locale; @@ -346,7 +347,12 @@ public class BlockTreeTermsReader extends FieldsProducer { @Override public String toString() { final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); - final PrintStream out = new PrintStream(bos); + PrintStream out; + try { + out = new PrintStream(bos, false, "UTF-8"); + } catch (UnsupportedEncodingException bogus) { + throw new RuntimeException(bogus); + } out.println(" index FST:"); out.println(" " + indexNodeCount + " nodes"); @@ -379,7 +385,11 @@ public class BlockTreeTermsReader extends FieldsProducer { assert totalBlockCount == total; } - return bos.toString(); + try { + return bos.toString("UTF-8"); + } catch (UnsupportedEncodingException bogus) { + throw new RuntimeException(bogus); + } } } diff --git a/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java b/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java index d92f7722484..67468e52cd3 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java @@ -48,6 +48,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.Version; /** @@ -131,7 +132,7 @@ public class FormBasedXmlQueryDemo extends HttpServlet { IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer); IndexWriter writer = new IndexWriter(rd, iwConfig); InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv"); - BufferedReader br = new BufferedReader(new InputStreamReader(dataIn)); + BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, IOUtils.CHARSET_UTF_8)); String line = br.readLine(); final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED); textNoNorms.setOmitNorms(true); diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java index b9e6aa89d60..44555003b12 100644 --- a/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java +++ b/lucene/join/src/test/org/apache/lucene/search/join/TestBlockJoin.java @@ -32,6 +32,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Locale; public class TestBlockJoin extends LuceneTestCase { @@ -638,7 +639,7 @@ public class TestBlockJoin extends LuceneTestCase { int childId = Integer.parseInt(document.get("childID")); assertTrue(explanation.isMatch()); assertEquals(hit.score, explanation.getValue(), 0.0f); - assertEquals(String.format("Score based on child doc range from %d to %d", hit.doc - 1 - childId, hit.doc - 1), explanation.getDescription()); + assertEquals(String.format(Locale.ROOT, "Score based on child doc range from %d to %d", hit.doc - 1 - childId, hit.doc - 1), explanation.getDescription()); } }