LUCENE-3312: Merge up to trunk HEAD. There was a really huge change (LUCENE-4199).

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3312@1359283 13f79535-47bb-0310-9956-ffa450edef68
2012-07-09 17:04:57 +00:00 · 2012-07-09 17:04:57 +00:00 · 27aa2f6a28
parent dcab2d6d53 2f123d1209
commit 27aa2f6a28
261 changed files with 1795 additions and 1774 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -62,6 +62,12 @@ Build
 * LUCENE-4115: JAR resolution/ cleanup should be done automatically for ant 
  clean/ eclipse/ resolve (Dawid Weiss)
 * LUCENE-4199: Add a new target "check-forbidden-apis" that parses all
  generated .class files for use of APIs that use the default charset, default
  locale, or default timezone and fails the build if violations are found. This
  ensures that Lucene / Solr is independent of local configuration options.
  (Uwe Schindler, Robert Muir, Dawid Weiss)
 Documentation
 * LUCENE-4195: Added package documentation and examples for 
--- a/lucene/analysis/common/build.xml
+++ b/lucene/analysis/common/build.xml
@ -61,50 +61,50 @@
          executable="${python.exe}" failonerror="true" logerror="true">
      <arg value="htmlentity.py"/>
    </exec>
    <fixcrlf file="src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex" encoding="UTF-8"/>
  </target>
  <target name="jflex-wiki-tokenizer" depends="init,jflex-check" if="jflex.present">
    <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
      <classpath refid="jflex.classpath"/>
    </taskdef>
-    <jflex file="src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex"
+    <run-jflex dir="src/java/org/apache/lucene/analysis/wikipedia" name="WikipediaTokenizerImpl"/>
           outdir="src/java/org/apache/lucene/analysis/wikipedia"
           nobak="on"/>
  </target>
  <target name="jflex-StandardAnalyzer" depends="init,jflex-check" if="jflex.present">
    <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
 			<classpath refid="jflex.classpath"/>
    </taskdef>
-
+    <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="StandardTokenizerImpl"/>
-    <jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex"
+    <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="ClassicTokenizerImpl"/>
           outdir="src/java/org/apache/lucene/analysis/standard"
           nobak="on" />
    <jflex file="src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex"
           outdir="src/java/org/apache/lucene/analysis/standard"
           nobak="on" />
    <jflex file="src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex"
           outdir="src/java/org/apache/lucene/analysis/standard/std31"
           nobak="on" />
  </target>
  <target name="jflex-UAX29URLEmailTokenizer" depends="jflex-check" if="jflex.present">
    <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
 			<classpath refid="jflex.classpath"/>
    </taskdef>
-    <jflex file="src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex"
+    <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="UAX29URLEmailTokenizerImpl"/>
           outdir="src/java/org/apache/lucene/analysis/standard"
           nobak="on" />
    <jflex file="src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.jflex"
           outdir="src/java/org/apache/lucene/analysis/standard/std31"
           nobak="on" />
    <jflex file="src/java/org/apache/lucene/analysis/standard/std34/UAX29URLEmailTokenizerImpl34.jflex"
           outdir="src/java/org/apache/lucene/analysis/standard/std34"
           nobak="on" />
  </target>
  <!-- Remove the inappropriate JFlex-generated constructor -->
  <macrodef name="run-jflex">
    <attribute name="dir"/>
    <attribute name="name"/>
    <sequential>
      <jflex file="@{dir}/@{name}.jflex"
             outdir="@{dir}"
             nobak="on" />
      <replaceregexp file="@{dir}/@{name}.java"
                     match="/\*\*\s*\*\s*Creates a new scanner\..*this\(new java\.io\.InputStreamReader\(in\)\);\s*\}"
                     replace="" flags="sg"/>
    </sequential>
  </macrodef>
  <target name="clean-jflex">
    <delete>
      <fileset dir="src/java/org/apache/lucene/analysis/charfilter" includes="*.java">
        <containsregexp expression="generated.*by.*JFlex"/>
      </fileset>
      <fileset dir="src/java/org/apache/lucene/analysis/wikipedia" includes="*.java">
        <containsregexp expression="generated.*by.*JFlex"/>
      </fileset>
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemmer.java
@ -1,5 +1,7 @@
 package org.apache.lucene.analysis.br;
 import java.util.Locale;
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
@ -21,6 +23,7 @@ package org.apache.lucene.analysis.br;
 * A stemmer for Brazilian Portuguese words.
 */
 public class BrazilianStemmer {
  private static final Locale locale = new Locale("pt", "BR");
 	/**
 	 * Changed term
@ -243,7 +246,7 @@ public class BrazilianStemmer {
      return null ;
    }
-    value = value.toLowerCase() ;
+    value = value.toLowerCase(locale) ;
    for (j=0 ; j < value.length() ; j++) {
      if ((value.charAt(j) == 'á') ||
          (value.charAt(j) == 'â') ||
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex
@ -1,6 +1,6 @@
 package org.apache.lucene.analysis.charfilter;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemmer.java
@ -1,4 +1,7 @@
 package org.apache.lucene.analysis.de;
 import java.util.Locale;
 // This file is encoded in UTF-8
 /*
@ -38,6 +41,8 @@ public class GermanStemmer
     */
    private int substCount = 0;
    private static final Locale locale = new Locale("de", "DE");
    /**
     * Stems the given term to a unique <tt>discriminator</tt>.
     *
@ -47,7 +52,7 @@ public class GermanStemmer
    protected String stem( String term )
    {
      // Use lowercase for medium stemming.
-      term = term.toLowerCase();
+      term = term.toLowerCase(locale);
      if ( !isStemmable( term ) )
        return term;
      // Reset the StringBuilder.
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellDictionary.java
@ -252,7 +252,7 @@ public class HunspellDictionary {
      }
      String condition = ruleArgs[4];
-      affix.setCondition(condition, String.format(conditionPattern, condition));
+      affix.setCondition(condition, String.format(Locale.ROOT, conditionPattern, condition));
      affix.setCrossProduct(crossProduct);
      List<HunspellAffix> list = affixes.get(affix.getAppend());
@ -376,7 +376,7 @@ public class HunspellDictionary {
        Arrays.sort(wordForm.getFlags());
        entry = line.substring(0, flagSep);
        if(ignoreCase) {
-          entry = entry.toLowerCase(Locale.ENGLISH);
+          entry = entry.toLowerCase(Locale.ROOT);
        }
      }
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemmer.java
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.hunspell;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.Charset;
 import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Arrays;
@ -330,7 +331,7 @@ public class HunspellStemmer {
    HunspellStemmer stemmer = new HunspellStemmer(dictionary);
-    Scanner scanner = new Scanner(System.in);
+    Scanner scanner = new Scanner(System.in, Charset.defaultCharset().name());
    System.out.print("> ");
    while (scanner.hasNextLine()) {
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.sinks;
 import java.text.DateFormat;
 import java.text.ParseException;
 import java.util.Date;
 import java.util.Locale;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.AttributeSource;
@ -37,10 +38,12 @@ public class DateRecognizerSinkFilter extends TeeSinkTokenFilter.SinkFilter {
  protected CharTermAttribute termAtt;
  /**
-   * Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object.
+   * Uses {@link java.text.DateFormat#getDateInstance(int, Locale)
   * DateFormat#getDateInstance(DateFormat.DEFAULT, Locale.ROOT)} as 
   * the {@link java.text.DateFormat} object.
   */
  public DateRecognizerSinkFilter() {
-    this(DateFormat.getDateInstance());
+    this(DateFormat.getDateInstance(DateFormat.DEFAULT, Locale.ROOT));
  }
  public DateRecognizerSinkFilter(DateFormat dateFormat) {
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java
@ -1,8 +1,8 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/30/11 12:10 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 16:59 */
 package org.apache.lucene.analysis.standard;
-/*
+/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@ -33,8 +33,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 /**
 * This class is a scanner generated by 
 * <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 9/30/11 12:10 PM from the specification file
+ * on 08.07.12 16:59 from the specification file
- * <tt>/lucene/jflex/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
+ * <tt>C:/Users/Uwe Schindler/Projects/lucene/lucene4199/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
 */
 class ClassicTokenizerImpl implements StandardTokenizerInterface {
@ -383,15 +383,7 @@ public final void getText(CharTermAttribute t) {
    this.zzReader = in;
  }
-  /**
+  
   * Creates a new scanner.
   * There is also java.io.Reader version of this constructor.
   *
   * @param   in  the java.io.Inputstream to read input from.
   */
  ClassicTokenizerImpl(java.io.InputStream in) {
    this(new java.io.InputStreamReader(in));
  }
  /** 
   * Unpacks the compressed character translation table.
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
@ -14,7 +14,7 @@
 * limitations under the License.
 */
-// Generated using ICU4J 4.8.0.0 on Friday, September 30, 2011 4:10:42 PM UTC
+// Generated using ICU4J 4.8.1.1 on Sunday, July 8, 2012 2:59:49 PM UTC
 // by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
@ -1,8 +1,8 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/30/11 12:10 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 16:59 */
 package org.apache.lucene.analysis.standard;
-/*
+/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@ -759,15 +759,7 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
    this.zzReader = in;
  }
-  /**
+  
   * Creates a new scanner.
   * There is also java.io.Reader version of this constructor.
   *
   * @param   in  the java.io.Inputstream to read input from.
   */
  public StandardTokenizerImpl(java.io.InputStream in) {
    this(new java.io.InputStreamReader(in));
  }
  /** 
   * Unpacks the compressed character translation table.
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 3/18/12 12:05 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 17:00 */
 package org.apache.lucene.analysis.standard;
@ -3844,15 +3844,7 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf
    this.zzReader = in;
  }
-  /**
+  
   * Creates a new scanner.
   * There is also java.io.Reader version of this constructor.
   *
   * @param   in  the java.io.Inputstream to read input from.
   */
  public UAX29URLEmailTokenizerImpl(java.io.InputStream in) {
    this(new java.io.InputStreamReader(in));
  }
  /** 
   * Unpacks the compressed character translation table.
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
@ -1,6 +1,6 @@
 package org.apache.lucene.analysis.standard;
-/**
+/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
@ -1,8 +1,8 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 1/22/12 10:26 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 17:00 */
 package org.apache.lucene.analysis.wikipedia;
-/*
+/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 /**
 * This class is a scanner generated by 
 * <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 1/22/12 10:26 PM from the specification file
+ * on 08.07.12 17:00 from the specification file
- * <tt>/home/rmuir/workspace/lucene-clean-trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
+ * <tt>C:/Users/Uwe Schindler/Projects/lucene/lucene4199/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
 */
 class WikipediaTokenizerImpl {
@ -519,15 +519,7 @@ final void reset() {
    this.zzReader = in;
  }
-  /**
+  
   * Creates a new scanner.
   * There is also java.io.Reader version of this constructor.
   *
   * @param   in  the java.io.Inputstream to read input from.
   */
  WikipediaTokenizerImpl(java.io.InputStream in) {
    this(new java.io.InputStreamReader(in));
  }
  /** 
   * Unpacks the compressed character translation table.
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
@ -79,7 +79,7 @@ public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
    public boolean incrementToken() throws IOException {
      if (input.incrementToken()) {
        if (!keywordAttr.isKeyword()) {
-          final String term = termAtt.toString().toLowerCase(Locale.ENGLISH);
+          final String term = termAtt.toString().toLowerCase(Locale.ROOT);
          termAtt.setEmpty().append(term);
        }
        return true;
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/DateRecognizerSinkTokenizerTest.java
@ -27,7 +27,7 @@ import org.apache.lucene.analysis.MockTokenizer;
 public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase {
  public void test() throws IOException {
-    DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.US));
+    DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.ROOT));
    String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006  The dogs finally reacted on 7/12/2006";
    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
    TeeSinkTokenFilter.SinkTokenStream sink = tee.newSinkTokenStream(sinkFilter);
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.sinks;
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.Locale;
 import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -164,7 +165,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
    TokenStream lowerCasing = new LowerCaseFilter(TEST_VERSION_CURRENT, source1);
    String[] lowerCaseTokens = new String[tokens1.length];
    for (int i = 0; i < tokens1.length; i++)
-      lowerCaseTokens[i] = tokens1[i].toLowerCase();
+      lowerCaseTokens[i] = tokens1[i].toLowerCase(Locale.ROOT);
    assertTokenStreamContents(lowerCasing, lowerCaseTokens);
  }
@ -180,7 +181,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
      StringBuilder buffer = new StringBuilder();
      System.out.println("-----Tokens: " + tokCount[k] + "-----");
      for (int i = 0; i < tokCount[k]; i++) {
-        buffer.append(English.intToEnglish(i).toUpperCase()).append(' ');
+        buffer.append(English.intToEnglish(i).toUpperCase(Locale.ROOT)).append(' ');
      }
      //make sure we produce the same tokens
      TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString()))));
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayIterator.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayIterator.java
@ -32,7 +32,8 @@ public class TestCharArrayIterator extends LuceneTestCase {
  }
  public void testConsumeWordInstance() {
-    BreakIterator bi = BreakIterator.getWordInstance();
+    // we use the default locale, as it is randomized by LuceneTestCase
    BreakIterator bi = BreakIterator.getWordInstance(Locale.getDefault());
    CharArrayIterator ci = CharArrayIterator.newWordInstance();
    for (int i = 0; i < 10000; i++) {
      char text[] = _TestUtil.randomUnicodeString(random()).toCharArray();
@ -43,7 +44,8 @@ public class TestCharArrayIterator extends LuceneTestCase {
  /* run this to test if your JRE is buggy
  public void testWordInstanceJREBUG() {
-    BreakIterator bi = BreakIterator.getWordInstance();
+    // we use the default locale, as it is randomized by LuceneTestCase
    BreakIterator bi = BreakIterator.getWordInstance(Locale.getDefault());
    Segment ci = new Segment();
    for (int i = 0; i < 10000; i++) {
      char text[] = _TestUtil.randomUnicodeString(random).toCharArray();
@ -60,7 +62,8 @@ public class TestCharArrayIterator extends LuceneTestCase {
  }
  public void testConsumeSentenceInstance() {
-    BreakIterator bi = BreakIterator.getSentenceInstance();
+    // we use the default locale, as it is randomized by LuceneTestCase
    BreakIterator bi = BreakIterator.getSentenceInstance(Locale.getDefault());
    CharArrayIterator ci = CharArrayIterator.newSentenceInstance();
    for (int i = 0; i < 10000; i++) {
      char text[] = _TestUtil.randomUnicodeString(random()).toCharArray();
@ -71,7 +74,8 @@ public class TestCharArrayIterator extends LuceneTestCase {
  /* run this to test if your JRE is buggy
  public void testSentenceInstanceJREBUG() {
-    BreakIterator bi = BreakIterator.getSentenceInstance();
+    // we use the default locale, as it is randomized by LuceneTestCase
    BreakIterator bi = BreakIterator.getSentenceInstance(Locale.getDefault());
    Segment ci = new Segment();
    for (int i = 0; i < 10000; i++) {
      char text[] = _TestUtil.randomUnicodeString(random).toCharArray();
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java
@ -36,7 +36,7 @@ public class TestCharArrayMap extends LuceneTestCase {
        key[j] = (char)random().nextInt(127);
      }
      String keyStr = new String(key);
-      String hmapKey = ignoreCase ? keyStr.toLowerCase(Locale.ENGLISH) : keyStr; 
+      String hmapKey = ignoreCase ? keyStr.toLowerCase(Locale.ROOT) : keyStr; 
      int val = random().nextInt();
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java
@ -208,16 +208,16 @@ public class TestCharArraySet extends LuceneTestCase {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
-      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
+      assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
-      assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i]));
+      assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
    }
    set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), false);
    for (String upper : upperArr) {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
-      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
+      assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
-      assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
+      assertFalse(String.format(Locale.ROOT, falsePos, lowerArr[i]), set.contains(lowerArr[i]));
    }
  }
@ -235,8 +235,8 @@ public class TestCharArraySet extends LuceneTestCase {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
-      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
+      assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
-      assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i]));
+      assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
    }
    set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS),
        false);
@ -244,8 +244,8 @@ public class TestCharArraySet extends LuceneTestCase {
      set.add(upper);
    }
    for (int i = 0; i < upperArr.length; i++) {
-      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
+      assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
-      assertFalse(String.format(falsePos, upperArr[i]), set
+      assertFalse(String.format(Locale.ROOT, falsePos, upperArr[i]), set
          .contains(lowerArr[i]));
    }
  }
@ -258,7 +258,7 @@ public class TestCharArraySet extends LuceneTestCase {
    List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
    List<String> stopwordsUpper = new ArrayList<String>();
    for (String string : stopwords) {
-      stopwordsUpper.add(string.toUpperCase());
+      stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
    }
    setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
    setIngoreCase.add(Integer.valueOf(1));
@ -305,7 +305,7 @@ public class TestCharArraySet extends LuceneTestCase {
    List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
    List<String> stopwordsUpper = new ArrayList<String>();
    for (String string : stopwords) {
-      stopwordsUpper.add(string.toUpperCase());
+      stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
    }
    setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
    setIngoreCase.add(Integer.valueOf(1));
@ -351,7 +351,7 @@ public class TestCharArraySet extends LuceneTestCase {
    List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
    List<String> stopwordsUpper = new ArrayList<String>();
    for (String string : stopwords) {
-      stopwordsUpper.add(string.toUpperCase());
+      stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
    }
    set.addAll(Arrays.asList(TEST_STOP_WORDS));
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
@ -20,6 +20,7 @@ package org.apache.lucene.analysis.util;
 import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
 import java.util.Locale;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -53,7 +54,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
    // internal buffer size is 1024 make sure we have a surrogate pair right at the border
    builder.insert(1023, "\ud801\udc1c");
    Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString()));
-    assertTokenStreamContents(tokenizer, builder.toString().toLowerCase().split(" "));
+    assertTokenStreamContents(tokenizer, builder.toString().toLowerCase(Locale.ROOT).split(" "));
  }
  /*
@ -70,7 +71,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
      }
      builder.append("\ud801\udc1cabc");
      Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString()));
-      assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase()});
+      assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT)});
    }
  }
@ -84,7 +85,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
      builder.append("A");
    }
    Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
-    assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
+    assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
  }
  /*
@ -98,7 +99,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
    }
    builder.append("\ud801\udc1c");
    Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
-    assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
+    assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
  }
  // LUCENE-3642: normalize SMP->BMP and check that offsets are correct
--- a/lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java
+++ b/lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java
@ -123,11 +123,11 @@ public class GenerateJflexTLDMacros {
      while (null != (line = reader.readLine())) {
        Matcher matcher = TLD_PATTERN_1.matcher(line);
        if (matcher.matches()) {
-          TLDs.add(matcher.group(1).toLowerCase(Locale.US));
+          TLDs.add(matcher.group(1).toLowerCase(Locale.ROOT));
        } else {
          matcher = TLD_PATTERN_2.matcher(line);
          if (matcher.matches()) {
-            TLDs.add(matcher.group(1).toLowerCase(Locale.US));
+            TLDs.add(matcher.group(1).toLowerCase(Locale.ROOT));
          }
        }
      }
@ -146,7 +146,7 @@ public class GenerateJflexTLDMacros {
   */
  private void writeOutput(SortedSet<String> ASCIITLDs) throws IOException {
    final DateFormat dateFormat = DateFormat.getDateTimeInstance
-      (DateFormat.FULL, DateFormat.FULL, Locale.US);
+      (DateFormat.FULL, DateFormat.FULL, Locale.ROOT);
    dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
    final Writer writer = new OutputStreamWriter
      (new FileOutputStream(outputFile), "UTF-8");
--- a/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java
+++ b/lucene/analysis/icu/src/test/org/apache/lucene/collation/TestICUCollationKeyAnalyzer.java
@ -64,7 +64,7 @@ public class TestICUCollationKeyAnalyzer extends CollationTestBase {
  //  
  public void testCollationKeySort() throws Exception {
    Analyzer usAnalyzer = new ICUCollationKeyAnalyzer
-      (TEST_VERSION_CURRENT, Collator.getInstance(Locale.US));
+      (TEST_VERSION_CURRENT, Collator.getInstance(Locale.ROOT));
    Analyzer franceAnalyzer = new ICUCollationKeyAnalyzer
      (TEST_VERSION_CURRENT, Collator.getInstance(Locale.FRANCE));
    Analyzer swedenAnalyzer = new ICUCollationKeyAnalyzer
@ -73,7 +73,7 @@ public class TestICUCollationKeyAnalyzer extends CollationTestBase {
      (TEST_VERSION_CURRENT, Collator.getInstance(new Locale("da", "dk")));
    // The ICU Collator and java.text.Collator implementations differ in their
-    // orderings - "BFJHD" is the ordering for the ICU Collator for Locale.US.
+    // orderings - "BFJHD" is the ordering for the ICU Collator for Locale.ROOT.
    testCollationKeySort
    (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer, 
     "BFJHD", "ECAGI", "BJDFH", "BJDHF");
--- a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateHTMLStripCharFilterSupplementaryMacros.java
+++ b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateHTMLStripCharFilterSupplementaryMacros.java
@ -29,7 +29,7 @@ public class GenerateHTMLStripCharFilterSupplementaryMacros {
  private static final UnicodeSet BMP = new UnicodeSet("[\u0000-\uFFFF]");
  private static final String NL = System.getProperty("line.separator");
  private static final DateFormat DATE_FORMAT = DateFormat.getDateTimeInstance
-      (DateFormat.FULL, DateFormat.FULL, Locale.US);
+      (DateFormat.FULL, DateFormat.FULL, Locale.ROOT);
  static {
    DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
  }
--- a/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateJFlexSupplementaryMacros.java
+++ b/lucene/analysis/icu/src/tools/java/org/apache/lucene/analysis/icu/GenerateJFlexSupplementaryMacros.java
@ -32,7 +32,7 @@ public class GenerateJFlexSupplementaryMacros {
  private static final UnicodeSet BMP = new UnicodeSet("[\u0000-\uFFFF]");
  private static final String NL = System.getProperty("line.separator");
  private static final DateFormat DATE_FORMAT = DateFormat.getDateTimeInstance
-    (DateFormat.FULL, DateFormat.FULL, Locale.US);
+    (DateFormat.FULL, DateFormat.FULL, Locale.ROOT);
  static {
    DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
  }
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java
@ -607,7 +607,7 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
  private void doTestBocchan(int numIterations) throws Exception {
    LineNumberReader reader = new LineNumberReader(new InputStreamReader(
-        this.getClass().getResourceAsStream("bocchan.utf-8")));
+        this.getClass().getResourceAsStream("bocchan.utf-8"), "UTF-8"));
    String line = reader.readLine();
    reader.close();
--- a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelStemmer.java
+++ b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/stempel/StempelStemmer.java
@ -65,7 +65,7 @@ public class StempelStemmer {
    DataInputStream in = null;
    try {
      in = new DataInputStream(new BufferedInputStream(stemmerTable));
-      String method = in.readUTF().toUpperCase(Locale.ENGLISH);
+      String method = in.readUTF().toUpperCase(Locale.ROOT);
      if (method.indexOf('M') < 0) {
        return new org.egothor.stemmer.Trie(in);
      } else {
--- a/lucene/analysis/stempel/src/java/org/egothor/stemmer/Compile.java
+++ b/lucene/analysis/stempel/src/java/org/egothor/stemmer/Compile.java
@ -63,6 +63,7 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.LineNumberReader;
 import java.util.Locale;
 import java.util.StringTokenizer;
 /**
@ -89,7 +90,7 @@ public class Compile {
      return;
    }
-    args[0].toUpperCase();
+    args[0].toUpperCase(Locale.ROOT);
    backward = args[0].charAt(0) == '-';
    int qq = (backward) ? 1 : 0;
@ -127,7 +128,7 @@ public class Compile {
            new FileInputStream(args[i]), charset)));
        for (String line = in.readLine(); line != null; line = in.readLine()) {
          try {
-            line = line.toLowerCase();
+            line = line.toLowerCase(Locale.ROOT);
            StringTokenizer st = new StringTokenizer(line);
            String stem = st.nextToken();
            if (storeorig) {
--- a/lucene/analysis/stempel/src/java/org/egothor/stemmer/DiffIt.java
+++ b/lucene/analysis/stempel/src/java/org/egothor/stemmer/DiffIt.java
@ -55,9 +55,11 @@
 package org.egothor.stemmer;
 import java.io.BufferedReader;
-import java.io.FileReader;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.LineNumberReader;
 import java.util.Locale;
 import java.util.StringTokenizer;
 /**
@ -95,10 +97,11 @@ public class DiffIt {
      // System.out.println("[" + args[i] + "]");
      Diff diff = new Diff(ins, del, rep, nop);
      try {
-        in = new LineNumberReader(new BufferedReader(new FileReader(args[i])));
+        String charset = System.getProperty("egothor.stemmer.charset", "UTF-8");
        in = new LineNumberReader(new BufferedReader(new InputStreamReader(new FileInputStream(args[i]), charset)));
        for (String line = in.readLine(); line != null; line = in.readLine()) {
          try {
-            line = line.toLowerCase();
+            line = line.toLowerCase(Locale.ROOT);
            StringTokenizer st = new StringTokenizer(line);
            String stem = st.nextToken();
            System.out.println(stem + " -a");
--- a/lucene/analysis/stempel/src/test/org/egothor/stemmer/TestCompile.java
+++ b/lucene/analysis/stempel/src/test/org/egothor/stemmer/TestCompile.java
@ -60,12 +60,14 @@ import java.io.BufferedReader;
 import java.io.DataInputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.LineNumberReader;
 import java.net.URI;
 import java.util.Locale;
 import java.util.StringTokenizer;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 public class TestCompile extends LuceneTestCase {
@ -107,7 +109,7 @@ public class TestCompile extends LuceneTestCase {
    Trie trie;
    DataInputStream is = new DataInputStream(new BufferedInputStream(
        new FileInputStream(path)));
-    String method = is.readUTF().toUpperCase();
+    String method = is.readUTF().toUpperCase(Locale.ROOT);
    if (method.indexOf('M') < 0) {
      trie = new Trie(is);
    } else {
@ -120,11 +122,11 @@ public class TestCompile extends LuceneTestCase {
  private static void assertTrie(Trie trie, String file, boolean usefull,
      boolean storeorig) throws Exception {
    LineNumberReader in = new LineNumberReader(new BufferedReader(
-        new FileReader(file)));
+        new InputStreamReader(new FileInputStream(file), IOUtils.CHARSET_UTF_8)));
    for (String line = in.readLine(); line != null; line = in.readLine()) {
      try {
-        line = line.toLowerCase();
+        line = line.toLowerCase(Locale.ROOT);
        StringTokenizer st = new StringTokenizer(line);
        String stem = st.nextToken();
        if (storeorig) {
@ -132,7 +134,7 @@ public class TestCompile extends LuceneTestCase {
              .getLastOnPath(stem);
          StringBuilder stm = new StringBuilder(stem);
          Diff.apply(stm, cmd);
-          assertEquals(stem.toLowerCase(), stm.toString().toLowerCase());
+          assertEquals(stem.toLowerCase(Locale.ROOT), stm.toString().toLowerCase(Locale.ROOT));
        }
        while (st.hasMoreTokens()) {
          String token = st.nextToken();
@ -143,7 +145,7 @@ public class TestCompile extends LuceneTestCase {
              .getLastOnPath(token);
          StringBuilder stm = new StringBuilder(token);
          Diff.apply(stm, cmd);
-          assertEquals(stem.toLowerCase(), stm.toString().toLowerCase());
+          assertEquals(stem.toLowerCase(Locale.ROOT), stm.toString().toLowerCase(Locale.ROOT));
        }
      } catch (java.util.NoSuchElementException x) {
        // no base token (stem) on a line
--- a/lucene/benchmark/build.xml
+++ b/lucene/benchmark/build.xml
@ -262,9 +262,11 @@
    <target name="init" depends="module-build.init,resolve-icu,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet"/>
    <target name="clean-javacc">
-      <fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">
+      <delete>
-	<containsregexp expression="Generated.*By.*JavaCC"/>
+        <fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">
-      </fileset>
+    <containsregexp expression="Generated.*By.*JavaCC"/>
        </fileset>
      </delete>
    </target>
    <target name="javacc" depends="init,javacc-check" if="javacc.present">
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/Benchmark.java
@ -23,6 +23,7 @@ import java.io.Reader;
 import org.apache.lucene.benchmark.byTask.utils.Algorithm;
 import org.apache.lucene.benchmark.byTask.utils.Config;
 import org.apache.lucene.util.IOUtils;
 /**
@ -106,7 +107,7 @@ public class Benchmark {
    Benchmark benchmark = null;
    try {
-      benchmark = new Benchmark(new FileReader(algFile));
+      benchmark = new Benchmark(IOUtils.getDecodingReader(algFile, IOUtils.CHARSET_UTF_8));
    } catch (Exception e) {
      e.printStackTrace();
      System.exit(1);
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java
@ -18,12 +18,14 @@ package org.apache.lucene.benchmark.byTask.feeds;
 */
 import org.apache.lucene.benchmark.byTask.utils.Config;
 import org.apache.lucene.util.IOUtils;
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileFilter;
-import java.io.FileReader;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.text.DateFormat;
 import java.text.ParsePosition;
 import java.text.SimpleDateFormat;
@ -161,7 +163,7 @@ public class DirContentSource extends ContentSource {
      dfi = new DateFormatInfo();
      dfi.pos = new ParsePosition(0);
      // date format: 30-MAR-1987 14:22:36.87
-      dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS", Locale.US);
+      dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS", Locale.ROOT);
      dfi.df.setLenient(true);
      dateFormat.set(dfi);
    }
@ -198,7 +200,7 @@ public class DirContentSource extends ContentSource {
      name = f.getCanonicalPath()+"_"+iteration;
    }
-    BufferedReader reader = new BufferedReader(new FileReader(f));
+    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), IOUtils.CHARSET_UTF_8));
    String line = null;
    //First line is the date, 3rd is the title, rest is body
    String dateStr = reader.readLine();
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
@ -29,6 +29,7 @@ import java.util.Locale;
 import java.util.Map;
 import java.util.Properties;
 import java.util.Random;
 import java.util.TimeZone;
 import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.lucene.benchmark.byTask.utils.Config;
@ -182,8 +183,8 @@ public class DocMaker implements Closeable {
  private boolean storeBytes = false;
  private static class DateUtil {
-    public SimpleDateFormat parser = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss", Locale.US);
+    public SimpleDateFormat parser = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss", Locale.ROOT);
-    public Calendar cal = Calendar.getInstance();
+    public Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT);
    public ParsePosition pos = new ParsePosition(0);
    public DateUtil() {
      parser.setLenient(true);
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiContentSource.java
@ -25,6 +25,7 @@ import java.io.InputStreamReader;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CodingErrorAction;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
 import org.apache.lucene.benchmark.byTask.utils.Config;
@ -146,7 +147,7 @@ public class EnwikiContentSource extends ContentSource {
        case BODY:
          body = contents.toString();
          //workaround that startswith doesn't have an ignore case option, get at least 20 chars.
-          String startsWith = body.substring(0, Math.min(10, contents.length())).toLowerCase();
+          String startsWith = body.substring(0, Math.min(10, contents.length())).toLowerCase(Locale.ROOT);
          if (startsWith.startsWith("#redirect")) {
            body = null;
          }
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/FileBasedQueryMaker.java
@ -5,6 +5,7 @@ import org.apache.lucene.queryparser.classic.ParseException;
 import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.Version;
 import java.io.*;
@ -59,13 +60,14 @@ public class FileBasedQueryMaker extends AbstractQueryMaker implements QueryMake
    {
      File file = new File(fileName);
      Reader reader = null;
      // note: we use a decoding reader, so if your queries are screwed up you know
      if (file.exists()) {
-        reader = new FileReader(file);
+        reader = IOUtils.getDecodingReader(file, IOUtils.CHARSET_UTF_8);
      } else {
        //see if we can find it as a resource
        InputStream asStream = FileBasedQueryMaker.class.getClassLoader().getResourceAsStream(fileName);
        if (asStream != null) {
-          reader = new InputStreamReader(asStream);
+          reader = IOUtils.getDecodingReader(asStream, IOUtils.CHARSET_UTF_8);
        }
      }
      if (reader != null) {
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java
@ -35,7 +35,7 @@ public class LongToEnglishContentSource extends ContentSource{
  }
  // TODO: we could take param to specify locale...
-  private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ENGLISH,
+  private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ROOT,
                                                                       RuleBasedNumberFormat.SPELLOUT);
  @Override
  public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java
@ -37,7 +37,7 @@ public class LongToEnglishQueryMaker implements QueryMaker {
  protected QueryParser parser;
  // TODO: we could take param to specify locale...
-  private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ENGLISH,
+  private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ROOT,
                                                                       RuleBasedNumberFormat.SPELLOUT);
  public Query makeQuery(int size) throws Exception {
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ReutersContentSource.java
@ -19,8 +19,9 @@ package org.apache.lucene.benchmark.byTask.feeds;
 import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileReader;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.text.DateFormat;
 import java.text.ParsePosition;
 import java.text.SimpleDateFormat;
@ -29,6 +30,7 @@ import java.util.Date;
 import java.util.Locale;
 import org.apache.lucene.benchmark.byTask.utils.Config;
 import org.apache.lucene.util.IOUtils;
 /**
 * A {@link ContentSource} reading from the Reuters collection.
@ -74,7 +76,7 @@ public class ReutersContentSource extends ContentSource {
    if (dfi == null) {
      dfi = new DateFormatInfo();
      // date format: 30-MAR-1987 14:22:36.87
-      dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
+      dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.ROOT);
      dfi.df.setLenient(true);
      dfi.pos = new ParsePosition(0);
      dateFormat.set(dfi);
@ -112,7 +114,7 @@ public class ReutersContentSource extends ContentSource {
      name = f.getCanonicalPath() + "_" + iteration;
    }
-    BufferedReader reader = new BufferedReader(new FileReader(f));
+    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), IOUtils.CHARSET_UTF_8));
    try {
      // First line is the date, 3rd is the title, rest is body
      String dateStr = reader.readLine();
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java
@ -108,7 +108,7 @@ public class TrecContentSource extends ContentSource {
      dfi = new DateFormatInfo();
      dfi.dfs = new SimpleDateFormat[DATE_FORMATS.length];
      for (int i = 0; i < dfi.dfs.length; i++) {
-        dfi.dfs[i] = new SimpleDateFormat(DATE_FORMATS[i], Locale.US);
+        dfi.dfs[i] = new SimpleDateFormat(DATE_FORMATS[i], Locale.ROOT);
        dfi.dfs[i].setLenient(true);
      }
      dfi.pos = new ParsePosition(0);
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecDocParser.java
@ -47,7 +47,7 @@ public abstract class TrecDocParser {
  static final Map<String,ParsePathType> pathName2Type = new HashMap<String,ParsePathType>();
  static {
    for (ParsePathType ppt : ParsePathType.values()) {
-      pathName2Type.put(ppt.name().toUpperCase(Locale.ENGLISH),ppt);
+      pathName2Type.put(ppt.name().toUpperCase(Locale.ROOT),ppt);
    }
  }
@ -60,7 +60,7 @@ public abstract class TrecDocParser {
  public static ParsePathType pathType(File f) {
    int pathLength = 0;
    while (f != null && ++pathLength < MAX_PATH_LENGTH) {
-      ParsePathType ppt = pathName2Type.get(f.getName().toUpperCase(Locale.ENGLISH));
+      ParsePathType ppt = pathName2Type.get(f.getName().toUpperCase(Locale.ROOT));
      if (ppt!=null) {
        return ppt;
      }
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java
@ -0,0 +1,112 @@
 /* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */
 /* JavaCCOptions:STATIC=false */
 package org.apache.lucene.benchmark.byTask.feeds.demohtml;
 /**
 * This interface describes a character stream that maintains line and
 * column number positions of the characters.  It also has the capability
 * to back up the stream to some extent.  An implementation of this
 * interface is used in the TokenManager implementation generated by
 * JavaCCParser.
 *
 * All the methods except backup can be implemented in any fashion. backup
 * needs to be implemented correctly for the correct operation of the lexer.
 * The rest of the methods are all used to get information like line number,
 * column number and the String that constitutes a token and are not used
 * by the lexer. Hence their implementation won't affect the generated lexer's
 * operation.
 */
 public interface CharStream {
  /**
   * Returns the next character from the selected input.  The method
   * of selecting the input is the responsibility of the class
   * implementing this interface.  Can throw any java.io.IOException.
   *
   * @return the next character of the input
   * @throws java.io.IOException any I/O failure raised by the implementation
   */
  char readChar() throws java.io.IOException;
  /**
   * Returns the column position of the character last read.
   * @deprecated see {@link #getEndColumn}
   * @see #getEndColumn
   */
  int getColumn();
  /**
   * Returns the line number of the character last read.
   * @deprecated see {@link #getEndLine}
   * @see #getEndLine
   */
  int getLine();
  /**
   * Returns the column number of the last character for current token (being
   * matched after the last call to BeginToken).
   */
  int getEndColumn();
  /**
   * Returns the line number of the last character for current token (being
   * matched after the last call to BeginToken).
   */
  int getEndLine();
  /**
   * Returns the column number of the first character for current token (being
   * matched after the last call to BeginToken).
   */
  int getBeginColumn();
  /**
   * Returns the line number of the first character for current token (being
   * matched after the last call to BeginToken).
   */
  int getBeginLine();
  /**
   * Backs up the input stream by amount steps. Lexer calls this method if it
   * had already read some characters, but could not use them to match a
   * (longer) token. So, they will be used again as the prefix of the next
   * token and it is the implementation's responsibility to do this right.
   *
   * @param amount number of steps to back up by
   */
  void backup(int amount);
  /**
   * Returns the next character that marks the beginning of the next token.
   * All characters must remain in the buffer between two successive calls
   * to this method to implement backup correctly.
   *
   * @return the first character of the next token
   * @throws java.io.IOException any I/O failure raised by the implementation
   */
  char BeginToken() throws java.io.IOException;
  /**
   * Returns a string made up of characters from the marked token beginning
   * to the current buffer position. Implementations have the choice of returning
   * anything that they want to. For example, for efficiency, one might decide
   * to just return null, which is a valid implementation.
   */
  String GetImage();
  /**
   * Returns an array of characters that make up the suffix of length 'len' for
   * the currently matched token. This is used to build up the matched string
   * for use in actions in the case of MORE. A simple and inefficient
   * implementation of this is as follows :
   *
   *   {
   *      String t = GetImage();
   *      return t.substring(t.length() - len, t.length()).toCharArray();
   *   }
   *
   * @param len length of the suffix to return
   */
  char[] GetSuffix(int len);
  /**
   * The lexer calls this function to indicate that it is done with the stream
   * and hence implementations can free any resources held by this class.
   * Again, the body of this function can be just empty and it will not
   * affect the lexer's operation.
   */
  void Done();
 }
 /* JavaCC - OriginalChecksum=e26d9399cd34335f985e19c1fa86c11b (do not edit this line) */
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java
@ -0,0 +1,123 @@
 // FastCharStream.java
 package org.apache.lucene.benchmark.byTask.feeds.demohtml;
 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *  
 */
 import java.io.*;
 /**
 * A lightweight {@link CharStream} backed by a single growable char buffer
 * that is filled from a {@link Reader}.
 *
 * <p>No line counting is performed: every line getter returns 1, and the
 * column getters return character offsets into the input (offset of the
 * buffer in the input plus the position inside the buffer).
 */
 public final class FastCharStream implements CharStream {
  /** Backing storage; allocated on the first refill. */
  char[] buffer = null;
  /** Index one past the last valid char in {@link #buffer}. */
  int bufferLength = 0;
  /** Index in {@link #buffer} of the next char to hand out. */
  int bufferPosition = 0;
  /** Index in {@link #buffer} where the current token begins. */
  int tokenStart = 0;
  /** Offset in the input of the first char held in {@link #buffer}. */
  int bufferStart = 0;
  /** Where the characters come from. */
  Reader input;
  /** Creates a stream that pulls its characters from the given Reader. */
  public FastCharStream(Reader r) {
    input = r;
  }
  /** Hands out the next char, pulling more input first when the buffer is exhausted. */
  public final char readChar() throws IOException {
    if (bufferPosition >= bufferLength) {
      refill();
    }
    return buffer[bufferPosition++];
  }
  /**
   * Makes room in the buffer and reads more input into it. The chars of the
   * current token (from tokenStart onwards) are always retained: they are
   * either moved to the front of the buffer or, when the token already
   * starts at the front and the buffer is full, the buffer is doubled.
   *
   * @throws IOException with message "read past eof" when the reader is exhausted
   */
  private final void refill() throws IOException {
    final int retained = bufferLength - tokenStart;
    if (tokenStart != 0) {
      // slide the pending token down to index 0
      System.arraycopy(buffer, tokenStart, buffer, 0, retained);
    } else if (buffer == null) {
      // very first fill: create the buffer
      buffer = new char[2048];
    } else if (bufferLength == buffer.length) {
      // token occupies the whole buffer: double the capacity
      final char[] grown = new char[buffer.length * 2];
      System.arraycopy(buffer, 0, grown, 0, bufferLength);
      buffer = grown;
    }
    // bookkeeping now that the token starts at index 0
    bufferLength = retained;
    bufferPosition = retained;
    bufferStart += tokenStart;
    tokenStart = 0;
    // top up the free tail of the buffer
    final int count = input.read(buffer, retained, buffer.length - retained);
    if (count == -1) {
      throw new IOException("read past eof");
    }
    bufferLength += count;
  }
  /** Marks the current position as the start of a new token and returns its first char. */
  public final char BeginToken() throws IOException {
    tokenStart = bufferPosition;
    return readChar();
  }
  /** Steps the read position back by {@code amount} chars. */
  public final void backup(int amount) {
    bufferPosition -= amount;
  }
  /** Returns the chars from the token start up to the current read position. */
  public final String GetImage() {
    final int length = bufferPosition - tokenStart;
    return new String(buffer, tokenStart, length);
  }
  /** Returns a copy of the {@code len} chars that precede the current read position. */
  public final char[] GetSuffix(int len) {
    final char[] suffix = new char[len];
    final int from = bufferPosition - len;
    System.arraycopy(buffer, from, suffix, 0, len);
    return suffix;
  }
  /** Closes the underlying reader; a failure to close is not reported. */
  public final void Done() {
    try {
      input.close();
    } catch (IOException ignored) {
      // nothing is done with a close failure here
    }
  }
  /** Same value as {@link #getEndColumn()}. */
  public final int getColumn() {
    return getEndColumn();
  }
  /** Always 1: lines are not tracked. */
  public final int getLine() {
    return 1;
  }
  /** Offset in the input of the current read position. */
  public final int getEndColumn() {
    return bufferStart + bufferPosition;
  }
  /** Always 1: lines are not tracked. */
  public final int getEndLine() {
    return 1;
  }
  /** Offset in the input of the current token's first char. */
  public final int getBeginColumn() {
    return bufferStart + tokenStart;
  }
  /** Always 1: lines are not tracked. */
  public final int getBeginLine() {
    return 1;
  }
 }
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
@ -29,6 +29,10 @@ public class HTMLParser implements HTMLParserConstants {
  private MyPipedInputStream pipeInStream = null;
  private PipedOutputStream pipeOutStream = null;
  public HTMLParser(Reader reader) {
    this(new FastCharStream(reader));
  }
  private class MyPipedInputStream extends PipedInputStream{
    public MyPipedInputStream(){
@ -227,7 +231,7 @@ InterruptedException {
  Token t1, t2;
  boolean inImg = false;
    t1 = jj_consume_token(TagName);
-   String tagName = t1.image.toLowerCase(Locale.ENGLISH);
+   String tagName = t1.image.toLowerCase(Locale.ROOT);
   if(Tags.WS_ELEMS.contains(tagName) ) {
      addSpace();
    }
@ -264,7 +268,7 @@ InterruptedException {
                        )
           && t2 != null)
        {
-                currentMetaTag=t2.image.toLowerCase(Locale.ENGLISH);
+                currentMetaTag=t2.image.toLowerCase(Locale.ROOT);
                if(currentMetaTag != null && currentMetaContent != null) {
                addMetaTag();
                }
@ -272,7 +276,7 @@ InterruptedException {
        if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 !=
 null)
        {
-                currentMetaContent=t2.image.toLowerCase(Locale.ENGLISH);
+                currentMetaContent=t2.image.toLowerCase(Locale.ROOT);
                if(currentMetaTag != null && currentMetaContent != null) {
                addMetaTag();
                }
@ -464,7 +468,6 @@ null)
  /** Generated Token Manager. */
  public HTMLParserTokenManager token_source;
  SimpleCharStream jj_input_stream;
  /** Current token. */
  public Token token;
  /** Next token. */
@ -485,14 +488,9 @@ null)
  private boolean jj_rescan = false;
  private int jj_gc = 0;
-  /** Constructor with InputStream. */
+  /** Constructor with user supplied CharStream. */
-  public HTMLParser(java.io.InputStream stream) {
+  public HTMLParser(CharStream stream) {
-     this(stream, null);
+    token_source = new HTMLParserTokenManager(stream);
  }
  /** Constructor with InputStream and supplied encoding */
  public HTMLParser(java.io.InputStream stream, String encoding) {
    try { jj_input_stream = new SimpleCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
    token_source = new HTMLParserTokenManager(jj_input_stream);
    token = new Token();
    jj_ntk = -1;
    jj_gen = 0;
@ -501,35 +499,8 @@ null)
  }
  /** Reinitialise. */
-  public void ReInit(java.io.InputStream stream) {
+  public void ReInit(CharStream stream) {
-     ReInit(stream, null);
+    token_source.ReInit(stream);
  }
  /** Reinitialise. */
  public void ReInit(java.io.InputStream stream, String encoding) {
    try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
    token_source.ReInit(jj_input_stream);
    token = new Token();
    jj_ntk = -1;
    jj_gen = 0;
    for (int i = 0; i < 14; i++) jj_la1[i] = -1;
    for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
  }
  /** Constructor. */
  public HTMLParser(java.io.Reader stream) {
    jj_input_stream = new SimpleCharStream(stream, 1, 1);
    token_source = new HTMLParserTokenManager(jj_input_stream);
    token = new Token();
    jj_ntk = -1;
    jj_gen = 0;
    for (int i = 0; i < 14; i++) jj_la1[i] = -1;
    for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
  }
  /** Reinitialise. */
  public void ReInit(java.io.Reader stream) {
    jj_input_stream.ReInit(stream, 1, 1);
    token_source.ReInit(jj_input_stream);
    token = new Token();
    jj_ntk = -1;
    jj_gen = 0;
@ -631,7 +602,7 @@ null)
      return (jj_ntk = jj_nt.kind);
  }
-  private java.util.List<int[]> jj_expentries = new java.util.ArrayList<int[]>();
+  private java.util.List jj_expentries = new java.util.ArrayList();
  private int[] jj_expentry;
  private int jj_kind = -1;
  private int[] jj_lasttokens = new int[100];
@ -691,7 +662,7 @@ null)
    jj_add_error_token(0, 0);
    int[][] exptokseq = new int[jj_expentries.size()][];
    for (int i = 0; i < jj_expentries.size(); i++) {
-      exptokseq[i] = jj_expentries.get(i);
+      exptokseq[i] = (int[])jj_expentries.get(i);
    }
    return new ParseException(token, exptokseq, tokenImage);
  }
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj
@ -22,6 +22,7 @@ options {
  //DEBUG_LOOKAHEAD = true;
  //DEBUG_TOKEN_MANAGER = true;
  UNICODE_INPUT = true;
  USER_CHAR_STREAM=true;
 }
 PARSER_BEGIN(HTMLParser)
@ -56,6 +57,10 @@ public class HTMLParser {
  private MyPipedInputStream pipeInStream = null;
  private PipedOutputStream pipeOutStream = null;
  public HTMLParser(Reader reader) {
    this(new FastCharStream(reader));
  }
  private class MyPipedInputStream extends PipedInputStream{
    public MyPipedInputStream(){
@ -227,7 +232,7 @@ void Tag() throws IOException :
 }
 {
  t1=<TagName> {
-   String tagName = t1.image.toLowerCase(Locale.ENGLISH);
+   String tagName = t1.image.toLowerCase(Locale.ROOT);
   if(Tags.WS_ELEMS.contains(tagName) ) {
      addSpace();
    }
@ -249,7 +254,7 @@ void Tag() throws IOException :
 			)
 	   && t2 != null)
 	{
-		currentMetaTag=t2.image.toLowerCase(Locale.ENGLISH);
+		currentMetaTag=t2.image.toLowerCase(Locale.ROOT);
 		if(currentMetaTag != null && currentMetaContent != null) {
        	addMetaTag();
 		}
@ -257,7 +262,7 @@ void Tag() throws IOException :
    	if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 !=
 null)
 	{
-		currentMetaContent=t2.image.toLowerCase(Locale.ENGLISH);
+		currentMetaContent=t2.image.toLowerCase(Locale.ROOT);
 		if(currentMetaTag != null && currentMetaContent != null) {
        	addMetaTag();
 		}
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParserTokenManager.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParserTokenManager.java
@ -464,7 +464,7 @@ private int jjMoveNfa_0(int startState, int curPos)
      }
      else
      {
-         int hiByte = (curChar >> 8);
+         int hiByte = (int)(curChar >> 8);
         int i1 = hiByte >> 6;
         long l1 = 1L << (hiByte & 077);
         int i2 = (curChar & 0xff) >> 6;
@ -569,7 +569,7 @@ private int jjMoveNfa_5(int startState, int curPos)
      }
      else
      {
-         int hiByte = (curChar >> 8);
+         int hiByte = (int)(curChar >> 8);
         int i1 = hiByte >> 6;
         long l1 = 1L << (hiByte & 077);
         int i2 = (curChar & 0xff) >> 6;
@ -670,7 +670,7 @@ private int jjMoveNfa_7(int startState, int curPos)
      }
      else
      {
-         int hiByte = (curChar >> 8);
+         int hiByte = (int)(curChar >> 8);
         int i1 = hiByte >> 6;
         long l1 = 1L << (hiByte & 077);
         int i2 = (curChar & 0xff) >> 6;
@ -766,7 +766,7 @@ private int jjMoveNfa_4(int startState, int curPos)
      }
      else
      {
-         int hiByte = (curChar >> 8);
+         int hiByte = (int)(curChar >> 8);
         int i1 = hiByte >> 6;
         long l1 = 1L << (hiByte & 077);
         int i2 = (curChar & 0xff) >> 6;
@ -892,7 +892,7 @@ private int jjMoveNfa_3(int startState, int curPos)
      }
      else
      {
-         int hiByte = (curChar >> 8);
+         int hiByte = (int)(curChar >> 8);
         int i1 = hiByte >> 6;
         long l1 = 1L << (hiByte & 077);
         int i2 = (curChar & 0xff) >> 6;
@ -1061,7 +1061,7 @@ private int jjMoveNfa_6(int startState, int curPos)
      }
      else
      {
-         int hiByte = (curChar >> 8);
+         int hiByte = (int)(curChar >> 8);
         int i1 = hiByte >> 6;
         long l1 = 1L << (hiByte & 077);
         int i2 = (curChar & 0xff) >> 6;
@ -1205,7 +1205,7 @@ private int jjMoveNfa_1(int startState, int curPos)
      }
      else
      {
-         int hiByte = (curChar >> 8);
+         int hiByte = (int)(curChar >> 8);
         int i1 = hiByte >> 6;
         long l1 = 1L << (hiByte & 077);
         int i2 = (curChar & 0xff) >> 6;
@ -1361,7 +1361,7 @@ private int jjMoveNfa_2(int startState, int curPos)
      }
      else
      {
-         int hiByte = (curChar >> 8);
+         int hiByte = (int)(curChar >> 8);
         int i1 = hiByte >> 6;
         long l1 = 1L << (hiByte & 077);
         int i2 = (curChar & 0xff) >> 6;
@ -1441,25 +1441,23 @@ static final long[] jjtoToken = {
 static final long[] jjtoSkip = {
   0x400000L, 
 };
-protected SimpleCharStream input_stream;
+protected CharStream input_stream;
 private final int[] jjrounds = new int[28];
 private final int[] jjstateSet = new int[56];
 protected char curChar;
 /** Constructor. */
-public HTMLParserTokenManager(SimpleCharStream stream){
+public HTMLParserTokenManager(CharStream stream){
   if (SimpleCharStream.staticFlag)
      throw new Error("ERROR: Cannot use a static CharStream class with a non-static lexical analyzer.");
   input_stream = stream;
 }
 /** Constructor. */
-public HTMLParserTokenManager(SimpleCharStream stream, int lexState){
+public HTMLParserTokenManager(CharStream stream, int lexState){
   this(stream);
   SwitchTo(lexState);
 }
 /** Reinitialise parser. */
-public void ReInit(SimpleCharStream stream)
+public void ReInit(CharStream stream)
 {
   jjmatchedPos = jjnewStateCnt = 0;
   curLexState = defaultLexState;
@ -1475,7 +1473,7 @@ private void ReInitRounds()
 }
 /** Reinitialise parser. */
-public void ReInit(SimpleCharStream stream, int lexState)
+public void ReInit(CharStream stream, int lexState)
 {
   ReInit(stream);
   SwitchTo(lexState);
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/ParseException.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/ParseException.java
@ -195,4 +195,4 @@ public class ParseException extends Exception {
   }
 }
-/* JavaCC - OriginalChecksum=e5376178619291bc9d2c0c6647dc3cef (do not edit this line) */
+/* JavaCC - OriginalChecksum=e449d0e43f3d85deb1260a88b7e90fcd (do not edit this line) */
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/SimpleCharStream.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/SimpleCharStream.java
@ -1,472 +0,0 @@
 /* Generated By:JavaCC: Do not edit this line. SimpleCharStream.java Version 4.1 */
 /* JavaCCOptions:STATIC=false */
 package org.apache.lucene.benchmark.byTask.feeds.demohtml;
 /**
 * An implementation of interface CharStream, where the stream is assumed to
 * contain only ASCII characters (without unicode processing).
 */
 public class SimpleCharStream
 {
 /** Whether parser is static. */
  public static final boolean staticFlag = false;
  int bufsize;
  int available;
  int tokenBegin;
 /** Position in buffer. */
  public int bufpos = -1;
  protected int bufline[];
  protected int bufcolumn[];
  protected int column = 0;
  protected int line = 1;
  protected boolean prevCharIsCR = false;
  protected boolean prevCharIsLF = false;
  protected java.io.Reader inputStream;
  protected char[] buffer;
  protected int maxNextCharInd = 0;
  protected int inBuf = 0;
  protected int tabSize = 8;
  protected void setTabSize(int i) { tabSize = i; }
  protected int getTabSize(int i) { return tabSize; }
  protected void ExpandBuff(boolean wrapAround)
  {
     char[] newbuffer = new char[bufsize + 2048];
     int newbufline[] = new int[bufsize + 2048];
     int newbufcolumn[] = new int[bufsize + 2048];
     try
     {
        if (wrapAround)
        {
           System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
           System.arraycopy(buffer, 0, newbuffer,
                                             bufsize - tokenBegin, bufpos);
           buffer = newbuffer;
           System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
           System.arraycopy(bufline, 0, newbufline, bufsize - tokenBegin, bufpos);
           bufline = newbufline;
           System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
           System.arraycopy(bufcolumn, 0, newbufcolumn, bufsize - tokenBegin, bufpos);
           bufcolumn = newbufcolumn;
           maxNextCharInd = (bufpos += (bufsize - tokenBegin));
        }
        else
        {
           System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
           buffer = newbuffer;
           System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
           bufline = newbufline;
           System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
           bufcolumn = newbufcolumn;
           maxNextCharInd = (bufpos -= tokenBegin);
        }
     }
     catch (Throwable t)
     {
        throw new Error(t.getMessage());
     }
     bufsize += 2048;
     available = bufsize;
     tokenBegin = 0;
  }
  protected void FillBuff() throws java.io.IOException
  {
     if (maxNextCharInd == available)
     {
        if (available == bufsize)
        {
           if (tokenBegin > 2048)
           {
              bufpos = maxNextCharInd = 0;
              available = tokenBegin;
           }
           else if (tokenBegin < 0)
              bufpos = maxNextCharInd = 0;
           else
              ExpandBuff(false);
        }
        else if (available > tokenBegin)
           available = bufsize;
        else if ((tokenBegin - available) < 2048)
           ExpandBuff(true);
        else
           available = tokenBegin;
     }
     int i;
     try {
        if ((i = inputStream.read(buffer, maxNextCharInd,
                                    available - maxNextCharInd)) == -1)
        {
           inputStream.close();
           throw new java.io.IOException();
        }
        else
           maxNextCharInd += i;
        return;
     }
     catch(java.io.IOException e) {
        --bufpos;
        backup(0);
        if (tokenBegin == -1)
           tokenBegin = bufpos;
        throw e;
     }
  }
 /** Start. */
  public char BeginToken() throws java.io.IOException
  {
     tokenBegin = -1;
     char c = readChar();
     tokenBegin = bufpos;
     return c;
  }
  protected void UpdateLineColumn(char c)
  {
     column++;
     if (prevCharIsLF)
     {
        prevCharIsLF = false;
        line += (column = 1);
     }
     else if (prevCharIsCR)
     {
        prevCharIsCR = false;
        if (c == '\n')
        {
           prevCharIsLF = true;
        }
        else
           line += (column = 1);
     }
     switch (c)
     {
        case '\r' :
           prevCharIsCR = true;
           break;
        case '\n' :
           prevCharIsLF = true;
           break;
        case '\t' :
           column--;
           column += (tabSize - (column % tabSize));
           break;
        default :
           break;
     }
     bufline[bufpos] = line;
     bufcolumn[bufpos] = column;
  }
 /** Read a character. */
  public char readChar() throws java.io.IOException
  {
     if (inBuf > 0)
     {
        --inBuf;
        if (++bufpos == bufsize)
           bufpos = 0;
        return buffer[bufpos];
     }
     if (++bufpos >= maxNextCharInd)
        FillBuff();
     char c = buffer[bufpos];
     UpdateLineColumn(c);
     return c;
  }
  /**
   * @deprecated
   * @see #getEndColumn
   */
  public int getColumn() {
     return bufcolumn[bufpos];
  }
  /**
   * @deprecated
   * @see #getEndLine
   */
  public int getLine() {
     return bufline[bufpos];
  }
  /** Get token end column number. */
  public int getEndColumn() {
     return bufcolumn[bufpos];
  }
  /** Get token end line number. */
  public int getEndLine() {
     return bufline[bufpos];
  }
  /** Get token beginning column number. */
  public int getBeginColumn() {
     return bufcolumn[tokenBegin];
  }
  /** Get token beginning line number. */
  public int getBeginLine() {
     return bufline[tokenBegin];
  }
 /** Backup a number of characters. */
  public void backup(int amount) {
    inBuf += amount;
    if ((bufpos -= amount) < 0)
       bufpos += bufsize;
  }
  /** Constructor. */
  public SimpleCharStream(java.io.Reader dstream, int startline,
  int startcolumn, int buffersize)
  {
    inputStream = dstream;
    line = startline;
    column = startcolumn - 1;
    available = bufsize = buffersize;
    buffer = new char[buffersize];
    bufline = new int[buffersize];
    bufcolumn = new int[buffersize];
  }
  /** Constructor. */
  public SimpleCharStream(java.io.Reader dstream, int startline,
                          int startcolumn)
  {
     this(dstream, startline, startcolumn, 4096);
  }
  /** Constructor. */
  public SimpleCharStream(java.io.Reader dstream)
  {
     this(dstream, 1, 1, 4096);
  }
  /** Reinitialise. */
  public void ReInit(java.io.Reader dstream, int startline,
  int startcolumn, int buffersize)
  {
    inputStream = dstream;
    line = startline;
    column = startcolumn - 1;
    if (buffer == null || buffersize != buffer.length)
    {
      available = bufsize = buffersize;
      buffer = new char[buffersize];
      bufline = new int[buffersize];
      bufcolumn = new int[buffersize];
    }
    prevCharIsLF = prevCharIsCR = false;
    tokenBegin = inBuf = maxNextCharInd = 0;
    bufpos = -1;
  }
  /** Reinitialise. */
  public void ReInit(java.io.Reader dstream, int startline,
                     int startcolumn)
  {
     ReInit(dstream, startline, startcolumn, 4096);
  }
  /** Reinitialise. */
  public void ReInit(java.io.Reader dstream)
  {
     ReInit(dstream, 1, 1, 4096);
  }
  /** Constructor. */
  public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
  int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
  {
     this(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
  }
  /** Constructor. */
  public SimpleCharStream(java.io.InputStream dstream, int startline,
  int startcolumn, int buffersize)
  {
     this(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
  }
  /** Constructor. */
  public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
                          int startcolumn) throws java.io.UnsupportedEncodingException
  {
     this(dstream, encoding, startline, startcolumn, 4096);
  }
  /** Constructor. */
  public SimpleCharStream(java.io.InputStream dstream, int startline,
                          int startcolumn)
  {
     this(dstream, startline, startcolumn, 4096);
  }
  /** Constructor. */
  public SimpleCharStream(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
  {
     this(dstream, encoding, 1, 1, 4096);
  }
  /** Constructor. */
  public SimpleCharStream(java.io.InputStream dstream)
  {
     this(dstream, 1, 1, 4096);
  }
  /** Reinitialise. */
  public void ReInit(java.io.InputStream dstream, String encoding, int startline,
                          int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
  {
     ReInit(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
  }
  /** Reinitialise. */
  public void ReInit(java.io.InputStream dstream, int startline,
                          int startcolumn, int buffersize)
  {
     ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
  }
  /** Reinitialise. */
  public void ReInit(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
  {
     ReInit(dstream, encoding, 1, 1, 4096);
  }
  /** Reinitialise. */
  public void ReInit(java.io.InputStream dstream)
  {
     ReInit(dstream, 1, 1, 4096);
  }
  /** Reinitialise. */
  public void ReInit(java.io.InputStream dstream, String encoding, int startline,
                     int startcolumn) throws java.io.UnsupportedEncodingException
  {
     ReInit(dstream, encoding, startline, startcolumn, 4096);
  }
  /** Reinitialise. */
  public void ReInit(java.io.InputStream dstream, int startline,
                     int startcolumn)
  {
     ReInit(dstream, startline, startcolumn, 4096);
  }
  /** Get token literal value. */
  public String GetImage()
  {
     if (bufpos >= tokenBegin)
        return new String(buffer, tokenBegin, bufpos - tokenBegin + 1);
     else
        return new String(buffer, tokenBegin, bufsize - tokenBegin) +
                              new String(buffer, 0, bufpos + 1);
  }
  /** Get the suffix. */
  public char[] GetSuffix(int len)
  {
     char[] ret = new char[len];
     if ((bufpos + 1) >= len)
        System.arraycopy(buffer, bufpos - len + 1, ret, 0, len);
     else
     {
        System.arraycopy(buffer, bufsize - (len - bufpos - 1), ret, 0,
                                                          len - bufpos - 1);
        System.arraycopy(buffer, 0, ret, len - bufpos - 1, bufpos + 1);
     }
     return ret;
  }
  /** Reset buffer when finished. */
  public void Done()
  {
     buffer = null;
     bufline = null;
     bufcolumn = null;
  }
  /**
   * Method to adjust line and column numbers for the start of a token.
   */
  public void adjustBeginLineColumn(int newLine, int newCol)
  {
     int start = tokenBegin;
     int len;
     if (bufpos >= tokenBegin)
     {
        len = bufpos - tokenBegin + inBuf + 1;
     }
     else
     {
        len = bufsize - tokenBegin + bufpos + 1 + inBuf;
     }
     int i = 0, j = 0, k = 0;
     int nextColDiff = 0, columnDiff = 0;
     while (i < len &&
            bufline[j = start % bufsize] == bufline[k = ++start % bufsize])
     {
        bufline[j] = newLine;
        nextColDiff = columnDiff + bufcolumn[k] - bufcolumn[j];
        bufcolumn[j] = newCol + columnDiff;
        columnDiff = nextColDiff;
        i++;
     }
     if (i < len)
     {
        bufline[j] = newLine++;
        bufcolumn[j] = newCol + columnDiff;
        while (i++ < len)
        {
           if (bufline[j = start % bufsize] != bufline[++start % bufsize])
              bufline[j] = newLine++;
           else
              bufline[j] = newLine;
        }
     }
     line = bufline[j];
     column = bufcolumn[j];
  }
 }
 /* JavaCC - OriginalChecksum=7c2e625567f11c3058995b779d0149ad (do not edit this line) */
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Token.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Token.java
@ -121,4 +121,4 @@ public class Token {
  }
 }
-/* JavaCC - OriginalChecksum=e49c2a0c10d50ff2ebd0639552330ce7 (do not edit this line) */
+/* JavaCC - OriginalChecksum=24643dc85fd6daeec42ceba20b46ee61 (do not edit this line) */
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/TokenMgrError.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/TokenMgrError.java
@ -138,4 +138,4 @@ public class TokenMgrError extends Error
      this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
   }
 }
-/* JavaCC - OriginalChecksum=3aee554f696e5d7a18b1ad330c1de53f (do not edit this line) */
+/* JavaCC - OriginalChecksum=538f0da130356fcc0bc7db621ab0389d (do not edit this line) */
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/AddDocTask.java
@ -18,6 +18,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
 */
 import java.text.NumberFormat;
 import java.util.Locale;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
@ -61,7 +62,7 @@ public class AddDocTask extends PerfTask {
  @Override
  protected String getLogMessage(int recsCount) {
-    return String.format("added %9d docs",recsCount);
+    return String.format(Locale.ROOT, "added %9d docs",recsCount);
  }
  @Override
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
@ -40,6 +40,7 @@ import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.nio.charset.Charset;
 /**
 * Create an index. <br>
@ -182,7 +183,7 @@ public class CreateIndexTask extends PerfTask {
        iwc.setInfoStream(System.err);
      } else {
        File f = new File(infoStreamVal).getAbsoluteFile();
-        iwc.setInfoStream(new PrintStream(new BufferedOutputStream(new FileOutputStream(f))));
+        iwc.setInfoStream(new PrintStream(new BufferedOutputStream(new FileOutputStream(f)), false, Charset.defaultCharset().name()));
      }
    }
    IndexWriter writer = new IndexWriter(runData.getDirectory(), iwc);
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java
@ -17,6 +17,8 @@ package org.apache.lucene.benchmark.byTask.tasks;
 * limitations under the License.
 */
 import java.util.Locale;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.stats.Points;
 import org.apache.lucene.benchmark.byTask.stats.TaskStats;
@ -266,7 +268,7 @@ public abstract class PerfTask implements Cloneable {
  public void tearDown() throws Exception {
    if (++logStepCount % logStep == 0) {
      double time = (System.currentTimeMillis() - runData.getStartTimeMillis()) / 1000.0;
-      System.out.println(String.format("%7.2f",time) + " sec --> "
+      System.out.println(String.format(Locale.ROOT, "%7.2f",time) + " sec --> "
          + Thread.currentThread().getName() + " " + getLogMessage(logStepCount));
    }
  }
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchWithSortTask.java
@ -77,7 +77,7 @@ public class SearchWithSortTask extends ReadTask {
        } else {
          throw new RuntimeException("You must specify the sort type ie page:int,subject:string");
        }
-        sortField0 = new SortField(fieldName, SortField.Type.valueOf(typeString.toUpperCase(Locale.ENGLISH)));
+        sortField0 = new SortField(fieldName, SortField.Type.valueOf(typeString.toUpperCase(Locale.ROOT)));
      }
      sortFields[upto++] = sortField0;
    }
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java
@ -19,6 +19,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Locale;
 import java.text.NumberFormat;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
@ -428,7 +429,7 @@ public class TaskSequence extends PerfTask {
    sb.append(padd);
    sb.append(!letChildReport ? ">" : (parallel ? "]" : "}"));
    if (fixedTime) {
-      sb.append(" " + NumberFormat.getNumberInstance().format(runTimeSec) + "s");
+      sb.append(" " + NumberFormat.getNumberInstance(Locale.ROOT).format(runTimeSec) + "s");
    } else if (repetitions>1) {
      sb.append(" * " + repetitions);
    } else if (repetitions==REPEAT_EXHAUST) {
@ -487,7 +488,7 @@ public class TaskSequence extends PerfTask {
    if (rate>0) {
      seqName += "_" + rate + (perMin?"/min":"/sec"); 
    }
-    if (parallel && seqName.toLowerCase().indexOf("par")<0) {
+    if (parallel && seqName.toLowerCase(Locale.ROOT).indexOf("par")<0) {
      seqName += "_Par";
    }
  }
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java
@ -22,6 +22,7 @@ import java.io.StringReader;
 import java.lang.reflect.Constructor;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Locale;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
@ -159,7 +160,7 @@ public class Algorithm {
                } else {
                  stok.nextToken();
                  if (stok.ttype!=StreamTokenizer.TT_WORD) throw new Exception("expected rate unit: 'min' or 'sec' - "+stok.toString());
-                  String unit = stok.sval.toLowerCase();
+                  String unit = stok.sval.toLowerCase(Locale.ROOT);
                  if ("min".equals(unit)) {
                    ((TaskSequence)prevTask).setRate((int)stok.nval,true); // set rate per min
                  } else if ("sec".equals(unit)) {
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java
@ -18,6 +18,7 @@ package org.apache.lucene.benchmark.byTask.utils;
 */
 import java.text.NumberFormat;
 import java.util.Locale;
 /**
 * Formatting utilities (for reports).
@ -25,9 +26,9 @@ import java.text.NumberFormat;
 public class Format {
  private static NumberFormat numFormat [] = { 
-    NumberFormat.getInstance(), 
+    NumberFormat.getInstance(Locale.ROOT), 
-    NumberFormat.getInstance(),
+    NumberFormat.getInstance(Locale.ROOT),
-    NumberFormat.getInstance(),
+    NumberFormat.getInstance(Locale.ROOT),
  };
  private static final String padd = "                                                 ";
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StreamUtils.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StreamUtils.java
@ -99,7 +99,7 @@ public class StreamUtils {
    String fileName = file.getName();
    int idx = fileName.lastIndexOf('.');
    if (idx != -1) {
-      type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ENGLISH));
+      type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ROOT));
    }
    return type==null ? Type.PLAIN : type;
 	}
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityStats.java
@ -19,6 +19,7 @@ package org.apache.lucene.benchmark.quality;
 import java.io.PrintWriter;
 import java.text.NumberFormat;
 import java.util.ArrayList;
 import java.util.Locale;
 /**
 * Results of quality benchmark run for a single query or for a set of queries.
@ -141,7 +142,7 @@ public class QualityStats {
      logger.println(title);
    }
    prefix = prefix==null ? "" : prefix;
-    NumberFormat nf = NumberFormat.getInstance();
+    NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
    nf.setMaximumFractionDigits(3);
    nf.setMinimumFractionDigits(3);
    nf.setGroupingUsed(true);
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/QueryDriver.java
@ -24,11 +24,13 @@ import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.IOUtils;
 import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileReader;
+import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
 import java.nio.charset.Charset;
 import java.util.HashSet;
 import java.util.Set;
@ -51,7 +53,7 @@ public class QueryDriver {
    File topicsFile = new File(args[0]);
    File qrelsFile = new File(args[1]);
-    SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2]), "lucene");
+    SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2], "UTF-8"), "lucene");
    FSDirectory dir = FSDirectory.open(new File(args[3]));
    String fieldSpec = args.length == 5 ? args[4] : "T"; // default to Title-only if not specified.
    IndexReader reader = DirectoryReader.open(dir);
@ -60,14 +62,14 @@ public class QueryDriver {
    int maxResults = 1000;
    String docNameField = "docname";
-    PrintWriter logger = new PrintWriter(System.out, true);
+    PrintWriter logger = new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()), true);
    // use trec utilities to read trec topics into quality queries
    TrecTopicsReader qReader = new TrecTopicsReader();
-    QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile)));
+    QualityQuery qqs[] = qReader.readQueries(new BufferedReader(IOUtils.getDecodingReader(topicsFile, IOUtils.CHARSET_UTF_8)));
    // prepare judge, with trec utilities that read from a QRels file
-    Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile)));
+    Judge judge = new TrecJudge(new BufferedReader(IOUtils.getDecodingReader(qrelsFile, IOUtils.CHARSET_UTF_8)));
    // validate topics & judgments match each other
    judge.validateData(qqs, logger);
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/SubmissionReport.java
@ -19,6 +19,7 @@ package org.apache.lucene.benchmark.quality.utils;
 import java.io.IOException;
 import java.io.PrintWriter;
 import java.text.NumberFormat;
 import java.util.Locale;
 import org.apache.lucene.benchmark.quality.QualityQuery;
 import org.apache.lucene.search.ScoreDoc;
@ -45,7 +46,7 @@ public class SubmissionReport {
  public SubmissionReport (PrintWriter logger, String name) {
    this.logger = logger;
    this.name = name;
-    nf = NumberFormat.getInstance();
+    nf = NumberFormat.getInstance(Locale.ROOT);
    nf.setMaximumFractionDigits(4);
    nf.setMinimumFractionDigits(4);
  }
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java
@ -19,12 +19,18 @@ package org.apache.lucene.benchmark.utils;
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileFilter;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import org.apache.lucene.util.IOUtils;
 /**
 * Split the Reuters SGML documents into Simple Text files containing: Title, Date, Dateline, Body
@ -73,7 +79,7 @@ public class ExtractReuters {
   */
  protected void extractFile(File sgmFile) {
    try {
-      BufferedReader reader = new BufferedReader(new FileReader(sgmFile));
+      BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(sgmFile), IOUtils.CHARSET_UTF_8));
      StringBuilder buffer = new StringBuilder(1024);
      StringBuilder outBuffer = new StringBuilder(1024);
@ -107,7 +113,7 @@ public class ExtractReuters {
          File outFile = new File(outputDir, sgmFile.getName() + "-"
              + (docNumber++) + ".txt");
          // System.out.println("Writing " + outFile);
-          FileWriter writer = new FileWriter(outFile);
+          OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(outFile), IOUtils.CHARSET_UTF_8);
          writer.write(out);
          writer.close();
          outBuffer.setLength(0);
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractWikipedia.java
@ -18,8 +18,10 @@ package org.apache.lucene.benchmark.utils;
 */
 import java.io.File;
-import java.io.FileWriter;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.Writer;
 import java.util.Properties;
 import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
@ -28,6 +30,7 @@ import org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource;
 import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
 import org.apache.lucene.benchmark.byTask.utils.Config;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.util.IOUtils;
 /**
 * Extract the downloaded Wikipedia dump into separate files for indexing.
@ -83,7 +86,7 @@ public class ExtractWikipedia {
    contents.append("\n");
    try {
-      FileWriter writer = new FileWriter(f);
+      Writer writer = new OutputStreamWriter(new FileOutputStream(f), IOUtils.CHARSET_UTF_8);
      writer.write(contents.toString());
      writer.close();
    } catch (IOException ioe) {
--- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java
+++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java
@ -166,7 +166,7 @@ public class DocMakerTest extends BenchmarkTestCase {
    // DocMaker did not close its ContentSource if resetInputs was called twice,
    // leading to a file handle leak.
    File f = new File(getWorkDir(), "docMakerLeak.txt");
-    PrintStream ps = new PrintStream(f);
+    PrintStream ps = new PrintStream(f, "UTF-8");
    ps.println("one title\t" + System.currentTimeMillis() + "\tsome content");
    ps.close();
--- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTaskTest.java
+++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTaskTest.java
@ -20,6 +20,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.PrintStream;
 import java.nio.charset.Charset;
 import java.util.Properties;
 import org.apache.lucene.benchmark.BenchmarkTestCase;
@ -50,7 +51,7 @@ public class CreateIndexTaskTest extends BenchmarkTestCase {
    PrintStream curOut = System.out;
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(baos));
+    System.setOut(new PrintStream(baos, false, Charset.defaultCharset().name()));
    try {
      PerfRunData runData = createPerfRunData("SystemOut");
      CreateIndexTask cit = new CreateIndexTask(runData);
@ -63,7 +64,7 @@ public class CreateIndexTaskTest extends BenchmarkTestCase {
    PrintStream curErr = System.err;
    baos.reset();
-    System.setErr(new PrintStream(baos));
+    System.setErr(new PrintStream(baos, false, Charset.defaultCharset().name()));
    try {
      PerfRunData runData = createPerfRunData("SystemErr");
      CreateIndexTask cit = new CreateIndexTask(runData);
--- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java
+++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/byTask/utils/StreamUtilsTest.java
@ -31,6 +31,7 @@ import java.io.OutputStreamWriter;
 import org.apache.commons.compress.compressors.CompressorStreamFactory;
 import org.apache.lucene.benchmark.BenchmarkTestCase;
 import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util._TestUtil;
 import org.junit.After;
 import org.junit.Before;
@ -88,7 +89,7 @@ public class StreamUtilsTest extends BenchmarkTestCase {
  private File rawTextFile(String ext) throws Exception {
    File f = new File(testDir,"testfile." +	ext);
-    BufferedWriter w = new BufferedWriter(new FileWriter(f));
+    BufferedWriter w = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), IOUtils.CHARSET_UTF_8));
    w.write(TEXT);
    w.newLine();
    w.close();
@ -117,7 +118,7 @@ public class StreamUtilsTest extends BenchmarkTestCase {
  }
 	private void writeText(OutputStream os) throws IOException {
-		BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
+		BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os, IOUtils.CHARSET_UTF_8));
  	w.write(TEXT);
  	w.newLine();
  	w.close();
@ -125,7 +126,7 @@ public class StreamUtilsTest extends BenchmarkTestCase {
  private void assertReadText(File f) throws Exception {
    InputStream ir = StreamUtils.inputStream(f);
-    InputStreamReader in = new InputStreamReader(ir);
+    InputStreamReader in = new InputStreamReader(ir, IOUtils.CHARSET_UTF_8);
    BufferedReader r = new BufferedReader(in);
    String line = r.readLine();
    assertEquals("Wrong text found in "+f.getName(), TEXT, line);
--- a/lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
+++ b/lucene/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java
@ -31,7 +31,9 @@ import java.io.BufferedReader;
 import java.io.File;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
 import java.nio.charset.Charset;
 /**
 * Test that quality run does its job.
@ -55,7 +57,7 @@ public class TestQualityRun extends BenchmarkTestCase {
    int maxResults = 1000;
    String docNameField = "doctitle"; // orig docID is in the linedoc format title 
-    PrintWriter logger = VERBOSE ? new PrintWriter(System.out,true) : null;
+    PrintWriter logger = VERBOSE ? new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()),true) : null;
    // prepare topics
    InputStream topics = getClass().getResourceAsStream("trecTopics.txt");
--- a/lucene/build.xml
+++ b/lucene/build.xml
@ -169,11 +169,19 @@
    </clover-report>
  </target>
-  <!-- Validate once from top-level. -->
+  <!-- Validation (license/notice/api checks). -->
-  <target name="validate" depends="compile-tools,resolve" description="Validate legal stuff.">
+  <target name="validate" depends="check-licenses,check-forbidden-apis" description="Validate stuff." />
  <target name="check-licenses" depends="compile-tools,resolve,load-custom-tasks" description="Validate license stuff.">
    <license-check-macro dir="${basedir}" />
  </target>
  <target name="check-forbidden-apis" depends="compile-tools,compile-test,load-custom-tasks" description="Check forbidden API calls in compiled class files.">
    <forbidden-apis apiFile="${custom-tasks.dir}/forbiddenApis/jdk.txt">
      <fileset dir="${basedir}/build" includes="**/*.class" />
    </forbidden-apis>
  </target>
  <target name="resolve">
    <sequential>
      <ant dir="test-framework" target="resolve" inheritall="false">
--- a/lucene/core/build.xml
+++ b/lucene/core/build.xml
@ -68,6 +68,7 @@
            executable="${python.exe}" failonerror="true">
        <arg line="createLevAutomata.py @{n} False"/>
      </exec>
      <fixcrlf srcdir="src/java/org/apache/lucene/util/automaton" includes="*ParametricDescription.java" encoding="UTF-8"/>
    </sequential>
  </macrodef>
--- a/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
@ -20,8 +20,10 @@ package org.apache.lucene.codecs;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.io.UnsupportedEncodingException;
 import java.util.Comparator;
 import java.util.Iterator;
 import java.util.Locale;
 import java.util.TreeMap;
 import org.apache.lucene.index.DocsAndPositionsEnum;
@ -345,7 +347,12 @@ public class BlockTreeTermsReader extends FieldsProducer {
    @Override
    public String toString() {
      final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
-      final PrintStream out = new PrintStream(bos);
+      PrintStream out;
      try {
        out = new PrintStream(bos, false, "UTF-8");
      } catch (UnsupportedEncodingException bogus) {
        throw new RuntimeException(bogus);
      }
      out.println("  index FST:");
      out.println("    " + indexNodeCount + " nodes");
@ -353,7 +360,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
      out.println("    " + indexNumBytes + " bytes");
      out.println("  terms:");
      out.println("    " + totalTermCount + " terms");
-      out.println("    " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format("%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : ""));
+      out.println("    " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : ""));
      out.println("  blocks:");
      out.println("    " + totalBlockCount + " blocks");
      out.println("    " + termsOnlyBlockCount + " terms-only blocks");
@ -362,9 +369,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
      out.println("    " + floorBlockCount + " floor blocks");
      out.println("    " + (totalBlockCount-floorSubBlockCount) + " non-floor blocks");
      out.println("    " + floorSubBlockCount + " floor sub-blocks");
-      out.println("    " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format("%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : ""));
+      out.println("    " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : ""));
-      out.println("    " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? " (" + String.format("%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : ""));
+      out.println("    " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : ""));
-      out.println("    " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format("%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : ""));
+      out.println("    " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : ""));
      if (totalBlockCount != 0) {
        out.println("    by prefix length:");
        int total = 0;
@ -372,13 +379,17 @@ public class BlockTreeTermsReader extends FieldsProducer {
          final int blockCount = blockCountByPrefixLen[prefix];
          total += blockCount;
          if (blockCount != 0) {
-            out.println("      " + String.format("%2d", prefix) + ": " + blockCount);
+            out.println("      " + String.format(Locale.ROOT, "%2d", prefix) + ": " + blockCount);
          }
        }
        assert totalBlockCount == total;
      }
-      return bos.toString();
+      try {
        return bos.toString("UTF-8");
      } catch (UnsupportedEncodingException bogus) {
        throw new RuntimeException(bogus);
      }
    }
  }
--- a/lucene/core/src/java/org/apache/lucene/document/DateTools.java
+++ b/lucene/core/src/java/org/apache/lucene/document/DateTools.java
@ -53,7 +53,7 @@ public class DateTools {
  private static final ThreadLocal<Calendar> TL_CAL = new ThreadLocal<Calendar>() {
    @Override
    protected Calendar initialValue() {
-      return Calendar.getInstance(GMT, Locale.US);
+      return Calendar.getInstance(GMT, Locale.ROOT);
    }
  };
@ -194,7 +194,7 @@ public class DateTools {
      this.formatLen = formatLen;
      // formatLen 10's place:                     11111111
      // formatLen  1's place:            12345678901234567
-      this.format = new SimpleDateFormat("yyyyMMddHHmmssSSS".substring(0,formatLen),Locale.US);
+      this.format = new SimpleDateFormat("yyyyMMddHHmmssSSS".substring(0,formatLen),Locale.ROOT);
      this.format.setTimeZone(GMT);
    }
@ -202,7 +202,7 @@ public class DateTools {
     * in lowercase (for backwards compatibility) */
    @Override
    public String toString() {
-      return super.toString().toLowerCase(Locale.ENGLISH);
+      return super.toString().toLowerCase(Locale.ROOT);
    }
  }
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@ -25,6 +25,7 @@ import java.util.ArrayList;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import org.apache.lucene.codecs.BlockTreeTermsReader;
@ -341,7 +342,7 @@ public class CheckIndex {
   *  you only call this when the index is not opened by any
   *  writer. */
  public Status checkIndex(List<String> onlySegments) throws IOException {
-    NumberFormat nf = NumberFormat.getInstance();
+    NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
    SegmentInfos sis = new SegmentInfos();
    Status result = new Status();
    result.dir = dir;
--- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
@ -20,6 +20,7 @@ package org.apache.lucene.index;
 import java.io.IOException;
 import java.text.NumberFormat;
 import java.util.HashSet;
 import java.util.Locale;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.codecs.Codec;
@ -181,7 +182,7 @@ class DocumentsWriterPerThread {
  private int flushedDocCount;
  DocumentsWriterDeleteQueue deleteQueue;
  DeleteSlice deleteSlice;
-  private final NumberFormat nf = NumberFormat.getInstance();
+  private final NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
  final Allocator byteBlockAllocator;
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@ -27,6 +27,7 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
@ -3610,7 +3611,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
      // lost... 
      if (infoStream.isEnabled("IW")) {
-        infoStream.message("IW", String.format("merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.));
+        infoStream.message("IW", String.format(Locale.ROOT, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.));
      }
      final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer();
--- a/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
+++ b/lucene/core/src/java/org/apache/lucene/index/LogMergePolicy.java
@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
@ -535,7 +536,7 @@ public abstract class LogMergePolicy extends MergePolicy {
        if (size >= maxMergeSize) {
          extra += " [skip: too large]";
        }
-        message("seg=" + writer.get().segString(info) + " level=" + infoLevel.level + " size=" + String.format("%.3f MB", segBytes/1024/1024.) + extra);
+        message("seg=" + writer.get().segString(info) + " level=" + infoLevel.level + " size=" + String.format(Locale.ROOT, "%.3f MB", segBytes/1024/1024.) + extra);
      }
    }
--- a/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java
+++ b/lucene/core/src/java/org/apache/lucene/index/TieredMergePolicy.java
@ -18,6 +18,7 @@ package org.apache.lucene.index;
 */
 import java.io.IOException;
 import java.util.Locale;
 import java.util.Map;
 import java.util.Collection;
 import java.util.Collections;
@ -289,7 +290,7 @@ public class TieredMergePolicy extends MergePolicy {
        } else if (segBytes < floorSegmentBytes) {
          extra += " [floored]";
        }
-        message("  seg=" + writer.get().segString(info) + " size=" + String.format("%.3f", segBytes/1024/1024.) + " MB" + extra);
+        message("  seg=" + writer.get().segString(info) + " size=" + String.format(Locale.ROOT, "%.3f", segBytes/1024/1024.) + " MB" + extra);
      }
      minSegmentBytes = Math.min(segBytes, minSegmentBytes);
@ -388,7 +389,7 @@ public class TieredMergePolicy extends MergePolicy {
          final MergeScore score = score(candidate, hitTooLarge, mergingBytes);
          if (verbose()) {
-            message("  maybe=" + writer.get().segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format("%.3f MB", totAfterMergeBytes/1024./1024.));
+            message("  maybe=" + writer.get().segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format(Locale.ROOT, "%.3f MB", totAfterMergeBytes/1024./1024.));
          }
          // If we are already running a max sized merge
@ -413,7 +414,7 @@ public class TieredMergePolicy extends MergePolicy {
          }
          if (verbose()) {
-            message("  add merge=" + writer.get().segString(merge.segments) + " size=" + String.format("%.3f MB", bestMergeBytes/1024./1024.) + " score=" + String.format("%.3f", bestScore.getScore()) + " " + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : ""));
+            message("  add merge=" + writer.get().segString(merge.segments) + " size=" + String.format(Locale.ROOT, "%.3f MB", bestMergeBytes/1024./1024.) + " score=" + String.format(Locale.ROOT, "%.3f", bestScore.getScore()) + " " + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : ""));
          }
        } else {
          return spec;
@ -475,7 +476,7 @@ public class TieredMergePolicy extends MergePolicy {
      @Override
      public String getExplanation() {
-        return "skew=" + String.format("%.3f", skew) + " nonDelRatio=" + String.format("%.3f", nonDelRatio);
+        return "skew=" + String.format(Locale.ROOT, "%.3f", skew) + " nonDelRatio=" + String.format(Locale.ROOT, "%.3f", nonDelRatio);
      }
    };
  }
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/LMDirichletSimilarity.java
@ -17,6 +17,8 @@ package org.apache.lucene.search.similarities;
 * limitations under the License.
 */
 import java.util.Locale;
 import org.apache.lucene.search.Explanation;
 /**
@ -92,6 +94,6 @@ public class LMDirichletSimilarity extends LMSimilarity {
  @Override
  public String getName() {
-    return String.format("Dirichlet(%f)", getMu());
+    return String.format(Locale.ROOT, "Dirichlet(%f)", getMu());
  }
 }
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.java
@ -17,6 +17,8 @@ package org.apache.lucene.search.similarities;
 * limitations under the License.
 */
 import java.util.Locale;
 import org.apache.lucene.search.Explanation;
 /**
@ -72,6 +74,6 @@ public class LMJelinekMercerSimilarity extends LMSimilarity {
  @Override
  public String getName() {
-    return String.format("Jelinek-Mercer(%f)", getLambda());
+    return String.format(Locale.ROOT, "Jelinek-Mercer(%f)", getLambda());
  }
 }
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/LMSimilarity.java
@ -17,6 +17,8 @@ package org.apache.lucene.search.similarities;
 * limitations under the License.
 */
 import java.util.Locale;
 import org.apache.lucene.search.CollectionStatistics;
 import org.apache.lucene.search.Explanation;
 import org.apache.lucene.search.TermStatistics;
@ -91,9 +93,9 @@ public abstract class LMSimilarity extends SimilarityBase {
  public String toString() {
    String coll = collectionModel.getName();
    if (coll != null) {
-      return String.format("LM %s - %s", getName(), coll);
+      return String.format(Locale.ROOT, "LM %s - %s", getName(), coll);
    } else {
-      return String.format("LM %s", getName());
+      return String.format(Locale.ROOT, "LM %s", getName());
    }
  }
--- a/lucene/core/src/java/org/apache/lucene/util/NamedThreadFactory.java
+++ b/lucene/core/src/java/org/apache/lucene/util/NamedThreadFactory.java
@ -17,6 +17,7 @@ package org.apache.lucene.util;
 * limitations under the License.
 */
 import java.util.Locale;
 import java.util.concurrent.Executors;
 import java.util.concurrent.ThreadFactory;
 import java.util.concurrent.atomic.AtomicInteger;
@ -43,7 +44,7 @@ public class NamedThreadFactory implements ThreadFactory {
    final SecurityManager s = System.getSecurityManager();
    group = (s != null) ? s.getThreadGroup() : Thread.currentThread()
        .getThreadGroup();
-    this.threadNamePrefix = String.format(NAME_PATTERN,
+    this.threadNamePrefix = String.format(Locale.ROOT, NAME_PATTERN,
        checkPrefix(threadNamePrefix), threadPoolNumber.getAndIncrement());
  }
@ -57,7 +58,7 @@ public class NamedThreadFactory implements ThreadFactory {
   * @see java.util.concurrent.ThreadFactory#newThread(java.lang.Runnable)
   */
  public Thread newThread(Runnable r) {
-    final Thread t = new Thread(group, r, String.format("%s-%d",
+    final Thread t = new Thread(group, r, String.format(Locale.ROOT, "%s-%d",
        this.threadNamePrefix, threadNumber.getAndIncrement()), 0);
    t.setDaemon(false);
    t.setPriority(Thread.NORM_PRIORITY);
--- a/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java
+++ b/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java
@ -559,7 +559,7 @@ public final class RamUsageEstimator {
   */
  public static String humanReadableUnits(long bytes) {
    return humanReadableUnits(bytes, 
-        new DecimalFormat("0.#", DecimalFormatSymbols.getInstance(Locale.ENGLISH)));
+        new DecimalFormat("0.#", DecimalFormatSymbols.getInstance(Locale.ROOT)));
  }
  /**
--- a/lucene/core/src/java/org/apache/lucene/util/Version.java
+++ b/lucene/core/src/java/org/apache/lucene/util/Version.java
@ -73,7 +73,7 @@ public enum Version {
  }
  public static Version parseLeniently(String version) {
-    String parsedMatchVersion = version.toUpperCase(Locale.ENGLISH);
+    String parsedMatchVersion = version.toUpperCase(Locale.ROOT);
    return Version.valueOf(parsedMatchVersion.replaceFirst("^(\\d)\\.(\\d)$", "LUCENE_$1$2"));
  }
 }
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/createLevAutomata.py
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/createLevAutomata.py
@ -121,7 +121,7 @@ def main():
  w('package org.apache.lucene.util.automaton;')
  w('')
-  w('/**')
+  w('/*')
  w(' * Licensed to the Apache Software Foundation (ASF) under one or more')
  w(' * contributor license agreements.  See the NOTICE file distributed with')
  w(' * this work for additional information regarding copyright ownership.')
--- a/lucene/core/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java
+++ b/lucene/core/src/test/org/apache/lucene/analysis/tokenattributes/TestCharTermAttributeImpl.java
@ -159,7 +159,7 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {
  public void testAppendableInterface() {
    CharTermAttributeImpl t = new CharTermAttributeImpl();
-    Formatter formatter = new Formatter(t, Locale.US);
+    Formatter formatter = new Formatter(t, Locale.ROOT);
    formatter.format("%d", 1234);
    assertEquals("1234", t.toString());
    formatter.format("%d", 5678);
--- a/lucene/core/src/test/org/apache/lucene/codecs/pulsing/Test10KPulsings.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/pulsing/Test10KPulsings.java
@ -71,7 +71,7 @@ public class Test10KPulsings extends LuceneTestCase {
    Field field = newField("field", "", ft);
    document.add(field);
-    NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ENGLISH));
+    NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ROOT));
    for (int i = 0; i < 10050; i++) {
      field.setStringValue(df.format(i));
@ -122,7 +122,7 @@ public class Test10KPulsings extends LuceneTestCase {
    Field field = newField("field", "", ft);
    document.add(field);
-    NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ENGLISH));
+    NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ROOT));
    final int freq = freqCutoff + 1;
--- a/lucene/core/src/test/org/apache/lucene/document/TestBinaryDocument.java
+++ b/lucene/core/src/test/org/apache/lucene/document/TestBinaryDocument.java
@ -37,7 +37,7 @@ public class TestBinaryDocument extends LuceneTestCase {
  {
    FieldType ft = new FieldType();
    ft.setStored(true);
-    StoredField binaryFldStored = new StoredField("binaryStored", binaryValStored.getBytes());
+    StoredField binaryFldStored = new StoredField("binaryStored", binaryValStored.getBytes("UTF-8"));
    Field stringFldStored = new Field("stringStored", binaryValStored, ft);
    Document doc = new Document();
@ -62,7 +62,7 @@ public class TestBinaryDocument extends LuceneTestCase {
    /** fetch the binary stored field and compare its content with the original one */
    BytesRef bytes = docFromReader.getBinaryValue("binaryStored");
    assertNotNull(bytes);
-    String binaryFldStoredTest = new String(bytes.bytes, bytes.offset, bytes.length);
+    String binaryFldStoredTest = new String(bytes.bytes, bytes.offset, bytes.length, "UTF-8");
    assertTrue(binaryFldStoredTest.equals(binaryValStored));
    /** fetch the string field and compare its content with the original one */
@ -75,7 +75,7 @@ public class TestBinaryDocument extends LuceneTestCase {
  }
  public void testCompressionTools() throws Exception {
-    StoredField binaryFldCompressed = new StoredField("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes()));
+    StoredField binaryFldCompressed = new StoredField("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes("UTF-8")));
    StoredField stringFldCompressed = new StoredField("stringCompressed", CompressionTools.compressString(binaryValCompressed));
    Document doc = new Document();
@ -94,7 +94,7 @@ public class TestBinaryDocument extends LuceneTestCase {
    assertTrue(docFromReader != null);
    /** fetch the binary compressed field and compare its content with the original one */
-    String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed")));
+    String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed")), "UTF-8");
    assertTrue(binaryFldCompressedTest.equals(binaryValCompressed));
    assertTrue(CompressionTools.decompressString(docFromReader.getBinaryValue("stringCompressed")).equals(binaryValCompressed));
--- a/lucene/core/src/test/org/apache/lucene/document/TestDateTools.java
+++ b/lucene/core/src/test/org/apache/lucene/document/TestDateTools.java
@ -61,12 +61,12 @@ public class TestDateTools extends LuceneTestCase {
  public void testStringtoTime() throws ParseException {
    long time = DateTools.stringToTime("197001010000");
-    Calendar cal = new GregorianCalendar();
+    // we use default locale since LuceneTestCase randomizes it
    Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"), Locale.getDefault());
    cal.clear();
    cal.set(1970, 0, 1,    // year=1970, month=january, day=1
        0, 0, 0);          // hour, minute, second
    cal.set(Calendar.MILLISECOND, 0);
    cal.setTimeZone(TimeZone.getTimeZone("GMT"));
    assertEquals(cal.getTime().getTime(), time);
    cal.set(1980, 1, 2,    // year=1980, month=february, day=2
        11, 5, 0);          // hour, minute, second
@ -76,9 +76,9 @@ public class TestDateTools extends LuceneTestCase {
  }
  public void testDateAndTimetoString() throws ParseException {
-    Calendar cal = new GregorianCalendar();
+    // we use default locale since LuceneTestCase randomizes it
    Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"), Locale.getDefault());
    cal.clear();
    cal.setTimeZone(TimeZone.getTimeZone("GMT"));
    cal.set(2004, 1, 3,   // year=2004, month=february(!), day=3
        22, 8, 56);       // hour, minute, second
    cal.set(Calendar.MILLISECOND, 333);
@ -141,9 +141,9 @@ public class TestDateTools extends LuceneTestCase {
  }
  public void testRound() {
-    Calendar cal = new GregorianCalendar();
+    // we use default locale since LuceneTestCase randomizes it
    Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"), Locale.getDefault());
    cal.clear();
    cal.setTimeZone(TimeZone.getTimeZone("GMT"));
    cal.set(2004, 1, 3,   // year=2004, month=february(!), day=3
        22, 8, 56);       // hour, minute, second
    cal.set(Calendar.MILLISECOND, 333);
@ -180,7 +180,7 @@ public class TestDateTools extends LuceneTestCase {
  }
  private String isoFormat(Date date) {
-    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS", Locale.US);
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS", Locale.ROOT);
    sdf.setTimeZone(TimeZone.getTimeZone("GMT"));
    return sdf.format(date);
  }
--- a/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
@ -220,10 +220,10 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
      ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
      CheckIndex checker = new CheckIndex(dir);
-      checker.setInfoStream(new PrintStream(bos));
+      checker.setInfoStream(new PrintStream(bos, false, "UTF-8"));
      CheckIndex.Status indexStatus = checker.checkIndex();
      assertFalse(indexStatus.clean);
-      assertTrue(bos.toString().contains(IndexFormatTooOldException.class.getName()));
+      assertTrue(bos.toString("UTF-8").contains(IndexFormatTooOldException.class.getName()));
      dir.close();
      _TestUtil.rmDir(oldIndxeDir);
--- a/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestCheckIndex.java
@ -52,12 +52,12 @@ public class TestCheckIndex extends LuceneTestCase {
    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
    CheckIndex checker = new CheckIndex(dir);
-    checker.setInfoStream(new PrintStream(bos));
+    checker.setInfoStream(new PrintStream(bos, false, "UTF-8"));
    if (VERBOSE) checker.setInfoStream(System.out);
    CheckIndex.Status indexStatus = checker.checkIndex();
    if (indexStatus.clean == false) {
      System.out.println("CheckIndex failed");
-      System.out.println(bos.toString());
+      System.out.println(bos.toString("UTF-8"));
      fail();
    }
--- a/lucene/core/src/test/org/apache/lucene/index/TestDoc.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestDoc.java
@ -17,11 +17,14 @@ package org.apache.lucene.index;
 * limitations under the License.
 */
 import java.io.File;
-import java.io.FileReader;
+import java.io.FileInputStream;
-import java.io.FileWriter;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
 import java.io.StringWriter;
 import java.io.Writer;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.LinkedList;
@ -78,14 +81,14 @@ public class TestDoc extends LuceneTestCase {
    }
    private File createOutput(String name, String text) throws IOException {
-        FileWriter fw = null;
+        Writer fw = null;
        PrintWriter pw = null;
        try {
            File f = new File(workDir, name);
            if (f.exists()) f.delete();
-            fw = new FileWriter(f);
+            fw = new OutputStreamWriter(new FileOutputStream(f), "UTF-8");
            pw = new PrintWriter(fw);
            pw.println(text);
            return f;
@ -182,9 +185,11 @@ public class TestDoc extends LuceneTestCase {
   {
      File file = new File(workDir, fileName);
      Document doc = new Document();
-      doc.add(new TextField("contents", new FileReader(file), Field.Store.NO));
+      InputStreamReader is = new InputStreamReader(new FileInputStream(file), "UTF-8");
      doc.add(new TextField("contents", is, Field.Store.NO));
      writer.addDocument(doc);
      writer.commit();
      is.close();
      return writer.newestSegment();
   }
--- a/lucene/core/src/test/org/apache/lucene/index/TestPayloads.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestPayloads.java
@ -43,9 +43,8 @@ public class TestPayloads extends LuceneTestCase {
    // Simple tests to test the Payload class
    public void testPayload() throws Exception {
-        byte[] testData = "This is a test!".getBytes();
+        BytesRef payload = new BytesRef("This is a test!");
-        BytesRef payload = new BytesRef(testData);
+        assertEquals("Wrong payload length.", "This is a test!".length(), payload.length);
        assertEquals("Wrong payload length.", testData.length, payload.length);
        BytesRef clone = payload.clone();
        assertEquals(payload.length, clone.length);
@ -73,7 +72,7 @@ public class TestPayloads extends LuceneTestCase {
        // enabled in only some documents
        d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO));
        // only add payload data for field f2
-        analyzer.setPayloadData("f2", "somedata".getBytes(), 0, 1);
+        analyzer.setPayloadData("f2", "somedata".getBytes("UTF-8"), 0, 1);
        writer.addDocument(d);
        // flush
        writer.close();
@ -96,8 +95,8 @@ public class TestPayloads extends LuceneTestCase {
        d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO));
        d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO));
        // add payload data for field f2 and f3
-        analyzer.setPayloadData("f2", "somedata".getBytes(), 0, 1);
+        analyzer.setPayloadData("f2", "somedata".getBytes("UTF-8"), 0, 1);
-        analyzer.setPayloadData("f3", "somedata".getBytes(), 0, 3);
+        analyzer.setPayloadData("f3", "somedata".getBytes("UTF-8"), 0, 3);
        writer.addDocument(d);
        // force merge
--- a/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestBooleanMinShouldMatch.java
@ -29,6 +29,8 @@ import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import java.text.DecimalFormat;
 import java.text.DecimalFormatSymbols;
 import java.util.Locale;
 import java.util.Random;
 /** Test that BooleanQuery.setMinimumNumberShouldMatch works.
@ -378,7 +380,7 @@ public class TestBooleanMinShouldMatch extends LuceneTestCase {
        System.err.println("------- " + test + " -------");
-        DecimalFormat f = new DecimalFormat("0.000000");
+        DecimalFormat f = new DecimalFormat("0.000000", DecimalFormatSymbols.getInstance(Locale.ROOT));
        for (int i = 0; i < h.length; i++) {
            StoredDocument d = searcher.doc(h[i].doc);
--- a/lucene/core/src/test/org/apache/lucene/search/TestCustomSearcherSort.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestCustomSearcherSort.java
@ -19,8 +19,10 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.Calendar;
 import java.util.GregorianCalendar;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Random;
+import java.util.TimeZone;
 import java.util.TreeMap;
 import org.apache.lucene.document.DateTools;
@ -230,10 +232,12 @@ public class TestCustomSearcherSort extends LuceneTestCase {
  private class RandomGen {
    RandomGen(Random random) {
      this.random = random;
+      base.set(1980, 1, 1);
    }
    private Random random;
-    private Calendar base = new GregorianCalendar(1980, 1, 1);
+    // we use the default Locale/TZ since LuceneTestCase randomizes it
+    private Calendar base = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
    // Just to generate some different Lucene Date strings
    private String getLuceneDate() {
--- a/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java
@ -36,6 +36,8 @@ import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.Directory;
 import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
+import java.util.Locale;
 import java.io.IOException;
 /**
@ -486,7 +488,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
    System.err.println("------- " + test + " -------");
-    DecimalFormat f = new DecimalFormat("0.000000000");
+    DecimalFormat f = new DecimalFormat("0.000000000", DecimalFormatSymbols.getInstance(Locale.ROOT));
    for (int i = 0; i < h.length; i++) {
      StoredDocument d = searcher.doc(h[i].doc);
--- a/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestFieldCache.java
@ -23,6 +23,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.LinkedHashSet;
 import java.util.List;
+import java.util.Locale;
 import java.util.concurrent.CyclicBarrier;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
@ -117,10 +118,10 @@ public class TestFieldCache extends LuceneTestCase {
    try {
      FieldCache cache = FieldCache.DEFAULT;
      ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
-      cache.setInfoStream(new PrintStream(bos));
+      cache.setInfoStream(new PrintStream(bos, false, "UTF-8"));
      cache.getDoubles(reader, "theDouble", false);
      cache.getFloats(reader, "theDouble", false);
-      assertTrue(bos.toString().indexOf("WARNING") != -1);
+      assertTrue(bos.toString("UTF-8").indexOf("WARNING") != -1);
    } finally {
      FieldCache.DEFAULT.purgeAllCaches();
    }
@ -261,7 +262,7 @@ public class TestFieldCache extends LuceneTestCase {
        if (chunk == 0) {
          for (int ord = 0; ord < values.size(); ord++) {
            BytesRef term = values.get(ord);
-            assertNull(String.format("Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term);
+            assertNull(String.format(Locale.ROOT, "Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term);
          }
          break;
        }
@ -275,7 +276,7 @@ public class TestFieldCache extends LuceneTestCase {
              reuse = termOrds.lookup(i, reuse);
              reuse.read(buffer);
          }
-          assertTrue(String.format("Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual));
+          assertTrue(String.format(Locale.ROOT, "Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual));
        }
        if (chunk <= buffer.length) {
--- a/lucene/core/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java
@ -44,7 +44,7 @@ public class TestMultiValuedNumericRangeQuery extends LuceneTestCase {
        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
        .setMaxBufferedDocs(_TestUtil.nextInt(random(), 50, 1000)));
-    DecimalFormat format = new DecimalFormat("00000000000", new DecimalFormatSymbols(Locale.US));
+    DecimalFormat format = new DecimalFormat("00000000000", new DecimalFormatSymbols(Locale.ROOT));
    int num = atLeast(500);
    for (int l = 0; l < num; l++) {
--- a/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestRegexpRandom.java
@ -58,7 +58,7 @@ public class TestRegexpRandom extends LuceneTestCase {
    Field field = newField("field", "", customType);
    doc.add(field);
-    NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ENGLISH));
+    NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
    for (int i = 0; i < 1000; i++) {
      field.setStringValue(df.format(i));
      writer.addDocument(doc);
--- a/lucene/core/src/test/org/apache/lucene/search/TestWildcardRandom.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestWildcardRandom.java
@ -54,7 +54,7 @@ public class TestWildcardRandom extends LuceneTestCase {
    Field field = newStringField("field", "", Field.Store.NO);
    doc.add(field);
-    NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ENGLISH));
+    NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
    for (int i = 0; i < 1000; i++) {
      field.setStringValue(df.format(i));
      writer.addDocument(doc);
--- a/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java
+++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestBasics.java
@ -81,7 +81,7 @@ public class TestBasics extends LuceneTestCase {
    @Override
    public boolean incrementToken() throws IOException {
      if (input.incrementToken()) {
-        payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes()));
+        payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes("UTF-8")));
        pos++;
        return true;
      } else {
@ -411,7 +411,7 @@ public class TestBasics extends LuceneTestCase {
  @Test
  public void testSpanPayloadCheck() throws Exception {
    SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five"));
-    BytesRef pay = new BytesRef(("pos: " + 5).getBytes());
+    BytesRef pay = new BytesRef(("pos: " + 5).getBytes("UTF-8"));
    SpanQuery query = new SpanPayloadCheckQuery(term1, Collections.singletonList(pay.bytes));
    checkHits(query, new int[]
      {1125, 1135, 1145, 1155, 1165, 1175, 1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345, 1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495, 1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665, 1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835, 1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985, 1995});
@ -426,8 +426,8 @@ public class TestBasics extends LuceneTestCase {
    clauses[0] = term1;
    clauses[1] = term2;
    snq = new SpanNearQuery(clauses, 0, true);
-    pay = new BytesRef(("pos: " + 0).getBytes());
+    pay = new BytesRef(("pos: " + 0).getBytes("UTF-8"));
-    pay2 = new BytesRef(("pos: " + 1).getBytes());
+    pay2 = new BytesRef(("pos: " + 1).getBytes("UTF-8"));
    list = new ArrayList<byte[]>();
    list.add(pay.bytes);
    list.add(pay2.bytes);
@ -439,9 +439,9 @@ public class TestBasics extends LuceneTestCase {
    clauses[1] = term2;
    clauses[2] = new SpanTermQuery(new Term("field", "five"));
    snq = new SpanNearQuery(clauses, 0, true);
-    pay = new BytesRef(("pos: " + 0).getBytes());
+    pay = new BytesRef(("pos: " + 0).getBytes("UTF-8"));
-    pay2 = new BytesRef(("pos: " + 1).getBytes());
+    pay2 = new BytesRef(("pos: " + 1).getBytes("UTF-8"));
-    BytesRef pay3 = new BytesRef(("pos: " + 2).getBytes());
+    BytesRef pay3 = new BytesRef(("pos: " + 2).getBytes("UTF-8"));
    list = new ArrayList<byte[]>();
    list.add(pay.bytes);
    list.add(pay2.bytes);
@ -470,10 +470,10 @@ public class TestBasics extends LuceneTestCase {
    checkHits(query, new int[]{1103, 1203,1303,1403,1503,1603,1703,1803,1903});
    Collection<byte[]> payloads = new ArrayList<byte[]>();
-    BytesRef pay = new BytesRef(("pos: " + 0).getBytes());
+    BytesRef pay = new BytesRef(("pos: " + 0).getBytes("UTF-8"));
-    BytesRef pay2 = new BytesRef(("pos: " + 1).getBytes());
+    BytesRef pay2 = new BytesRef(("pos: " + 1).getBytes("UTF-8"));
-    BytesRef pay3 = new BytesRef(("pos: " + 3).getBytes());
+    BytesRef pay3 = new BytesRef(("pos: " + 3).getBytes("UTF-8"));
-    BytesRef pay4 = new BytesRef(("pos: " + 4).getBytes());
+    BytesRef pay4 = new BytesRef(("pos: " + 4).getBytes("UTF-8"));
    payloads.add(pay.bytes);
    payloads.add(pay2.bytes);
    payloads.add(pay3.bytes);
--- a/lucene/core/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java
+++ b/lucene/core/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java
@ -276,7 +276,7 @@ public class TestPayloadSpans extends LuceneTestCase {
        Collection<byte[]> payloads = spans.getPayload();
        for (final byte [] payload : payloads) {
-          payloadSet.add(new String(payload));
+          payloadSet.add(new String(payload, "UTF-8"));
        }
      }
    }
@ -311,7 +311,7 @@ public class TestPayloadSpans extends LuceneTestCase {
      while (spans.next()) {
        Collection<byte[]> payloads = spans.getPayload();
        for (final byte[] payload : payloads) {
-          payloadSet.add(new String(payload));
+          payloadSet.add(new String(payload, "UTF-8"));
        }
      }
    }
@ -347,7 +347,7 @@ public class TestPayloadSpans extends LuceneTestCase {
        Collection<byte[]> payloads = spans.getPayload();
        for (final byte [] payload : payloads) {
-          payloadSet.add(new String(payload));
+          payloadSet.add(new String(payload, "UTF-8"));
        }
      }
    }
@ -383,7 +383,7 @@ public class TestPayloadSpans extends LuceneTestCase {
      System.out.println("Num payloads:" + payloads.size());
    for (final byte [] bytes : payloads) {
      if(VERBOSE)
-        System.out.println(new String(bytes));
+        System.out.println(new String(bytes, "UTF-8"));
    }
    reader.close();
    directory.close();
@ -456,7 +456,7 @@ public class TestPayloadSpans extends LuceneTestCase {
        for (final byte [] bytes : payload) {
          if(VERBOSE)
            System.out.println("doc:" + spans.doc() + " s:" + spans.start() + " e:" + spans.end() + " "
-              + new String(bytes));
+              + new String(bytes, "UTF-8"));
        }
        assertEquals(numPayloads[cnt],payload.size());
@ -505,9 +505,9 @@ public class TestPayloadSpans extends LuceneTestCase {
        if (!nopayload.contains(token)) {
          if (entities.contains(token)) {
-            payloadAtt.setPayload(new BytesRef((token + ":Entity:"+ pos ).getBytes()));
+            payloadAtt.setPayload(new BytesRef(token + ":Entity:"+ pos ));
          } else {
-            payloadAtt.setPayload(new BytesRef((token + ":Noise:" + pos ).getBytes()));
+            payloadAtt.setPayload(new BytesRef(token + ":Noise:" + pos ));
          }
        }
        pos += posIncrAtt.getPositionIncrement();
--- a/Show More
+++ b/Show More