LUCENE-3312: Merge up to trunk HEAD. There was a really huge change (LUCENE-4199).

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3312@1359283 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2012-07-09 17:04:57 +00:00
commit 27aa2f6a28
261 changed files with 1795 additions and 1774 deletions

@@ -62,6 +62,12 @@ Build
 * LUCENE-4115: JAR resolution/ cleanup should be done automatically for ant
   clean/ eclipse/ resolve (Dawid Weiss)
 
+* LUCENE-4199: Add a new target "check-forbidden-apis", that parses all
+  generated .class files for use of APIs that use default charset, default
+  locale, or default timezone and fail build if violations found. This
+  ensures, that Lucene / Solr is independent on local configuration options.
+  (Uwe Schindler, Robert Muir, Dawid Weiss)
+
 Documentation
 
 * LUCENE-4195: Added package documentation and examples for
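
For context: the default-locale trap that the new check-forbidden-apis target catches is the no-argument String case-conversion overload. A minimal, hypothetical demo (not part of this commit) of why the build now rejects it:

import java.util.Locale;

public class ForbiddenApiDemo {
  public static void main(String[] args) {
    // On a Turkish-locale JVM, 'I' lowercases to dotless 'ı' (U+0131),
    // so the same code yields different terms on different machines.
    Locale.setDefault(new Locale("tr", "TR"));
    System.out.println("TITLE".toLowerCase());            // "tıtle" - locale-dependent
    System.out.println("TITLE".toLowerCase(Locale.ROOT)); // "title" - reproducible
  }
}

Most of the file-by-file changes below are exactly this fix: pinning Locale.ROOT (or an explicit charset or time zone) where a platform default used to leak in.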

@@ -61,50 +61,50 @@
           executable="${python.exe}" failonerror="true" logerror="true">
       <arg value="htmlentity.py"/>
     </exec>
+    <fixcrlf file="src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex" encoding="UTF-8"/>
   </target>
 
   <target name="jflex-wiki-tokenizer" depends="init,jflex-check" if="jflex.present">
     <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
      <classpath refid="jflex.classpath"/>
     </taskdef>
-    <jflex file="src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex"
-           outdir="src/java/org/apache/lucene/analysis/wikipedia"
-           nobak="on"/>
+    <run-jflex dir="src/java/org/apache/lucene/analysis/wikipedia" name="WikipediaTokenizerImpl"/>
   </target>
 
   <target name="jflex-StandardAnalyzer" depends="init,jflex-check" if="jflex.present">
     <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
      <classpath refid="jflex.classpath"/>
     </taskdef>
-    <jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex"
-           outdir="src/java/org/apache/lucene/analysis/standard"
-           nobak="on" />
-    <jflex file="src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex"
-           outdir="src/java/org/apache/lucene/analysis/standard"
-           nobak="on" />
-    <jflex file="src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex"
-           outdir="src/java/org/apache/lucene/analysis/standard/std31"
-           nobak="on" />
+    <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="StandardTokenizerImpl"/>
+    <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="ClassicTokenizerImpl"/>
   </target>
 
   <target name="jflex-UAX29URLEmailTokenizer" depends="jflex-check" if="jflex.present">
     <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
      <classpath refid="jflex.classpath"/>
     </taskdef>
-    <jflex file="src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex"
-           outdir="src/java/org/apache/lucene/analysis/standard"
-           nobak="on" />
-    <jflex file="src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.jflex"
-           outdir="src/java/org/apache/lucene/analysis/standard/std31"
-           nobak="on" />
-    <jflex file="src/java/org/apache/lucene/analysis/standard/std34/UAX29URLEmailTokenizerImpl34.jflex"
-           outdir="src/java/org/apache/lucene/analysis/standard/std34"
-           nobak="on" />
+    <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="UAX29URLEmailTokenizerImpl"/>
   </target>
 
+  <!-- Remove the inappropriate JFlex-generated constructor -->
+  <macrodef name="run-jflex">
+    <attribute name="dir"/>
+    <attribute name="name"/>
+    <sequential>
+      <jflex file="@{dir}/@{name}.jflex"
+             outdir="@{dir}"
+             nobak="on" />
+      <replaceregexp file="@{dir}/@{name}.java"
+                     match="/\*\*\s*\*\s*Creates a new scanner\..*this\(new java\.io\.InputStreamReader\(in\)\);\s*\}"
+                     replace="" flags="sg"/>
+    </sequential>
+  </macrodef>
+
   <target name="clean-jflex">
     <delete>
+      <fileset dir="src/java/org/apache/lucene/analysis/charfilter" includes="*.java">
+        <containsregexp expression="generated.*by.*JFlex"/>
+      </fileset>
       <fileset dir="src/java/org/apache/lucene/analysis/wikipedia" includes="*.java">
        <containsregexp expression="generated.*by.*JFlex"/>
      </fileset>
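
The replaceregexp inside the run-jflex macro strips the InputStream constructor that JFlex emits into every generated scanner; that constructor (it appears verbatim in the generated-tokenizer hunks further down) decodes with the platform default charset:

// Removed by the macro above: new InputStreamReader(in) without a charset
// argument uses whatever the platform default happens to be.
ClassicTokenizerImpl(java.io.InputStream in) {
  this(new java.io.InputStreamReader(in));
}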

@@ -1,5 +1,7 @@
 package org.apache.lucene.analysis.br;
 
+import java.util.Locale;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -21,6 +23,7 @@ package org.apache.lucene.analysis.br;
  * A stemmer for Brazilian Portuguese words.
  */
 public class BrazilianStemmer {
+  private static final Locale locale = new Locale("pt", "BR");
 
   /**
    * Changed term
@@ -243,7 +246,7 @@ public class BrazilianStemmer {
       return null ;
     }
 
-    value = value.toLowerCase() ;
+    value = value.toLowerCase(locale) ;
     for (j=0 ; j < value.length() ; j++) {
       if ((value.charAt(j) == 'á') ||
           (value.charAt(j) == 'â') ||

@@ -1,6 +1,6 @@
 package org.apache.lucene.analysis.charfilter;
 
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.

@@ -1,4 +1,7 @@
 package org.apache.lucene.analysis.de;
+
+import java.util.Locale;
+
 // This file is encoded in UTF-8
 
 /*
@@ -38,6 +41,8 @@ public class GermanStemmer
    */
   private int substCount = 0;
 
+  private static final Locale locale = new Locale("de", "DE");
+
   /**
    * Stemms the given term to an unique <tt>discriminator</tt>.
    *
@@ -47,7 +52,7 @@ public class GermanStemmer
   protected String stem( String term )
   {
     // Use lowercase for medium stemming.
-    term = term.toLowerCase();
+    term = term.toLowerCase(locale);
     if ( !isStemmable( term ) )
       return term;
 
     // Reset the StringBuilder.

@@ -252,7 +252,7 @@ public class HunspellDictionary {
     }
 
     String condition = ruleArgs[4];
-    affix.setCondition(condition, String.format(conditionPattern, condition));
+    affix.setCondition(condition, String.format(Locale.ROOT, conditionPattern, condition));
     affix.setCrossProduct(crossProduct);
 
     List<HunspellAffix> list = affixes.get(affix.getAppend());
@@ -376,7 +376,7 @@ public class HunspellDictionary {
       Arrays.sort(wordForm.getFlags());
       entry = line.substring(0, flagSep);
       if(ignoreCase) {
-        entry = entry.toLowerCase(Locale.ENGLISH);
+        entry = entry.toLowerCase(Locale.ROOT);
       }
     }

@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.hunspell;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.charset.Charset;
 import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -330,7 +331,7 @@ public class HunspellStemmer {
     HunspellStemmer stemmer = new HunspellStemmer(dictionary);
 
-    Scanner scanner = new Scanner(System.in);
+    Scanner scanner = new Scanner(System.in, Charset.defaultCharset().name());
 
     System.out.print("> ");
     while (scanner.hasNextLine()) {

@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.sinks;
 import java.text.DateFormat;
 import java.text.ParseException;
 import java.util.Date;
+import java.util.Locale;
 
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.AttributeSource;
@@ -37,10 +38,12 @@ public class DateRecognizerSinkFilter extends TeeSinkTokenFilter.SinkFilter {
   protected CharTermAttribute termAtt;
 
   /**
-   * Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object.
+   * Uses {@link java.text.DateFormat#getDateInstance(int, Locale)
+   * DateFormat#getDateInstance(DateFormat.DEFAULT, Locale.ROOT)} as
+   * the {@link java.text.DateFormat} object.
    */
   public DateRecognizerSinkFilter() {
-    this(DateFormat.getDateInstance());
+    this(DateFormat.getDateInstance(DateFormat.DEFAULT, Locale.ROOT));
   }
 
   public DateRecognizerSinkFilter(DateFormat dateFormat) {
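
A hypothetical illustration (not part of the commit) of why the no-argument DateFormat.getDateInstance() had to go:

import java.text.DateFormat;
import java.util.Date;
import java.util.Locale;

public class DefaultDateFormatDemo {
  public static void main(String[] args) {
    Date d = new Date(0L); // 1970-01-01T00:00:00Z
    // Depends on the JVM's default locale (and time zone):
    // "Jan 1, 1970" on en_US, "01.01.1970" on de_DE, ...
    System.out.println(DateFormat.getDateInstance().format(d));
    // Pinned to Locale.ROOT: the same pattern on every machine.
    System.out.println(DateFormat.getDateInstance(DateFormat.DEFAULT, Locale.ROOT).format(d));
  }
}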

@@ -1,8 +1,8 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/30/11 12:10 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 16:59 */
 
 package org.apache.lucene.analysis.standard;
 
-/*
+/**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -33,8 +33,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 /**
  * This class is a scanner generated by
  * <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 9/30/11 12:10 PM from the specification file
- * <tt>/lucene/jflex/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
+ * on 08.07.12 16:59 from the specification file
+ * <tt>C:/Users/Uwe Schindler/Projects/lucene/lucene4199/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
  */
 class ClassicTokenizerImpl implements StandardTokenizerInterface {
@@ -383,15 +383,7 @@ public final void getText(CharTermAttribute t) {
     this.zzReader = in;
   }
 
-  /**
-   * Creates a new scanner.
-   * There is also java.io.Reader version of this constructor.
-   *
-   * @param in  the java.io.Inputstream to read input from.
-   */
-  ClassicTokenizerImpl(java.io.InputStream in) {
-    this(new java.io.InputStreamReader(in));
-  }
 
   /**
    * Unpacks the compressed character translation table.

@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-// Generated using ICU4J 4.8.0.0 on Friday, September 30, 2011 4:10:42 PM UTC
+// Generated using ICU4J 4.8.1.1 on Sunday, July 8, 2012 2:59:49 PM UTC
 // by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros

@@ -1,8 +1,8 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/30/11 12:10 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 16:59 */
 
 package org.apache.lucene.analysis.standard;
 
-/*
+/**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -759,15 +759,7 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
     this.zzReader = in;
   }
 
-  /**
-   * Creates a new scanner.
-   * There is also java.io.Reader version of this constructor.
-   *
-   * @param in  the java.io.Inputstream to read input from.
-   */
-  public StandardTokenizerImpl(java.io.InputStream in) {
-    this(new java.io.InputStreamReader(in));
-  }
 
   /**
    * Unpacks the compressed character translation table.

@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 3/18/12 12:05 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 17:00 */
 
 package org.apache.lucene.analysis.standard;
 
@@ -3844,15 +3844,7 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf
     this.zzReader = in;
   }
 
-  /**
-   * Creates a new scanner.
-   * There is also java.io.Reader version of this constructor.
-   *
-   * @param in  the java.io.Inputstream to read input from.
-   */
-  public UAX29URLEmailTokenizerImpl(java.io.InputStream in) {
-    this(new java.io.InputStreamReader(in));
-  }
 
   /**
    * Unpacks the compressed character translation table.

@@ -1,6 +1,6 @@
 package org.apache.lucene.analysis.standard;
 
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.

@@ -1,8 +1,8 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 1/22/12 10:26 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 17:00 */
 
 package org.apache.lucene.analysis.wikipedia;
 
-/*
+/**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 /**
  * This class is a scanner generated by
  * <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 1/22/12 10:26 PM from the specification file
- * <tt>/home/rmuir/workspace/lucene-clean-trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
+ * on 08.07.12 17:00 from the specification file
+ * <tt>C:/Users/Uwe Schindler/Projects/lucene/lucene4199/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
  */
 class WikipediaTokenizerImpl {
@@ -519,15 +519,7 @@ final void reset() {
     this.zzReader = in;
   }
 
-  /**
-   * Creates a new scanner.
-   * There is also java.io.Reader version of this constructor.
-   *
-   * @param in  the java.io.Inputstream to read input from.
-   */
-  WikipediaTokenizerImpl(java.io.InputStream in) {
-    this(new java.io.InputStreamReader(in));
-  }
 
   /**
    * Unpacks the compressed character translation table.

@@ -79,7 +79,7 @@ public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
     public boolean incrementToken() throws IOException {
       if (input.incrementToken()) {
         if (!keywordAttr.isKeyword()) {
-          final String term = termAtt.toString().toLowerCase(Locale.ENGLISH);
+          final String term = termAtt.toString().toLowerCase(Locale.ROOT);
           termAtt.setEmpty().append(term);
         }
         return true;

@@ -27,7 +27,7 @@ import org.apache.lucene.analysis.MockTokenizer;
 public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase {
 
   public void test() throws IOException {
-    DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.US));
+    DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.ROOT));
     String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006 The dogs finally reacted on 7/12/2006";
     TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
     TeeSinkTokenFilter.SinkTokenStream sink = tee.newSinkTokenStream(sinkFilter);

@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.sinks;
 
 import java.io.IOException;
 import java.io.StringReader;
+import java.util.Locale;
 
 import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
@@ -164,7 +165,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
     TokenStream lowerCasing = new LowerCaseFilter(TEST_VERSION_CURRENT, source1);
     String[] lowerCaseTokens = new String[tokens1.length];
     for (int i = 0; i < tokens1.length; i++)
-      lowerCaseTokens[i] = tokens1[i].toLowerCase();
+      lowerCaseTokens[i] = tokens1[i].toLowerCase(Locale.ROOT);
     assertTokenStreamContents(lowerCasing, lowerCaseTokens);
   }
 
@@ -180,7 +181,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
       StringBuilder buffer = new StringBuilder();
       System.out.println("-----Tokens: " + tokCount[k] + "-----");
       for (int i = 0; i < tokCount[k]; i++) {
-        buffer.append(English.intToEnglish(i).toUpperCase()).append(' ');
+        buffer.append(English.intToEnglish(i).toUpperCase(Locale.ROOT)).append(' ');
       }
       //make sure we produce the same tokens
       TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString()))));

@@ -32,7 +32,8 @@ public class TestCharArrayIterator extends LuceneTestCase {
   }
 
   public void testConsumeWordInstance() {
-    BreakIterator bi = BreakIterator.getWordInstance();
+    // we use the default locale, as its randomized by LuceneTestCase
+    BreakIterator bi = BreakIterator.getWordInstance(Locale.getDefault());
     CharArrayIterator ci = CharArrayIterator.newWordInstance();
     for (int i = 0; i < 10000; i++) {
       char text[] = _TestUtil.randomUnicodeString(random()).toCharArray();
@@ -43,7 +44,8 @@ public class TestCharArrayIterator extends LuceneTestCase {
 
   /* run this to test if your JRE is buggy
   public void testWordInstanceJREBUG() {
-    BreakIterator bi = BreakIterator.getWordInstance();
+    // we use the default locale, as its randomized by LuceneTestCase
+    BreakIterator bi = BreakIterator.getWordInstance(Locale.getDefault());
     Segment ci = new Segment();
     for (int i = 0; i < 10000; i++) {
       char text[] = _TestUtil.randomUnicodeString(random).toCharArray();
@@ -60,7 +62,8 @@ public class TestCharArrayIterator extends LuceneTestCase {
   }
 
   public void testConsumeSentenceInstance() {
-    BreakIterator bi = BreakIterator.getSentenceInstance();
+    // we use the default locale, as its randomized by LuceneTestCase
+    BreakIterator bi = BreakIterator.getSentenceInstance(Locale.getDefault());
     CharArrayIterator ci = CharArrayIterator.newSentenceInstance();
     for (int i = 0; i < 10000; i++) {
       char text[] = _TestUtil.randomUnicodeString(random()).toCharArray();
@@ -71,7 +74,8 @@ public class TestCharArrayIterator extends LuceneTestCase {
 
   /* run this to test if your JRE is buggy
   public void testSentenceInstanceJREBUG() {
-    BreakIterator bi = BreakIterator.getSentenceInstance();
+    // we use the default locale, as its randomized by LuceneTestCase
+    BreakIterator bi = BreakIterator.getSentenceInstance(Locale.getDefault());
     Segment ci = new Segment();
     for (int i = 0; i < 10000; i++) {
       char text[] = _TestUtil.randomUnicodeString(random).toCharArray();

@@ -36,7 +36,7 @@ public class TestCharArrayMap extends LuceneTestCase {
         key[j] = (char)random().nextInt(127);
       }
       String keyStr = new String(key);
-      String hmapKey = ignoreCase ? keyStr.toLowerCase(Locale.ENGLISH) : keyStr;
+      String hmapKey = ignoreCase ? keyStr.toLowerCase(Locale.ROOT) : keyStr;
       int val = random().nextInt();

@@ -208,16 +208,16 @@ public class TestCharArraySet extends LuceneTestCase {
       set.add(upper);
     }
     for (int i = 0; i < upperArr.length; i++) {
-      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
-      assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i]));
+      assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
+      assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
     }
     set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), false);
     for (String upper : upperArr) {
       set.add(upper);
     }
     for (int i = 0; i < upperArr.length; i++) {
-      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
-      assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
+      assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
+      assertFalse(String.format(Locale.ROOT, falsePos, lowerArr[i]), set.contains(lowerArr[i]));
     }
   }
 
@@ -235,8 +235,8 @@ public class TestCharArraySet extends LuceneTestCase {
       set.add(upper);
     }
     for (int i = 0; i < upperArr.length; i++) {
-      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
-      assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i]));
+      assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
+      assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
     }
     set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS),
         false);
@@ -244,8 +244,8 @@ public class TestCharArraySet extends LuceneTestCase {
       set.add(upper);
     }
     for (int i = 0; i < upperArr.length; i++) {
-      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
-      assertFalse(String.format(falsePos, upperArr[i]), set
+      assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
+      assertFalse(String.format(Locale.ROOT, falsePos, upperArr[i]), set
           .contains(lowerArr[i]));
     }
   }
 
@@ -258,7 +258,7 @@ public class TestCharArraySet extends LuceneTestCase {
     List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
     List<String> stopwordsUpper = new ArrayList<String>();
     for (String string : stopwords) {
-      stopwordsUpper.add(string.toUpperCase());
+      stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
     }
     setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
     setIngoreCase.add(Integer.valueOf(1));
@@ -305,7 +305,7 @@ public class TestCharArraySet extends LuceneTestCase {
     List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
     List<String> stopwordsUpper = new ArrayList<String>();
     for (String string : stopwords) {
-      stopwordsUpper.add(string.toUpperCase());
+      stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
     }
     setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
     setIngoreCase.add(Integer.valueOf(1));
@@ -351,7 +351,7 @@ public class TestCharArraySet extends LuceneTestCase {
     List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
     List<String> stopwordsUpper = new ArrayList<String>();
     for (String string : stopwords) {
-      stopwordsUpper.add(string.toUpperCase());
+      stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
     }
     set.addAll(Arrays.asList(TEST_STOP_WORDS));

@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.util;
 import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
+import java.util.Locale;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -53,7 +54,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
     // internal buffer size is 1024 make sure we have a surrogate pair right at the border
     builder.insert(1023, "\ud801\udc1c");
     Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString()));
-    assertTokenStreamContents(tokenizer, builder.toString().toLowerCase().split(" "));
+    assertTokenStreamContents(tokenizer, builder.toString().toLowerCase(Locale.ROOT).split(" "));
   }
 
   /*
@@ -70,7 +71,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
       }
       builder.append("\ud801\udc1cabc");
       Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString()));
-      assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase()});
+      assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT)});
     }
   }
 
@@ -84,7 +85,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
       builder.append("A");
     }
     Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
-    assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
+    assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
   }
 
   /*
@@ -98,7 +99,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
     }
     builder.append("\ud801\udc1c");
     Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
-    assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
+    assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
   }
 
   // LUCENE-3642: normalize SMP->BMP and check that offsets are correct

@@ -123,11 +123,11 @@ public class GenerateJflexTLDMacros {
     while (null != (line = reader.readLine())) {
       Matcher matcher = TLD_PATTERN_1.matcher(line);
       if (matcher.matches()) {
-        TLDs.add(matcher.group(1).toLowerCase(Locale.US));
+        TLDs.add(matcher.group(1).toLowerCase(Locale.ROOT));
       } else {
         matcher = TLD_PATTERN_2.matcher(line);
         if (matcher.matches()) {
-          TLDs.add(matcher.group(1).toLowerCase(Locale.US));
+          TLDs.add(matcher.group(1).toLowerCase(Locale.ROOT));
         }
       }
     }
@@ -146,7 +146,7 @@ public class GenerateJflexTLDMacros {
    */
   private void writeOutput(SortedSet<String> ASCIITLDs) throws IOException {
     final DateFormat dateFormat = DateFormat.getDateTimeInstance
-      (DateFormat.FULL, DateFormat.FULL, Locale.US);
+      (DateFormat.FULL, DateFormat.FULL, Locale.ROOT);
     dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
     final Writer writer = new OutputStreamWriter
       (new FileOutputStream(outputFile), "UTF-8");

@@ -64,7 +64,7 @@ public class TestICUCollationKeyAnalyzer extends CollationTestBase {
   //
   public void testCollationKeySort() throws Exception {
     Analyzer usAnalyzer = new ICUCollationKeyAnalyzer
-      (TEST_VERSION_CURRENT, Collator.getInstance(Locale.US));
+      (TEST_VERSION_CURRENT, Collator.getInstance(Locale.ROOT));
     Analyzer franceAnalyzer = new ICUCollationKeyAnalyzer
       (TEST_VERSION_CURRENT, Collator.getInstance(Locale.FRANCE));
     Analyzer swedenAnalyzer = new ICUCollationKeyAnalyzer
@@ -73,7 +73,7 @@ public class TestICUCollationKeyAnalyzer extends CollationTestBase {
       (TEST_VERSION_CURRENT, Collator.getInstance(new Locale("da", "dk")));
 
     // The ICU Collator and java.text.Collator implementations differ in their
-    // orderings - "BFJHD" is the ordering for the ICU Collator for Locale.US.
+    // orderings - "BFJHD" is the ordering for the ICU Collator for Locale.ROOT.
     testCollationKeySort
     (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
      "BFJHD", "ECAGI", "BJDFH", "BJDHF");

@@ -29,7 +29,7 @@ public class GenerateHTMLStripCharFilterSupplementaryMacros {
   private static final UnicodeSet BMP = new UnicodeSet("[\u0000-\uFFFF]");
   private static final String NL = System.getProperty("line.separator");
   private static final DateFormat DATE_FORMAT = DateFormat.getDateTimeInstance
-    (DateFormat.FULL, DateFormat.FULL, Locale.US);
+    (DateFormat.FULL, DateFormat.FULL, Locale.ROOT);
   static {
     DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
   }

@@ -32,7 +32,7 @@ public class GenerateJFlexSupplementaryMacros {
   private static final UnicodeSet BMP = new UnicodeSet("[\u0000-\uFFFF]");
   private static final String NL = System.getProperty("line.separator");
   private static final DateFormat DATE_FORMAT = DateFormat.getDateTimeInstance
-    (DateFormat.FULL, DateFormat.FULL, Locale.US);
+    (DateFormat.FULL, DateFormat.FULL, Locale.ROOT);
   static {
     DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
   }

@@ -607,7 +607,7 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
 
   private void doTestBocchan(int numIterations) throws Exception {
     LineNumberReader reader = new LineNumberReader(new InputStreamReader(
-        this.getClass().getResourceAsStream("bocchan.utf-8")));
+        this.getClass().getResourceAsStream("bocchan.utf-8"), "UTF-8"));
     String line = reader.readLine();
     reader.close();

@@ -65,7 +65,7 @@ public class StempelStemmer {
     DataInputStream in = null;
     try {
       in = new DataInputStream(new BufferedInputStream(stemmerTable));
-      String method = in.readUTF().toUpperCase(Locale.ENGLISH);
+      String method = in.readUTF().toUpperCase(Locale.ROOT);
       if (method.indexOf('M') < 0) {
         return new org.egothor.stemmer.Trie(in);
       } else {

@@ -63,6 +63,7 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.LineNumberReader;
+import java.util.Locale;
 import java.util.StringTokenizer;
 
 /**
@@ -89,7 +90,7 @@ public class Compile {
       return;
     }
 
-    args[0].toUpperCase();
+    args[0].toUpperCase(Locale.ROOT);
 
     backward = args[0].charAt(0) == '-';
     int qq = (backward) ? 1 : 0;
@@ -127,7 +128,7 @@ public class Compile {
               new FileInputStream(args[i]), charset)));
           for (String line = in.readLine(); line != null; line = in.readLine()) {
             try {
-              line = line.toLowerCase();
+              line = line.toLowerCase(Locale.ROOT);
               StringTokenizer st = new StringTokenizer(line);
               String stem = st.nextToken();
               if (storeorig) {

@@ -55,9 +55,11 @@
 package org.egothor.stemmer;
 
 import java.io.BufferedReader;
-import java.io.FileReader;
+import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.io.LineNumberReader;
+import java.util.Locale;
 import java.util.StringTokenizer;
 
 /**
@@ -95,10 +97,11 @@ public class DiffIt {
       // System.out.println("[" + args[i] + "]");
       Diff diff = new Diff(ins, del, rep, nop);
       try {
-        in = new LineNumberReader(new BufferedReader(new FileReader(args[i])));
+        String charset = System.getProperty("egothor.stemmer.charset", "UTF-8");
+        in = new LineNumberReader(new BufferedReader(new InputStreamReader(new FileInputStream(args[i]), charset)));
         for (String line = in.readLine(); line != null; line = in.readLine()) {
           try {
-            line = line.toLowerCase();
+            line = line.toLowerCase(Locale.ROOT);
            StringTokenizer st = new StringTokenizer(line);
            String stem = st.nextToken();
            System.out.println(stem + " -a");

@@ -60,12 +60,14 @@ import java.io.BufferedReader;
 import java.io.DataInputStream;
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileReader;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.io.LineNumberReader;
 import java.net.URI;
+import java.util.Locale;
 import java.util.StringTokenizer;
 
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 
 public class TestCompile extends LuceneTestCase {
@@ -107,7 +109,7 @@ public class TestCompile extends LuceneTestCase {
     Trie trie;
     DataInputStream is = new DataInputStream(new BufferedInputStream(
         new FileInputStream(path)));
-    String method = is.readUTF().toUpperCase();
+    String method = is.readUTF().toUpperCase(Locale.ROOT);
     if (method.indexOf('M') < 0) {
       trie = new Trie(is);
     } else {
@@ -120,11 +122,11 @@ public class TestCompile extends LuceneTestCase {
   private static void assertTrie(Trie trie, String file, boolean usefull,
       boolean storeorig) throws Exception {
     LineNumberReader in = new LineNumberReader(new BufferedReader(
-        new FileReader(file)));
+        new InputStreamReader(new FileInputStream(file), IOUtils.CHARSET_UTF_8)));
 
     for (String line = in.readLine(); line != null; line = in.readLine()) {
       try {
-        line = line.toLowerCase();
+        line = line.toLowerCase(Locale.ROOT);
         StringTokenizer st = new StringTokenizer(line);
         String stem = st.nextToken();
         if (storeorig) {
@@ -132,7 +134,7 @@ public class TestCompile extends LuceneTestCase {
               .getLastOnPath(stem);
           StringBuilder stm = new StringBuilder(stem);
           Diff.apply(stm, cmd);
-          assertEquals(stem.toLowerCase(), stm.toString().toLowerCase());
+          assertEquals(stem.toLowerCase(Locale.ROOT), stm.toString().toLowerCase(Locale.ROOT));
         }
         while (st.hasMoreTokens()) {
           String token = st.nextToken();
@@ -143,7 +145,7 @@ public class TestCompile extends LuceneTestCase {
               .getLastOnPath(token);
           StringBuilder stm = new StringBuilder(token);
           Diff.apply(stm, cmd);
-          assertEquals(stem.toLowerCase(), stm.toString().toLowerCase());
+          assertEquals(stem.toLowerCase(Locale.ROOT), stm.toString().toLowerCase(Locale.ROOT));
        }
      } catch (java.util.NoSuchElementException x) {
        // no base token (stem) on a line

@@ -262,9 +262,11 @@
   <target name="init" depends="module-build.init,resolve-icu,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet"/>
 
   <target name="clean-javacc">
-    <fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">
-      <containsregexp expression="Generated.*By.*JavaCC"/>
-    </fileset>
+    <delete>
+      <fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">
+        <containsregexp expression="Generated.*By.*JavaCC"/>
+      </fileset>
+    </delete>
   </target>
 
   <target name="javacc" depends="init,javacc-check" if="javacc.present">

@@ -23,6 +23,7 @@ import java.io.Reader;
 
 import org.apache.lucene.benchmark.byTask.utils.Algorithm;
 import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.util.IOUtils;
 
 /**
@@ -106,7 +107,7 @@ public class Benchmark {
 
     Benchmark benchmark = null;
     try {
-      benchmark = new Benchmark(new FileReader(algFile));
+      benchmark = new Benchmark(IOUtils.getDecodingReader(algFile, IOUtils.CHARSET_UTF_8));
     } catch (Exception e) {
       e.printStackTrace();
      System.exit(1);
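
IOUtils.getDecodingReader replaces FileReader, which always decodes with the platform default charset. A rough sketch of what a strict UTF-8 reader looks like in plain JDK terms (an illustration under assumptions, not the actual IOUtils code):

import java.io.*;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;

public final class StrictReaders {
  // Decode with an explicit charset and fail fast on malformed bytes
  // instead of silently substituting replacement characters.
  public static Reader utf8Reader(File f) throws IOException {
    CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)
        .onUnmappableCharacter(CodingErrorAction.REPORT);
    return new BufferedReader(new InputStreamReader(new FileInputStream(f), decoder));
  }
}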

@@ -18,12 +18,14 @@ package org.apache.lucene.benchmark.byTask.feeds;
  */
 
 import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.util.IOUtils;
 
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileFilter;
-import java.io.FileReader;
+import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.text.DateFormat;
 import java.text.ParsePosition;
 import java.text.SimpleDateFormat;
@@ -161,7 +163,7 @@ public class DirContentSource extends ContentSource {
       dfi = new DateFormatInfo();
       dfi.pos = new ParsePosition(0);
       // date format: 30-MAR-1987 14:22:36.87
-      dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS", Locale.US);
+      dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS", Locale.ROOT);
       dfi.df.setLenient(true);
       dateFormat.set(dfi);
     }
@@ -198,7 +200,7 @@ public class DirContentSource extends ContentSource {
       name = f.getCanonicalPath()+"_"+iteration;
     }
 
-    BufferedReader reader = new BufferedReader(new FileReader(f));
+    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), IOUtils.CHARSET_UTF_8));
     String line = null;
     //First line is the date, 3rd is the title, rest is body
     String dateStr = reader.readLine();

@@ -29,6 +29,7 @@ import java.util.Locale;
 import java.util.Map;
 import java.util.Properties;
 import java.util.Random;
+import java.util.TimeZone;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.lucene.benchmark.byTask.utils.Config;
@@ -182,8 +183,8 @@ public class DocMaker implements Closeable {
   private boolean storeBytes = false;
 
   private static class DateUtil {
-    public SimpleDateFormat parser = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss", Locale.US);
-    public Calendar cal = Calendar.getInstance();
+    public SimpleDateFormat parser = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss", Locale.ROOT);
+    public Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT);
     public ParsePosition pos = new ParsePosition(0);
     public DateUtil() {
       parser.setLenient(true);
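
Pinning the time zone matters as much as the locale here: a SimpleDateFormat parse of the same text yields different epoch millis depending on the machine's zone. A hypothetical demo (not part of the commit):

import java.text.SimpleDateFormat;
import java.util.Locale;
import java.util.TimeZone;

public class DefaultTimeZoneDemo {
  public static void main(String[] args) throws Exception {
    SimpleDateFormat parser = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss", Locale.ROOT);
    // Interpreted in the JVM's default zone: hosts in UTC+9 and UTC-5
    // turn the same text into instants 14 hours apart.
    System.out.println(parser.parse("01-Jan-2012 00:00:00").getTime());
    // Pinned to GMT (as DocMaker's DateUtil now is), every machine
    // computes the same instant.
    parser.setTimeZone(TimeZone.getTimeZone("GMT"));
    System.out.println(parser.parse("01-Jan-2012 00:00:00").getTime());
  }
}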

@@ -25,6 +25,7 @@ import java.io.InputStreamReader;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CodingErrorAction;
 import java.util.HashMap;
+import java.util.Locale;
 import java.util.Map;
 
 import org.apache.lucene.benchmark.byTask.utils.Config;
@@ -146,7 +147,7 @@ public class EnwikiContentSource extends ContentSource {
           case BODY:
             body = contents.toString();
             //workaround that startswith doesn't have an ignore case option, get at least 20 chars.
-            String startsWith = body.substring(0, Math.min(10, contents.length())).toLowerCase();
+            String startsWith = body.substring(0, Math.min(10, contents.length())).toLowerCase(Locale.ROOT);
             if (startsWith.startsWith("#redirect")) {
               body = null;
             }

@@ -5,6 +5,7 @@ import org.apache.lucene.queryparser.classic.ParseException;
 import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.Version;
 
 import java.io.*;
@@ -59,13 +60,14 @@ public class FileBasedQueryMaker extends AbstractQueryMaker implements QueryMake
     {
       File file = new File(fileName);
       Reader reader = null;
+      // note: we use a decoding reader, so if your queries are screwed up you know
       if (file.exists()) {
-        reader = new FileReader(file);
+        reader = IOUtils.getDecodingReader(file, IOUtils.CHARSET_UTF_8);
       } else {
         //see if we can find it as a resource
         InputStream asStream = FileBasedQueryMaker.class.getClassLoader().getResourceAsStream(fileName);
         if (asStream != null) {
-          reader = new InputStreamReader(asStream);
+          reader = IOUtils.getDecodingReader(asStream, IOUtils.CHARSET_UTF_8);
         }
       }
       if (reader != null) {

@@ -35,7 +35,7 @@ public class LongToEnglishContentSource extends ContentSource{
   }
 
   // TODO: we could take param to specify locale...
-  private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ENGLISH,
+  private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ROOT,
                                                                        RuleBasedNumberFormat.SPELLOUT);
   @Override
   public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {

@@ -37,7 +37,7 @@ public class LongToEnglishQueryMaker implements QueryMaker {
   protected QueryParser parser;
 
   // TODO: we could take param to specify locale...
-  private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ENGLISH,
+  private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ROOT,
                                                                        RuleBasedNumberFormat.SPELLOUT);
 
   public Query makeQuery(int size) throws Exception {

@@ -19,8 +19,9 @@ package org.apache.lucene.benchmark.byTask.feeds;
 
 import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileReader;
+import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.text.DateFormat;
 import java.text.ParsePosition;
 import java.text.SimpleDateFormat;
@@ -29,6 +30,7 @@ import java.util.Date;
 import java.util.Locale;
 
 import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.util.IOUtils;
 
 /**
  * A {@link ContentSource} reading from the Reuters collection.
@@ -74,7 +76,7 @@ public class ReutersContentSource extends ContentSource {
     if (dfi == null) {
       dfi = new DateFormatInfo();
       // date format: 30-MAR-1987 14:22:36.87
-      dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
+      dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.ROOT);
       dfi.df.setLenient(true);
       dfi.pos = new ParsePosition(0);
       dateFormat.set(dfi);
@@ -112,7 +114,7 @@ public class ReutersContentSource extends ContentSource {
       name = f.getCanonicalPath() + "_" + iteration;
     }
 
-    BufferedReader reader = new BufferedReader(new FileReader(f));
+    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), IOUtils.CHARSET_UTF_8));
     try {
       // First line is the date, 3rd is the title, rest is body
       String dateStr = reader.readLine();

@@ -108,7 +108,7 @@ public class TrecContentSource extends ContentSource {
       dfi = new DateFormatInfo();
       dfi.dfs = new SimpleDateFormat[DATE_FORMATS.length];
       for (int i = 0; i < dfi.dfs.length; i++) {
-        dfi.dfs[i] = new SimpleDateFormat(DATE_FORMATS[i], Locale.US);
+        dfi.dfs[i] = new SimpleDateFormat(DATE_FORMATS[i], Locale.ROOT);
         dfi.dfs[i].setLenient(true);
       }
       dfi.pos = new ParsePosition(0);

@@ -47,7 +47,7 @@ public abstract class TrecDocParser {
   static final Map<String,ParsePathType> pathName2Type = new HashMap<String,ParsePathType>();
   static {
     for (ParsePathType ppt : ParsePathType.values()) {
-      pathName2Type.put(ppt.name().toUpperCase(Locale.ENGLISH),ppt);
+      pathName2Type.put(ppt.name().toUpperCase(Locale.ROOT),ppt);
     }
   }
 
@@ -60,7 +60,7 @@ public abstract class TrecDocParser {
   public static ParsePathType pathType(File f) {
     int pathLength = 0;
     while (f != null && ++pathLength < MAX_PATH_LENGTH) {
-      ParsePathType ppt = pathName2Type.get(f.getName().toUpperCase(Locale.ENGLISH));
+      ParsePathType ppt = pathName2Type.get(f.getName().toUpperCase(Locale.ROOT));
       if (ppt!=null) {
         return ppt;
       }

@@ -0,0 +1,112 @@
/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */
/* JavaCCOptions:STATIC=false */
package org.apache.lucene.benchmark.byTask.feeds.demohtml;
/**
* This interface describes a character stream that maintains line and
* column number positions of the characters. It also has the capability
* to backup the stream to some extent. An implementation of this
* interface is used in the TokenManager implementation generated by
* JavaCCParser.
*
* All the methods except backup can be implemented in any fashion. backup
* needs to be implemented correctly for the correct operation of the lexer.
* Rest of the methods are all used to get information like line number,
* column number and the String that constitutes a token and are not used
* by the lexer. Hence their implementation won't affect the generated lexer's
* operation.
*/
public interface CharStream {
/**
* Returns the next character from the selected input. The method
* of selecting the input is the responsibility of the class
* implementing this interface. Can throw any java.io.IOException.
*/
char readChar() throws java.io.IOException;
/**
* Returns the column position of the character last read.
* @deprecated
* @see #getEndColumn
*/
int getColumn();
/**
* Returns the line number of the character last read.
* @deprecated
* @see #getEndLine
*/
int getLine();
/**
* Returns the column number of the last character for current token (being
* matched after the last call to BeginTOken).
*/
int getEndColumn();
/**
* Returns the line number of the last character for current token (being
* matched after the last call to BeginTOken).
*/
int getEndLine();
/**
* Returns the column number of the first character for current token (being
* matched after the last call to BeginToken).
*/
int getBeginColumn();
/**
* Returns the line number of the first character for current token (being
* matched after the last call to BeginToken).
*/
int getBeginLine();
/**
* Backs up the input stream by amount steps. Lexer calls this method if it
* had already read some characters, but could not use them to match a
* (longer) token. So, they will be used again as the prefix of the next
* token and it is the implementation's responsibility to do this right.
*/
void backup(int amount);
/**
* Returns the next character that marks the beginning of the next token.
* All characters must remain in the buffer between two successive calls
* to this method to implement backup correctly.
*/
char BeginToken() throws java.io.IOException;
/**
* Returns a string made up of characters from the marked token beginning
* to the current buffer position. Implementations have the choice of returning
* anything that they want to. For example, for efficiency, one might decide
* to just return null, which is a valid implementation.
*/
String GetImage();
/**
* Returns an array of characters that make up the suffix of length 'len' for
* the currently matched token. This is used to build up the matched string
* for use in actions in the case of MORE. A simple and inefficient
* implementation of this is as follows:
*
* {
* String t = GetImage();
* return t.substring(t.length() - len, t.length()).toCharArray();
* }
*/
char[] GetSuffix(int len);
/**
* The lexer calls this function to indicate that it is done with the stream
* and hence implementations can free any resources held by this class.
* Again, the body of this function can be just empty and it will not
* affect the lexer's operation.
*/
void Done();
}
/* JavaCC - OriginalChecksum=e26d9399cd34335f985e19c1fa86c11b (do not edit this line) */

View File

@ -0,0 +1,123 @@
// FastCharStream.java
package org.apache.lucene.benchmark.byTask.feeds.demohtml;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
import java.io.*;
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
* this does not do line-number counting, but instead keeps track of the
* character position of the token in the input, as required by Lucene's {@link
* org.apache.lucene.analysis.Token} API.
* */
public final class FastCharStream implements CharStream {
char[] buffer = null;
int bufferLength = 0; // end of valid chars
int bufferPosition = 0; // next char to read
int tokenStart = 0; // offset in buffer
int bufferStart = 0; // position in file of buffer
Reader input; // source of chars
/** Constructs from a Reader. */
public FastCharStream(Reader r) {
input = r;
}
public final char readChar() throws IOException {
if (bufferPosition >= bufferLength)
refill();
return buffer[bufferPosition++];
}
private final void refill() throws IOException {
int newPosition = bufferLength - tokenStart;
if (tokenStart == 0) { // token won't fit in buffer
if (buffer == null) { // first time: alloc buffer
buffer = new char[2048];
} else if (bufferLength == buffer.length) { // grow buffer
char[] newBuffer = new char[buffer.length*2];
System.arraycopy(buffer, 0, newBuffer, 0, bufferLength);
buffer = newBuffer;
}
} else { // shift token to front
System.arraycopy(buffer, tokenStart, buffer, 0, newPosition);
}
bufferLength = newPosition; // update state
bufferPosition = newPosition;
bufferStart += tokenStart;
tokenStart = 0;
int charsRead = // fill space in buffer
input.read(buffer, newPosition, buffer.length-newPosition);
if (charsRead == -1)
throw new IOException("read past eof");
else
bufferLength += charsRead;
}
public final char BeginToken() throws IOException {
tokenStart = bufferPosition;
return readChar();
}
public final void backup(int amount) {
bufferPosition -= amount;
}
public final String GetImage() {
return new String(buffer, tokenStart, bufferPosition - tokenStart);
}
public final char[] GetSuffix(int len) {
char[] value = new char[len];
System.arraycopy(buffer, bufferPosition - len, value, 0, len);
return value;
}
public final void Done() {
try {
input.close();
} catch (IOException e) {
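// ignore: the parser is finished with the stream, so a close failure is not actionable here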
}
}
public final int getColumn() {
return bufferStart + bufferPosition;
}
public final int getLine() {
return 1;
}
public final int getEndColumn() {
return bufferStart + bufferPosition;
}
public final int getEndLine() {
return 1;
}
public final int getBeginColumn() {
return bufferStart + tokenStart;
}
public final int getBeginLine() {
return 1;
}
}
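A hedged usage sketch of the class above: FastCharStream only adapts an already-decoded Reader to JavaCC's CharStream, so the charset decision stays with the caller, never with the parser (assumes it runs in the same package as FastCharStream; the input string is made up):

    import java.io.IOException;
    import java.io.StringReader;

    public class FastCharStreamDemo {
      public static void main(String[] args) throws IOException {
        // The Reader is already character data; no byte decoding happens here.
        FastCharStream cs = new FastCharStream(new StringReader("<html>hi</html>"));
        char c = cs.BeginToken(); // reads the first character, '<'
        System.out.println(c);
        cs.Done();                // releases the underlying Reader
      }
    }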

View File

@ -29,6 +29,10 @@ public class HTMLParser implements HTMLParserConstants {
private MyPipedInputStream pipeInStream = null; private MyPipedInputStream pipeInStream = null;
private PipedOutputStream pipeOutStream = null; private PipedOutputStream pipeOutStream = null;
public HTMLParser(Reader reader) {
this(new FastCharStream(reader));
}
private class MyPipedInputStream extends PipedInputStream{ private class MyPipedInputStream extends PipedInputStream{
public MyPipedInputStream(){ public MyPipedInputStream(){
@ -227,7 +231,7 @@ InterruptedException {
Token t1, t2; Token t1, t2;
boolean inImg = false; boolean inImg = false;
t1 = jj_consume_token(TagName); t1 = jj_consume_token(TagName);
String tagName = t1.image.toLowerCase(Locale.ENGLISH); String tagName = t1.image.toLowerCase(Locale.ROOT);
if(Tags.WS_ELEMS.contains(tagName) ) { if(Tags.WS_ELEMS.contains(tagName) ) {
addSpace(); addSpace();
} }
@ -264,7 +268,7 @@ InterruptedException {
) )
&& t2 != null) && t2 != null)
{ {
currentMetaTag=t2.image.toLowerCase(Locale.ENGLISH); currentMetaTag=t2.image.toLowerCase(Locale.ROOT);
if(currentMetaTag != null && currentMetaContent != null) { if(currentMetaTag != null && currentMetaContent != null) {
addMetaTag(); addMetaTag();
} }
@ -272,7 +276,7 @@ InterruptedException {
if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 != if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 !=
null) null)
{ {
currentMetaContent=t2.image.toLowerCase(Locale.ENGLISH); currentMetaContent=t2.image.toLowerCase(Locale.ROOT);
if(currentMetaTag != null && currentMetaContent != null) { if(currentMetaTag != null && currentMetaContent != null) {
addMetaTag(); addMetaTag();
} }
@ -464,7 +468,6 @@ null)
/** Generated Token Manager. */ /** Generated Token Manager. */
public HTMLParserTokenManager token_source; public HTMLParserTokenManager token_source;
SimpleCharStream jj_input_stream;
/** Current token. */ /** Current token. */
public Token token; public Token token;
/** Next token. */ /** Next token. */
@ -485,14 +488,9 @@ null)
private boolean jj_rescan = false; private boolean jj_rescan = false;
private int jj_gc = 0; private int jj_gc = 0;
/** Constructor with InputStream. */ /** Constructor with user supplied CharStream. */
public HTMLParser(java.io.InputStream stream) { public HTMLParser(CharStream stream) {
this(stream, null); token_source = new HTMLParserTokenManager(stream);
}
/** Constructor with InputStream and supplied encoding */
public HTMLParser(java.io.InputStream stream, String encoding) {
try { jj_input_stream = new SimpleCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
token_source = new HTMLParserTokenManager(jj_input_stream);
token = new Token(); token = new Token();
jj_ntk = -1; jj_ntk = -1;
jj_gen = 0; jj_gen = 0;
@ -501,35 +499,8 @@ null)
} }
/** Reinitialise. */ /** Reinitialise. */
public void ReInit(java.io.InputStream stream) { public void ReInit(CharStream stream) {
ReInit(stream, null); token_source.ReInit(stream);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream stream, String encoding) {
try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
token_source.ReInit(jj_input_stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 14; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
/** Constructor. */
public HTMLParser(java.io.Reader stream) {
jj_input_stream = new SimpleCharStream(stream, 1, 1);
token_source = new HTMLParserTokenManager(jj_input_stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 14; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
/** Reinitialise. */
public void ReInit(java.io.Reader stream) {
jj_input_stream.ReInit(stream, 1, 1);
token_source.ReInit(jj_input_stream);
token = new Token(); token = new Token();
jj_ntk = -1; jj_ntk = -1;
jj_gen = 0; jj_gen = 0;
@ -631,7 +602,7 @@ null)
return (jj_ntk = jj_nt.kind); return (jj_ntk = jj_nt.kind);
} }
private java.util.List<int[]> jj_expentries = new java.util.ArrayList<int[]>(); private java.util.List jj_expentries = new java.util.ArrayList();
private int[] jj_expentry; private int[] jj_expentry;
private int jj_kind = -1; private int jj_kind = -1;
private int[] jj_lasttokens = new int[100]; private int[] jj_lasttokens = new int[100];
@ -691,7 +662,7 @@ null)
jj_add_error_token(0, 0); jj_add_error_token(0, 0);
int[][] exptokseq = new int[jj_expentries.size()][]; int[][] exptokseq = new int[jj_expentries.size()][];
for (int i = 0; i < jj_expentries.size(); i++) { for (int i = 0; i < jj_expentries.size(); i++) {
exptokseq[i] = jj_expentries.get(i); exptokseq[i] = (int[])jj_expentries.get(i);
} }
return new ParseException(token, exptokseq, tokenImage); return new ParseException(token, exptokseq, tokenImage);
} }

View File

@ -22,6 +22,7 @@ options {
//DEBUG_LOOKAHEAD = true; //DEBUG_LOOKAHEAD = true;
//DEBUG_TOKEN_MANAGER = true; //DEBUG_TOKEN_MANAGER = true;
UNICODE_INPUT = true; UNICODE_INPUT = true;
USER_CHAR_STREAM=true;
} }
PARSER_BEGIN(HTMLParser) PARSER_BEGIN(HTMLParser)
@ -56,6 +57,10 @@ public class HTMLParser {
private MyPipedInputStream pipeInStream = null; private MyPipedInputStream pipeInStream = null;
private PipedOutputStream pipeOutStream = null; private PipedOutputStream pipeOutStream = null;
public HTMLParser(Reader reader) {
this(new FastCharStream(reader));
}
private class MyPipedInputStream extends PipedInputStream{ private class MyPipedInputStream extends PipedInputStream{
public MyPipedInputStream(){ public MyPipedInputStream(){
@ -227,7 +232,7 @@ void Tag() throws IOException :
} }
{ {
t1=<TagName> { t1=<TagName> {
String tagName = t1.image.toLowerCase(Locale.ENGLISH); String tagName = t1.image.toLowerCase(Locale.ROOT);
if(Tags.WS_ELEMS.contains(tagName) ) { if(Tags.WS_ELEMS.contains(tagName) ) {
addSpace(); addSpace();
} }
@ -249,7 +254,7 @@ void Tag() throws IOException :
) )
&& t2 != null) && t2 != null)
{ {
currentMetaTag=t2.image.toLowerCase(Locale.ENGLISH); currentMetaTag=t2.image.toLowerCase(Locale.ROOT);
if(currentMetaTag != null && currentMetaContent != null) { if(currentMetaTag != null && currentMetaContent != null) {
addMetaTag(); addMetaTag();
} }
@ -257,7 +262,7 @@ void Tag() throws IOException :
if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 != if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 !=
null) null)
{ {
currentMetaContent=t2.image.toLowerCase(Locale.ENGLISH); currentMetaContent=t2.image.toLowerCase(Locale.ROOT);
if(currentMetaTag != null && currentMetaContent != null) { if(currentMetaTag != null && currentMetaContent != null) {
addMetaTag(); addMetaTag();
} }

View File

@ -464,7 +464,7 @@ private int jjMoveNfa_0(int startState, int curPos)
} }
else else
{ {
int hiByte = (curChar >> 8); int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6; int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077); long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6; int i2 = (curChar & 0xff) >> 6;
@ -569,7 +569,7 @@ private int jjMoveNfa_5(int startState, int curPos)
} }
else else
{ {
int hiByte = (curChar >> 8); int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6; int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077); long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6; int i2 = (curChar & 0xff) >> 6;
@ -670,7 +670,7 @@ private int jjMoveNfa_7(int startState, int curPos)
} }
else else
{ {
int hiByte = (curChar >> 8); int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6; int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077); long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6; int i2 = (curChar & 0xff) >> 6;
@ -766,7 +766,7 @@ private int jjMoveNfa_4(int startState, int curPos)
} }
else else
{ {
int hiByte = (curChar >> 8); int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6; int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077); long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6; int i2 = (curChar & 0xff) >> 6;
@ -892,7 +892,7 @@ private int jjMoveNfa_3(int startState, int curPos)
} }
else else
{ {
int hiByte = (curChar >> 8); int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6; int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077); long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6; int i2 = (curChar & 0xff) >> 6;
@ -1061,7 +1061,7 @@ private int jjMoveNfa_6(int startState, int curPos)
} }
else else
{ {
int hiByte = (curChar >> 8); int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6; int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077); long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6; int i2 = (curChar & 0xff) >> 6;
@ -1205,7 +1205,7 @@ private int jjMoveNfa_1(int startState, int curPos)
} }
else else
{ {
int hiByte = (curChar >> 8); int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6; int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077); long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6; int i2 = (curChar & 0xff) >> 6;
@ -1361,7 +1361,7 @@ private int jjMoveNfa_2(int startState, int curPos)
} }
else else
{ {
int hiByte = (curChar >> 8); int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6; int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077); long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6; int i2 = (curChar & 0xff) >> 6;
@ -1441,25 +1441,23 @@ static final long[] jjtoToken = {
static final long[] jjtoSkip = { static final long[] jjtoSkip = {
0x400000L, 0x400000L,
}; };
protected SimpleCharStream input_stream; protected CharStream input_stream;
private final int[] jjrounds = new int[28]; private final int[] jjrounds = new int[28];
private final int[] jjstateSet = new int[56]; private final int[] jjstateSet = new int[56];
protected char curChar; protected char curChar;
/** Constructor. */ /** Constructor. */
public HTMLParserTokenManager(SimpleCharStream stream){ public HTMLParserTokenManager(CharStream stream){
if (SimpleCharStream.staticFlag)
throw new Error("ERROR: Cannot use a static CharStream class with a non-static lexical analyzer.");
input_stream = stream; input_stream = stream;
} }
/** Constructor. */ /** Constructor. */
public HTMLParserTokenManager(SimpleCharStream stream, int lexState){ public HTMLParserTokenManager(CharStream stream, int lexState){
this(stream); this(stream);
SwitchTo(lexState); SwitchTo(lexState);
} }
/** Reinitialise parser. */ /** Reinitialise parser. */
public void ReInit(SimpleCharStream stream) public void ReInit(CharStream stream)
{ {
jjmatchedPos = jjnewStateCnt = 0; jjmatchedPos = jjnewStateCnt = 0;
curLexState = defaultLexState; curLexState = defaultLexState;
@ -1475,7 +1473,7 @@ private void ReInitRounds()
} }
/** Reinitialise parser. */ /** Reinitialise parser. */
public void ReInit(SimpleCharStream stream, int lexState) public void ReInit(CharStream stream, int lexState)
{ {
ReInit(stream); ReInit(stream);
SwitchTo(lexState); SwitchTo(lexState);

View File

@ -195,4 +195,4 @@ public class ParseException extends Exception {
} }
} }
/* JavaCC - OriginalChecksum=e5376178619291bc9d2c0c6647dc3cef (do not edit this line) */ /* JavaCC - OriginalChecksum=e449d0e43f3d85deb1260a88b7e90fcd (do not edit this line) */

View File

@ -1,472 +0,0 @@
/* Generated By:JavaCC: Do not edit this line. SimpleCharStream.java Version 4.1 */
/* JavaCCOptions:STATIC=false */
package org.apache.lucene.benchmark.byTask.feeds.demohtml;
/**
* An implementation of interface CharStream, where the stream is assumed to
* contain only ASCII characters (without unicode processing).
*/
public class SimpleCharStream
{
/** Whether parser is static. */
public static final boolean staticFlag = false;
int bufsize;
int available;
int tokenBegin;
/** Position in buffer. */
public int bufpos = -1;
protected int bufline[];
protected int bufcolumn[];
protected int column = 0;
protected int line = 1;
protected boolean prevCharIsCR = false;
protected boolean prevCharIsLF = false;
protected java.io.Reader inputStream;
protected char[] buffer;
protected int maxNextCharInd = 0;
protected int inBuf = 0;
protected int tabSize = 8;
protected void setTabSize(int i) { tabSize = i; }
protected int getTabSize(int i) { return tabSize; }
protected void ExpandBuff(boolean wrapAround)
{
char[] newbuffer = new char[bufsize + 2048];
int newbufline[] = new int[bufsize + 2048];
int newbufcolumn[] = new int[bufsize + 2048];
try
{
if (wrapAround)
{
System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
System.arraycopy(buffer, 0, newbuffer,
bufsize - tokenBegin, bufpos);
buffer = newbuffer;
System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
System.arraycopy(bufline, 0, newbufline, bufsize - tokenBegin, bufpos);
bufline = newbufline;
System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
System.arraycopy(bufcolumn, 0, newbufcolumn, bufsize - tokenBegin, bufpos);
bufcolumn = newbufcolumn;
maxNextCharInd = (bufpos += (bufsize - tokenBegin));
}
else
{
System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
buffer = newbuffer;
System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
bufline = newbufline;
System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
bufcolumn = newbufcolumn;
maxNextCharInd = (bufpos -= tokenBegin);
}
}
catch (Throwable t)
{
throw new Error(t.getMessage());
}
bufsize += 2048;
available = bufsize;
tokenBegin = 0;
}
protected void FillBuff() throws java.io.IOException
{
if (maxNextCharInd == available)
{
if (available == bufsize)
{
if (tokenBegin > 2048)
{
bufpos = maxNextCharInd = 0;
available = tokenBegin;
}
else if (tokenBegin < 0)
bufpos = maxNextCharInd = 0;
else
ExpandBuff(false);
}
else if (available > tokenBegin)
available = bufsize;
else if ((tokenBegin - available) < 2048)
ExpandBuff(true);
else
available = tokenBegin;
}
int i;
try {
if ((i = inputStream.read(buffer, maxNextCharInd,
available - maxNextCharInd)) == -1)
{
inputStream.close();
throw new java.io.IOException();
}
else
maxNextCharInd += i;
return;
}
catch(java.io.IOException e) {
--bufpos;
backup(0);
if (tokenBegin == -1)
tokenBegin = bufpos;
throw e;
}
}
/** Start. */
public char BeginToken() throws java.io.IOException
{
tokenBegin = -1;
char c = readChar();
tokenBegin = bufpos;
return c;
}
protected void UpdateLineColumn(char c)
{
column++;
if (prevCharIsLF)
{
prevCharIsLF = false;
line += (column = 1);
}
else if (prevCharIsCR)
{
prevCharIsCR = false;
if (c == '\n')
{
prevCharIsLF = true;
}
else
line += (column = 1);
}
switch (c)
{
case '\r' :
prevCharIsCR = true;
break;
case '\n' :
prevCharIsLF = true;
break;
case '\t' :
column--;
column += (tabSize - (column % tabSize));
break;
default :
break;
}
bufline[bufpos] = line;
bufcolumn[bufpos] = column;
}
/** Read a character. */
public char readChar() throws java.io.IOException
{
if (inBuf > 0)
{
--inBuf;
if (++bufpos == bufsize)
bufpos = 0;
return buffer[bufpos];
}
if (++bufpos >= maxNextCharInd)
FillBuff();
char c = buffer[bufpos];
UpdateLineColumn(c);
return c;
}
/**
* @deprecated
* @see #getEndColumn
*/
public int getColumn() {
return bufcolumn[bufpos];
}
/**
* @deprecated
* @see #getEndLine
*/
public int getLine() {
return bufline[bufpos];
}
/** Get token end column number. */
public int getEndColumn() {
return bufcolumn[bufpos];
}
/** Get token end line number. */
public int getEndLine() {
return bufline[bufpos];
}
/** Get token beginning column number. */
public int getBeginColumn() {
return bufcolumn[tokenBegin];
}
/** Get token beginning line number. */
public int getBeginLine() {
return bufline[tokenBegin];
}
/** Backup a number of characters. */
public void backup(int amount) {
inBuf += amount;
if ((bufpos -= amount) < 0)
bufpos += bufsize;
}
/** Constructor. */
public SimpleCharStream(java.io.Reader dstream, int startline,
int startcolumn, int buffersize)
{
inputStream = dstream;
line = startline;
column = startcolumn - 1;
available = bufsize = buffersize;
buffer = new char[buffersize];
bufline = new int[buffersize];
bufcolumn = new int[buffersize];
}
/** Constructor. */
public SimpleCharStream(java.io.Reader dstream, int startline,
int startcolumn)
{
this(dstream, startline, startcolumn, 4096);
}
/** Constructor. */
public SimpleCharStream(java.io.Reader dstream)
{
this(dstream, 1, 1, 4096);
}
/** Reinitialise. */
public void ReInit(java.io.Reader dstream, int startline,
int startcolumn, int buffersize)
{
inputStream = dstream;
line = startline;
column = startcolumn - 1;
if (buffer == null || buffersize != buffer.length)
{
available = bufsize = buffersize;
buffer = new char[buffersize];
bufline = new int[buffersize];
bufcolumn = new int[buffersize];
}
prevCharIsLF = prevCharIsCR = false;
tokenBegin = inBuf = maxNextCharInd = 0;
bufpos = -1;
}
/** Reinitialise. */
public void ReInit(java.io.Reader dstream, int startline,
int startcolumn)
{
ReInit(dstream, startline, startcolumn, 4096);
}
/** Reinitialise. */
public void ReInit(java.io.Reader dstream)
{
ReInit(dstream, 1, 1, 4096);
}
/** Constructor. */
public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
{
this(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
}
/** Constructor. */
public SimpleCharStream(java.io.InputStream dstream, int startline,
int startcolumn, int buffersize)
{
this(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
}
/** Constructor. */
public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
int startcolumn) throws java.io.UnsupportedEncodingException
{
this(dstream, encoding, startline, startcolumn, 4096);
}
/** Constructor. */
public SimpleCharStream(java.io.InputStream dstream, int startline,
int startcolumn)
{
this(dstream, startline, startcolumn, 4096);
}
/** Constructor. */
public SimpleCharStream(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
{
this(dstream, encoding, 1, 1, 4096);
}
/** Constructor. */
public SimpleCharStream(java.io.InputStream dstream)
{
this(dstream, 1, 1, 4096);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream dstream, String encoding, int startline,
int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
{
ReInit(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream dstream, int startline,
int startcolumn, int buffersize)
{
ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
{
ReInit(dstream, encoding, 1, 1, 4096);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream dstream)
{
ReInit(dstream, 1, 1, 4096);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream dstream, String encoding, int startline,
int startcolumn) throws java.io.UnsupportedEncodingException
{
ReInit(dstream, encoding, startline, startcolumn, 4096);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream dstream, int startline,
int startcolumn)
{
ReInit(dstream, startline, startcolumn, 4096);
}
/** Get token literal value. */
public String GetImage()
{
if (bufpos >= tokenBegin)
return new String(buffer, tokenBegin, bufpos - tokenBegin + 1);
else
return new String(buffer, tokenBegin, bufsize - tokenBegin) +
new String(buffer, 0, bufpos + 1);
}
/** Get the suffix. */
public char[] GetSuffix(int len)
{
char[] ret = new char[len];
if ((bufpos + 1) >= len)
System.arraycopy(buffer, bufpos - len + 1, ret, 0, len);
else
{
System.arraycopy(buffer, bufsize - (len - bufpos - 1), ret, 0,
len - bufpos - 1);
System.arraycopy(buffer, 0, ret, len - bufpos - 1, bufpos + 1);
}
return ret;
}
/** Reset buffer when finished. */
public void Done()
{
buffer = null;
bufline = null;
bufcolumn = null;
}
/**
* Method to adjust line and column numbers for the start of a token.
*/
public void adjustBeginLineColumn(int newLine, int newCol)
{
int start = tokenBegin;
int len;
if (bufpos >= tokenBegin)
{
len = bufpos - tokenBegin + inBuf + 1;
}
else
{
len = bufsize - tokenBegin + bufpos + 1 + inBuf;
}
int i = 0, j = 0, k = 0;
int nextColDiff = 0, columnDiff = 0;
while (i < len &&
bufline[j = start % bufsize] == bufline[k = ++start % bufsize])
{
bufline[j] = newLine;
nextColDiff = columnDiff + bufcolumn[k] - bufcolumn[j];
bufcolumn[j] = newCol + columnDiff;
columnDiff = nextColDiff;
i++;
}
if (i < len)
{
bufline[j] = newLine++;
bufcolumn[j] = newCol + columnDiff;
while (i++ < len)
{
if (bufline[j = start % bufsize] != bufline[++start % bufsize])
bufline[j] = newLine++;
else
bufline[j] = newLine;
}
}
line = bufline[j];
column = bufcolumn[j];
}
}
/* JavaCC - OriginalChecksum=7c2e625567f11c3058995b779d0149ad (do not edit this line) */

View File

@ -121,4 +121,4 @@ public class Token {
} }
} }
/* JavaCC - OriginalChecksum=e49c2a0c10d50ff2ebd0639552330ce7 (do not edit this line) */ /* JavaCC - OriginalChecksum=24643dc85fd6daeec42ceba20b46ee61 (do not edit this line) */

View File

@ -138,4 +138,4 @@ public class TokenMgrError extends Error
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason); this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
} }
} }
/* JavaCC - OriginalChecksum=3aee554f696e5d7a18b1ad330c1de53f (do not edit this line) */ /* JavaCC - OriginalChecksum=538f0da130356fcc0bc7db621ab0389d (do not edit this line) */

View File

@ -18,6 +18,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
*/ */
import java.text.NumberFormat; import java.text.NumberFormat;
import java.util.Locale;
import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker; import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
@ -61,7 +62,7 @@ public class AddDocTask extends PerfTask {
@Override @Override
protected String getLogMessage(int recsCount) { protected String getLogMessage(int recsCount) {
return String.format("added %9d docs",recsCount); return String.format(Locale.ROOT, "added %9d docs",recsCount);
} }
@Override @Override
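String.format without a Locale argument formats through the default locale, so even purely numeric log lines can change shape between machines. A sketch of the difference the added Locale.ROOT makes:

    import java.util.Locale;

    public class FormatLocaleDemo {
      public static void main(String[] args) {
        double secs = 1234.5;
        // Germany renders the decimal separator as a comma.
        System.out.println(String.format(Locale.GERMANY, "%7.2f", secs)); // "1234,50"
        System.out.println(String.format(Locale.ROOT,    "%7.2f", secs)); // "1234.50"
      }
    }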

View File

@ -40,6 +40,7 @@ import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.PrintStream; import java.io.PrintStream;
import java.nio.charset.Charset;
/** /**
* Create an index. <br> * Create an index. <br>
@ -182,7 +183,7 @@ public class CreateIndexTask extends PerfTask {
iwc.setInfoStream(System.err); iwc.setInfoStream(System.err);
} else { } else {
File f = new File(infoStreamVal).getAbsoluteFile(); File f = new File(infoStreamVal).getAbsoluteFile();
iwc.setInfoStream(new PrintStream(new BufferedOutputStream(new FileOutputStream(f)))); iwc.setInfoStream(new PrintStream(new BufferedOutputStream(new FileOutputStream(f)), false, Charset.defaultCharset().name()));
} }
} }
IndexWriter writer = new IndexWriter(runData.getDirectory(), iwc); IndexWriter writer = new IndexWriter(runData.getDirectory(), iwc);
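new PrintStream(OutputStream) encodes with an implicit default charset. The change above keeps the default charset but names it explicitly, which is what the forbidden-API check requires: every charset decision must be visible at the call site. A sketch:

    import java.io.ByteArrayOutputStream;
    import java.io.PrintStream;
    import java.io.UnsupportedEncodingException;
    import java.nio.charset.Charset;

    public class ExplicitPrintStream {
      public static void main(String[] args) throws UnsupportedEncodingException {
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        // Same behavior as new PrintStream(sink), but the charset choice is
        // spelled out, so a policy checker can see it was deliberate.
        PrintStream ps = new PrintStream(sink, false, Charset.defaultCharset().name());
        ps.println("infoStream output");
        ps.flush();
      }
    }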

View File

@ -17,6 +17,8 @@ package org.apache.lucene.benchmark.byTask.tasks;
* limitations under the License. * limitations under the License.
*/ */
import java.util.Locale;
import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.stats.Points; import org.apache.lucene.benchmark.byTask.stats.Points;
import org.apache.lucene.benchmark.byTask.stats.TaskStats; import org.apache.lucene.benchmark.byTask.stats.TaskStats;
@ -266,7 +268,7 @@ public abstract class PerfTask implements Cloneable {
public void tearDown() throws Exception { public void tearDown() throws Exception {
if (++logStepCount % logStep == 0) { if (++logStepCount % logStep == 0) {
double time = (System.currentTimeMillis() - runData.getStartTimeMillis()) / 1000.0; double time = (System.currentTimeMillis() - runData.getStartTimeMillis()) / 1000.0;
System.out.println(String.format("%7.2f",time) + " sec --> " System.out.println(String.format(Locale.ROOT, "%7.2f",time) + " sec --> "
+ Thread.currentThread().getName() + " " + getLogMessage(logStepCount)); + Thread.currentThread().getName() + " " + getLogMessage(logStepCount));
} }
} }

View File

@ -77,7 +77,7 @@ public class SearchWithSortTask extends ReadTask {
} else { } else {
throw new RuntimeException("You must specify the sort type ie page:int,subject:string"); throw new RuntimeException("You must specify the sort type ie page:int,subject:string");
} }
sortField0 = new SortField(fieldName, SortField.Type.valueOf(typeString.toUpperCase(Locale.ENGLISH))); sortField0 = new SortField(fieldName, SortField.Type.valueOf(typeString.toUpperCase(Locale.ROOT)));
} }
sortFields[upto++] = sortField0; sortFields[upto++] = sortField0;
} }
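SortField.Type.valueOf expects the exact ASCII constant name, so the user-supplied type string must be upper-cased locale-neutrally. A reduced sketch with a hypothetical stand-in enum:

    import java.util.Locale;

    public class EnumLookup {
      enum Type { INT, STRING }  // stand-in for SortField.Type

      public static void main(String[] args) {
        String userSpec = "int";
        // Locale.ROOT keeps 'i' -> 'I' even under tr_TR, where the default
        // locale would produce a dotted capital and valueOf would throw.
        Type t = Type.valueOf(userSpec.toUpperCase(Locale.ROOT));
        System.out.println(t); // INT
      }
    }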

View File

@ -19,6 +19,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Locale;
import java.text.NumberFormat; import java.text.NumberFormat;
import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.PerfRunData;
@ -428,7 +429,7 @@ public class TaskSequence extends PerfTask {
sb.append(padd); sb.append(padd);
sb.append(!letChildReport ? ">" : (parallel ? "]" : "}")); sb.append(!letChildReport ? ">" : (parallel ? "]" : "}"));
if (fixedTime) { if (fixedTime) {
sb.append(" " + NumberFormat.getNumberInstance().format(runTimeSec) + "s"); sb.append(" " + NumberFormat.getNumberInstance(Locale.ROOT).format(runTimeSec) + "s");
} else if (repetitions>1) { } else if (repetitions>1) {
sb.append(" * " + repetitions); sb.append(" * " + repetitions);
} else if (repetitions==REPEAT_EXHAUST) { } else if (repetitions==REPEAT_EXHAUST) {
@ -487,7 +488,7 @@ public class TaskSequence extends PerfTask {
if (rate>0) { if (rate>0) {
seqName += "_" + rate + (perMin?"/min":"/sec"); seqName += "_" + rate + (perMin?"/min":"/sec");
} }
if (parallel && seqName.toLowerCase().indexOf("par")<0) { if (parallel && seqName.toLowerCase(Locale.ROOT).indexOf("par")<0) {
seqName += "_Par"; seqName += "_Par";
} }
} }

View File

@ -22,6 +22,7 @@ import java.io.StringReader;
import java.lang.reflect.Constructor; import java.lang.reflect.Constructor;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Locale;
import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.tasks.PerfTask; import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
@ -159,7 +160,7 @@ public class Algorithm {
} else { } else {
stok.nextToken(); stok.nextToken();
if (stok.ttype!=StreamTokenizer.TT_WORD) throw new Exception("expected rate unit: 'min' or 'sec' - "+stok.toString()); if (stok.ttype!=StreamTokenizer.TT_WORD) throw new Exception("expected rate unit: 'min' or 'sec' - "+stok.toString());
String unit = stok.sval.toLowerCase(); String unit = stok.sval.toLowerCase(Locale.ROOT);
if ("min".equals(unit)) { if ("min".equals(unit)) {
((TaskSequence)prevTask).setRate((int)stok.nval,true); // set rate per min ((TaskSequence)prevTask).setRate((int)stok.nval,true); // set rate per min
} else if ("sec".equals(unit)) { } else if ("sec".equals(unit)) {

View File

@ -18,6 +18,7 @@ package org.apache.lucene.benchmark.byTask.utils;
*/ */
import java.text.NumberFormat; import java.text.NumberFormat;
import java.util.Locale;
/** /**
* Formatting utilities (for reports). * Formatting utilities (for reports).
@ -25,9 +26,9 @@ import java.text.NumberFormat;
public class Format { public class Format {
private static NumberFormat numFormat [] = { private static NumberFormat numFormat [] = {
NumberFormat.getInstance(), NumberFormat.getInstance(Locale.ROOT),
NumberFormat.getInstance(), NumberFormat.getInstance(Locale.ROOT),
NumberFormat.getInstance(), NumberFormat.getInstance(Locale.ROOT),
}; };
private static final String padd = " "; private static final String padd = " ";
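Pinning the shared NumberFormat instances to Locale.ROOT keeps benchmark reports byte-identical across machines, which matters when reports are diffed or parsed by scripts. A sketch:

    import java.text.NumberFormat;
    import java.util.Locale;

    public class ReportNumbers {
      public static void main(String[] args) {
        NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
        nf.setMaximumFractionDigits(2);
        // Always "1,234,567.89"; a German default locale would print "1.234.567,89".
        System.out.println(nf.format(1234567.891));
      }
    }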

View File

@ -99,7 +99,7 @@ public class StreamUtils {
String fileName = file.getName(); String fileName = file.getName();
int idx = fileName.lastIndexOf('.'); int idx = fileName.lastIndexOf('.');
if (idx != -1) { if (idx != -1) {
type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ENGLISH)); type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ROOT));
} }
return type==null ? Type.PLAIN : type; return type==null ? Type.PLAIN : type;
} }

View File

@ -19,6 +19,7 @@ package org.apache.lucene.benchmark.quality;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.text.NumberFormat; import java.text.NumberFormat;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Locale;
/** /**
* Results of quality benchmark run for a single query or for a set of queries. * Results of quality benchmark run for a single query or for a set of queries.
@ -141,7 +142,7 @@ public class QualityStats {
logger.println(title); logger.println(title);
} }
prefix = prefix==null ? "" : prefix; prefix = prefix==null ? "" : prefix;
NumberFormat nf = NumberFormat.getInstance(); NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
nf.setMaximumFractionDigits(3); nf.setMaximumFractionDigits(3);
nf.setMinimumFractionDigits(3); nf.setMinimumFractionDigits(3);
nf.setGroupingUsed(true); nf.setGroupingUsed(true);

View File

@ -24,11 +24,13 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.IOUtils;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.File; import java.io.File;
import java.io.FileReader; import java.io.OutputStreamWriter;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.util.HashSet; import java.util.HashSet;
import java.util.Set; import java.util.Set;
@ -51,7 +53,7 @@ public class QueryDriver {
File topicsFile = new File(args[0]); File topicsFile = new File(args[0]);
File qrelsFile = new File(args[1]); File qrelsFile = new File(args[1]);
SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2]), "lucene"); SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2], "UTF-8"), "lucene");
FSDirectory dir = FSDirectory.open(new File(args[3])); FSDirectory dir = FSDirectory.open(new File(args[3]));
String fieldSpec = args.length == 5 ? args[4] : "T"; // default to Title-only if not specified. String fieldSpec = args.length == 5 ? args[4] : "T"; // default to Title-only if not specified.
IndexReader reader = DirectoryReader.open(dir); IndexReader reader = DirectoryReader.open(dir);
@ -60,14 +62,14 @@ public class QueryDriver {
int maxResults = 1000; int maxResults = 1000;
String docNameField = "docname"; String docNameField = "docname";
PrintWriter logger = new PrintWriter(System.out, true); PrintWriter logger = new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()), true);
// use trec utilities to read trec topics into quality queries // use trec utilities to read trec topics into quality queries
TrecTopicsReader qReader = new TrecTopicsReader(); TrecTopicsReader qReader = new TrecTopicsReader();
QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile))); QualityQuery qqs[] = qReader.readQueries(new BufferedReader(IOUtils.getDecodingReader(topicsFile, IOUtils.CHARSET_UTF_8)));
// prepare judge, with trec utilities that read from a QRels file // prepare judge, with trec utilities that read from a QRels file
Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile))); Judge judge = new TrecJudge(new BufferedReader(IOUtils.getDecodingReader(qrelsFile, IOUtils.CHARSET_UTF_8)));
// validate topics & judgments match each other // validate topics & judgments match each other
judge.validateData(qqs, logger); judge.validateData(qqs, logger);
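IOUtils.getDecodingReader used above returns a UTF-8 reader whose decoder reports malformed input instead of silently replacing it. A rough JDK-only sketch of what such a helper does (an approximation, not Lucene's exact implementation; the file name is hypothetical):

    import java.io.BufferedReader;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.io.Reader;
    import java.nio.charset.Charset;
    import java.nio.charset.CodingErrorAction;

    public class DecodingReaders {
      // Sketch: decode strictly as the given charset, failing fast on bad bytes.
      static Reader getDecodingReader(String path, Charset cs) throws IOException {
        return new BufferedReader(new InputStreamReader(
            new FileInputStream(path),
            cs.newDecoder()
              .onMalformedInput(CodingErrorAction.REPORT)
              .onUnmappableCharacter(CodingErrorAction.REPORT)));
      }

      public static void main(String[] args) throws IOException {
        Reader r = getDecodingReader("topics.txt", Charset.forName("UTF-8"));
        r.close();
      }
    }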

View File

@ -19,6 +19,7 @@ package org.apache.lucene.benchmark.quality.utils;
import java.io.IOException; import java.io.IOException;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.text.NumberFormat; import java.text.NumberFormat;
import java.util.Locale;
import org.apache.lucene.benchmark.quality.QualityQuery; import org.apache.lucene.benchmark.quality.QualityQuery;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
@ -45,7 +46,7 @@ public class SubmissionReport {
public SubmissionReport (PrintWriter logger, String name) { public SubmissionReport (PrintWriter logger, String name) {
this.logger = logger; this.logger = logger;
this.name = name; this.name = name;
nf = NumberFormat.getInstance(); nf = NumberFormat.getInstance(Locale.ROOT);
nf.setMaximumFractionDigits(4); nf.setMaximumFractionDigits(4);
nf.setMinimumFractionDigits(4); nf.setMinimumFractionDigits(4);
} }

View File

@ -19,12 +19,18 @@ package org.apache.lucene.benchmark.utils;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.File; import java.io.File;
import java.io.FileFilter; import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader; import java.io.FileReader;
import java.io.FileWriter; import java.io.FileWriter;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.apache.lucene.util.IOUtils;
/** /**
* Split the Reuters SGML documents into Simple Text files containing: Title, Date, Dateline, Body * Split the Reuters SGML documents into Simple Text files containing: Title, Date, Dateline, Body
@ -73,7 +79,7 @@ public class ExtractReuters {
*/ */
protected void extractFile(File sgmFile) { protected void extractFile(File sgmFile) {
try { try {
BufferedReader reader = new BufferedReader(new FileReader(sgmFile)); BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(sgmFile), IOUtils.CHARSET_UTF_8));
StringBuilder buffer = new StringBuilder(1024); StringBuilder buffer = new StringBuilder(1024);
StringBuilder outBuffer = new StringBuilder(1024); StringBuilder outBuffer = new StringBuilder(1024);
@ -107,7 +113,7 @@ public class ExtractReuters {
File outFile = new File(outputDir, sgmFile.getName() + "-" File outFile = new File(outputDir, sgmFile.getName() + "-"
+ (docNumber++) + ".txt"); + (docNumber++) + ".txt");
// System.out.println("Writing " + outFile); // System.out.println("Writing " + outFile);
FileWriter writer = new FileWriter(outFile); OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(outFile), IOUtils.CHARSET_UTF_8);
writer.write(out); writer.write(out);
writer.close(); writer.close();
outBuffer.setLength(0); outBuffer.setLength(0);
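Writing is symmetrical to reading: FileWriter encodes with the default charset, so the extracted files could differ between machines; an OutputStreamWriter over a FileOutputStream makes the encoding explicit. A sketch (the output file name is made up):

    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.OutputStreamWriter;
    import java.io.Writer;
    import java.nio.charset.StandardCharsets; // stands in for IOUtils.CHARSET_UTF_8

    public class ExplicitCharsetWrite {
      public static void main(String[] args) throws IOException {
        Writer writer = new OutputStreamWriter(
            new FileOutputStream("reut2-000.sgm-0.txt"), StandardCharsets.UTF_8);
        try {
          writer.write("Title\nDate\nBody\n"); // always encoded as UTF-8
        } finally {
          writer.close();
        }
      }
    }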

View File

@ -18,8 +18,10 @@ package org.apache.lucene.benchmark.utils;
*/ */
import java.io.File; import java.io.File;
import java.io.FileWriter; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Properties; import java.util.Properties;
import org.apache.lucene.benchmark.byTask.feeds.ContentSource; import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
@ -28,6 +30,7 @@ import org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource;
import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException; import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.util.IOUtils;
/** /**
* Extract the downloaded Wikipedia dump into separate files for indexing. * Extract the downloaded Wikipedia dump into separate files for indexing.
@ -83,7 +86,7 @@ public class ExtractWikipedia {
contents.append("\n"); contents.append("\n");
try { try {
FileWriter writer = new FileWriter(f); Writer writer = new OutputStreamWriter(new FileOutputStream(f), IOUtils.CHARSET_UTF_8);
writer.write(contents.toString()); writer.write(contents.toString());
writer.close(); writer.close();
} catch (IOException ioe) { } catch (IOException ioe) {

View File

@ -166,7 +166,7 @@ public class DocMakerTest extends BenchmarkTestCase {
// DocMaker did not close its ContentSource if resetInputs was called twice, // DocMaker did not close its ContentSource if resetInputs was called twice,
// leading to a file handle leak. // leading to a file handle leak.
File f = new File(getWorkDir(), "docMakerLeak.txt"); File f = new File(getWorkDir(), "docMakerLeak.txt");
PrintStream ps = new PrintStream(f); PrintStream ps = new PrintStream(f, "UTF-8");
ps.println("one title\t" + System.currentTimeMillis() + "\tsome content"); ps.println("one title\t" + System.currentTimeMillis() + "\tsome content");
ps.close(); ps.close();

View File

@ -20,6 +20,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.File; import java.io.File;
import java.io.PrintStream; import java.io.PrintStream;
import java.nio.charset.Charset;
import java.util.Properties; import java.util.Properties;
import org.apache.lucene.benchmark.BenchmarkTestCase; import org.apache.lucene.benchmark.BenchmarkTestCase;
@ -50,7 +51,7 @@ public class CreateIndexTaskTest extends BenchmarkTestCase {
PrintStream curOut = System.out; PrintStream curOut = System.out;
ByteArrayOutputStream baos = new ByteArrayOutputStream(); ByteArrayOutputStream baos = new ByteArrayOutputStream();
System.setOut(new PrintStream(baos)); System.setOut(new PrintStream(baos, false, Charset.defaultCharset().name()));
try { try {
PerfRunData runData = createPerfRunData("SystemOut"); PerfRunData runData = createPerfRunData("SystemOut");
CreateIndexTask cit = new CreateIndexTask(runData); CreateIndexTask cit = new CreateIndexTask(runData);
@ -63,7 +64,7 @@ public class CreateIndexTaskTest extends BenchmarkTestCase {
PrintStream curErr = System.err; PrintStream curErr = System.err;
baos.reset(); baos.reset();
System.setErr(new PrintStream(baos)); System.setErr(new PrintStream(baos, false, Charset.defaultCharset().name()));
try { try {
PerfRunData runData = createPerfRunData("SystemErr"); PerfRunData runData = createPerfRunData("SystemErr");
CreateIndexTask cit = new CreateIndexTask(runData); CreateIndexTask cit = new CreateIndexTask(runData);

View File

@ -31,6 +31,7 @@ import java.io.OutputStreamWriter;
import org.apache.commons.compress.compressors.CompressorStreamFactory; import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.lucene.benchmark.BenchmarkTestCase; import org.apache.lucene.benchmark.BenchmarkTestCase;
import org.apache.lucene.benchmark.byTask.utils.StreamUtils; import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util._TestUtil; import org.apache.lucene.util._TestUtil;
import org.junit.After; import org.junit.After;
import org.junit.Before; import org.junit.Before;
@ -88,7 +89,7 @@ public class StreamUtilsTest extends BenchmarkTestCase {
private File rawTextFile(String ext) throws Exception { private File rawTextFile(String ext) throws Exception {
File f = new File(testDir,"testfile." + ext); File f = new File(testDir,"testfile." + ext);
BufferedWriter w = new BufferedWriter(new FileWriter(f)); BufferedWriter w = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), IOUtils.CHARSET_UTF_8));
w.write(TEXT); w.write(TEXT);
w.newLine(); w.newLine();
w.close(); w.close();
@ -117,7 +118,7 @@ public class StreamUtilsTest extends BenchmarkTestCase {
} }
private void writeText(OutputStream os) throws IOException { private void writeText(OutputStream os) throws IOException {
BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os)); BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os, IOUtils.CHARSET_UTF_8));
w.write(TEXT); w.write(TEXT);
w.newLine(); w.newLine();
w.close(); w.close();
@ -125,7 +126,7 @@ public class StreamUtilsTest extends BenchmarkTestCase {
private void assertReadText(File f) throws Exception { private void assertReadText(File f) throws Exception {
InputStream ir = StreamUtils.inputStream(f); InputStream ir = StreamUtils.inputStream(f);
InputStreamReader in = new InputStreamReader(ir); InputStreamReader in = new InputStreamReader(ir, IOUtils.CHARSET_UTF_8);
BufferedReader r = new BufferedReader(in); BufferedReader r = new BufferedReader(in);
String line = r.readLine(); String line = r.readLine();
assertEquals("Wrong text found in "+f.getName(), TEXT, line); assertEquals("Wrong text found in "+f.getName(), TEXT, line);

View File

@ -31,7 +31,9 @@ import java.io.BufferedReader;
import java.io.File; import java.io.File;
import java.io.InputStream; import java.io.InputStream;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.nio.charset.Charset;
/** /**
* Test that quality run does its job. * Test that quality run does its job.
@ -55,7 +57,7 @@ public class TestQualityRun extends BenchmarkTestCase {
int maxResults = 1000; int maxResults = 1000;
String docNameField = "doctitle"; // orig docID is in the linedoc format title String docNameField = "doctitle"; // orig docID is in the linedoc format title
PrintWriter logger = VERBOSE ? new PrintWriter(System.out,true) : null; PrintWriter logger = VERBOSE ? new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()),true) : null;
// prepare topics // prepare topics
InputStream topics = getClass().getResourceAsStream("trecTopics.txt"); InputStream topics = getClass().getResourceAsStream("trecTopics.txt");

View File

@ -169,11 +169,19 @@
</clover-report> </clover-report>
</target> </target>
<!-- Validate once from top-level. --> <!-- Validation (license/notice/api checks). -->
<target name="validate" depends="compile-tools,resolve" description="Validate legal stuff."> <target name="validate" depends="check-licenses,check-forbidden-apis" description="Validate stuff." />
<target name="check-licenses" depends="compile-tools,resolve,load-custom-tasks" description="Validate license stuff.">
<license-check-macro dir="${basedir}" /> <license-check-macro dir="${basedir}" />
</target> </target>
<target name="check-forbidden-apis" depends="compile-tools,compile-test,load-custom-tasks" description="Check forbidden API calls in compiled class files.">
<forbidden-apis apiFile="${custom-tasks.dir}/forbiddenApis/jdk.txt">
<fileset dir="${basedir}/build" includes="**/*.class" />
</forbidden-apis>
</target>
<target name="resolve"> <target name="resolve">
<sequential> <sequential>
<ant dir="test-framework" target="resolve" inheritall="false"> <ant dir="test-framework" target="resolve" inheritall="false">

View File

@ -68,6 +68,7 @@
executable="${python.exe}" failonerror="true"> executable="${python.exe}" failonerror="true">
<arg line="createLevAutomata.py @{n} False"/> <arg line="createLevAutomata.py @{n} False"/>
</exec> </exec>
<fixcrlf srcdir="src/java/org/apache/lucene/util/automaton" includes="*ParametricDescription.java" encoding="UTF-8"/>
</sequential> </sequential>
</macrodef> </macrodef>

View File

@ -20,8 +20,10 @@ package org.apache.lucene.codecs;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.PrintStream; import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.Comparator; import java.util.Comparator;
import java.util.Iterator; import java.util.Iterator;
import java.util.Locale;
import java.util.TreeMap; import java.util.TreeMap;
import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.DocsAndPositionsEnum;
@ -345,7 +347,12 @@ public class BlockTreeTermsReader extends FieldsProducer {
@Override @Override
public String toString() { public String toString() {
final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
final PrintStream out = new PrintStream(bos); PrintStream out;
try {
out = new PrintStream(bos, false, "UTF-8");
} catch (UnsupportedEncodingException bogus) {
throw new RuntimeException(bogus);
}
out.println(" index FST:"); out.println(" index FST:");
out.println(" " + indexNodeCount + " nodes"); out.println(" " + indexNodeCount + " nodes");
@ -353,7 +360,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
out.println(" " + indexNumBytes + " bytes"); out.println(" " + indexNumBytes + " bytes");
out.println(" terms:"); out.println(" terms:");
out.println(" " + totalTermCount + " terms"); out.println(" " + totalTermCount + " terms");
out.println(" " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format("%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : "")); out.println(" " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : ""));
out.println(" blocks:"); out.println(" blocks:");
out.println(" " + totalBlockCount + " blocks"); out.println(" " + totalBlockCount + " blocks");
out.println(" " + termsOnlyBlockCount + " terms-only blocks"); out.println(" " + termsOnlyBlockCount + " terms-only blocks");
@ -362,9 +369,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
out.println(" " + floorBlockCount + " floor blocks"); out.println(" " + floorBlockCount + " floor blocks");
out.println(" " + (totalBlockCount-floorSubBlockCount) + " non-floor blocks"); out.println(" " + (totalBlockCount-floorSubBlockCount) + " non-floor blocks");
out.println(" " + floorSubBlockCount + " floor sub-blocks"); out.println(" " + floorSubBlockCount + " floor sub-blocks");
out.println(" " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format("%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : "")); out.println(" " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : ""));
out.println(" " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? " (" + String.format("%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : "")); out.println(" " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : ""));
out.println(" " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format("%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : "")); out.println(" " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : ""));
if (totalBlockCount != 0) { if (totalBlockCount != 0) {
out.println(" by prefix length:"); out.println(" by prefix length:");
int total = 0; int total = 0;
@ -372,13 +379,17 @@ public class BlockTreeTermsReader extends FieldsProducer {
final int blockCount = blockCountByPrefixLen[prefix]; final int blockCount = blockCountByPrefixLen[prefix];
total += blockCount; total += blockCount;
if (blockCount != 0) { if (blockCount != 0) {
out.println(" " + String.format("%2d", prefix) + ": " + blockCount); out.println(" " + String.format(Locale.ROOT, "%2d", prefix) + ": " + blockCount);
} }
} }
assert totalBlockCount == total; assert totalBlockCount == total;
} }
return bos.toString(); try {
return bos.toString("UTF-8");
} catch (UnsupportedEncodingException bogus) {
throw new RuntimeException(bogus);
}
} }
} }
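The toString() change above closes a round-trip hole: text is encoded into the ByteArrayOutputStream by the PrintStream and decoded back by toString(), and unless both ends name the same charset, non-ASCII stats output can be corrupted. A sketch of the matched pair:

    import java.io.ByteArrayOutputStream;
    import java.io.PrintStream;
    import java.io.UnsupportedEncodingException;

    public class RoundTrip {
      public static void main(String[] args) throws UnsupportedEncodingException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
        PrintStream out = new PrintStream(bos, false, "UTF-8"); // encode as UTF-8
        out.println("prefix \u00e9tat");
        out.flush();
        // Decode with the same charset; bos.toString() would use the JVM default.
        System.out.println(bos.toString("UTF-8"));
      }
    }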

View File

@ -53,7 +53,7 @@ public class DateTools {
private static final ThreadLocal<Calendar> TL_CAL = new ThreadLocal<Calendar>() { private static final ThreadLocal<Calendar> TL_CAL = new ThreadLocal<Calendar>() {
@Override @Override
protected Calendar initialValue() { protected Calendar initialValue() {
return Calendar.getInstance(GMT, Locale.US); return Calendar.getInstance(GMT, Locale.ROOT);
} }
}; };
@ -194,7 +194,7 @@ public class DateTools {
this.formatLen = formatLen; this.formatLen = formatLen;
// formatLen 10's place: 11111111 // formatLen 10's place: 11111111
// formatLen 1's place: 12345678901234567 // formatLen 1's place: 12345678901234567
this.format = new SimpleDateFormat("yyyyMMddHHmmssSSS".substring(0,formatLen),Locale.US); this.format = new SimpleDateFormat("yyyyMMddHHmmssSSS".substring(0,formatLen),Locale.ROOT);
this.format.setTimeZone(GMT); this.format.setTimeZone(GMT);
} }
@ -202,7 +202,7 @@ public class DateTools {
* in lowercase (for backwards compatibility) */ * in lowercase (for backwards compatibility) */
@Override @Override
public String toString() { public String toString() {
return super.toString().toLowerCase(Locale.ENGLISH); return super.toString().toLowerCase(Locale.ROOT);
} }
} }
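DateTools pins both axes: the time zone to GMT, so timestamps do not shift with the host configuration, and the locale to Locale.ROOT, so digits and field symbols are stable. A sketch of the fully pinned formatter:

    import java.text.SimpleDateFormat;
    import java.util.Date;
    import java.util.Locale;
    import java.util.TimeZone;

    public class PinnedTimestamps {
      public static void main(String[] args) {
        SimpleDateFormat format = new SimpleDateFormat("yyyyMMddHHmmssSSS", Locale.ROOT);
        format.setTimeZone(TimeZone.getTimeZone("GMT"));
        // The same instant renders identically on every machine and locale.
        System.out.println(format.format(new Date(0L))); // 19700101000000000
      }
    }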

View File

@ -25,6 +25,7 @@ import java.util.ArrayList;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Locale;
import java.util.Map; import java.util.Map;
import org.apache.lucene.codecs.BlockTreeTermsReader; import org.apache.lucene.codecs.BlockTreeTermsReader;
@ -341,7 +342,7 @@ public class CheckIndex {
* you only call this when the index is not opened by any * you only call this when the index is not opened by any
* writer. */ * writer. */
public Status checkIndex(List<String> onlySegments) throws IOException { public Status checkIndex(List<String> onlySegments) throws IOException {
NumberFormat nf = NumberFormat.getInstance(); NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
SegmentInfos sis = new SegmentInfos(); SegmentInfos sis = new SegmentInfos();
Status result = new Status(); Status result = new Status();
result.dir = dir; result.dir = dir;

View File

@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException; import java.io.IOException;
import java.text.NumberFormat; import java.text.NumberFormat;
import java.util.HashSet; import java.util.HashSet;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.Codec;
@ -181,7 +182,7 @@ class DocumentsWriterPerThread {
private int flushedDocCount; private int flushedDocCount;
DocumentsWriterDeleteQueue deleteQueue; DocumentsWriterDeleteQueue deleteQueue;
DeleteSlice deleteSlice; DeleteSlice deleteSlice;
private final NumberFormat nf = NumberFormat.getInstance(); private final NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
final Allocator byteBlockAllocator; final Allocator byteBlockAllocator;

@ -27,6 +27,7 @@ import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
@ -3610,7 +3611,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
// lost... // lost...
if (infoStream.isEnabled("IW")) { if (infoStream.isEnabled("IW")) {
infoStream.message("IW", String.format("merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.)); infoStream.message("IW", String.format(Locale.ROOT, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.));
} }
final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer(); final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer();

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.List; import java.util.List;
import java.util.Locale;
import java.util.Map; import java.util.Map;
@ -535,7 +536,7 @@ public abstract class LogMergePolicy extends MergePolicy {
if (size >= maxMergeSize) { if (size >= maxMergeSize) {
extra += " [skip: too large]"; extra += " [skip: too large]";
} }
message("seg=" + writer.get().segString(info) + " level=" + infoLevel.level + " size=" + String.format("%.3f MB", segBytes/1024/1024.) + extra); message("seg=" + writer.get().segString(info) + " level=" + infoLevel.level + " size=" + String.format(Locale.ROOT, "%.3f MB", segBytes/1024/1024.) + extra);
} }
} }

@ -18,6 +18,7 @@ package org.apache.lucene.index;
*/ */
import java.io.IOException; import java.io.IOException;
import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
@ -289,7 +290,7 @@ public class TieredMergePolicy extends MergePolicy {
} else if (segBytes < floorSegmentBytes) { } else if (segBytes < floorSegmentBytes) {
extra += " [floored]"; extra += " [floored]";
} }
message(" seg=" + writer.get().segString(info) + " size=" + String.format("%.3f", segBytes/1024/1024.) + " MB" + extra); message(" seg=" + writer.get().segString(info) + " size=" + String.format(Locale.ROOT, "%.3f", segBytes/1024/1024.) + " MB" + extra);
} }
minSegmentBytes = Math.min(segBytes, minSegmentBytes); minSegmentBytes = Math.min(segBytes, minSegmentBytes);
@ -388,7 +389,7 @@ public class TieredMergePolicy extends MergePolicy {
final MergeScore score = score(candidate, hitTooLarge, mergingBytes); final MergeScore score = score(candidate, hitTooLarge, mergingBytes);
if (verbose()) { if (verbose()) {
message(" maybe=" + writer.get().segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format("%.3f MB", totAfterMergeBytes/1024./1024.)); message(" maybe=" + writer.get().segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format(Locale.ROOT, "%.3f MB", totAfterMergeBytes/1024./1024.));
} }
// If we are already running a max sized merge // If we are already running a max sized merge
@ -413,7 +414,7 @@ public class TieredMergePolicy extends MergePolicy {
} }
if (verbose()) { if (verbose()) {
message(" add merge=" + writer.get().segString(merge.segments) + " size=" + String.format("%.3f MB", bestMergeBytes/1024./1024.) + " score=" + String.format("%.3f", bestScore.getScore()) + " " + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : "")); message(" add merge=" + writer.get().segString(merge.segments) + " size=" + String.format(Locale.ROOT, "%.3f MB", bestMergeBytes/1024./1024.) + " score=" + String.format(Locale.ROOT, "%.3f", bestScore.getScore()) + " " + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : ""));
} }
} else { } else {
return spec; return spec;
@ -475,7 +476,7 @@ public class TieredMergePolicy extends MergePolicy {
@Override @Override
public String getExplanation() { public String getExplanation() {
return "skew=" + String.format("%.3f", skew) + " nonDelRatio=" + String.format("%.3f", nonDelRatio); return "skew=" + String.format(Locale.ROOT, "%.3f", skew) + " nonDelRatio=" + String.format(Locale.ROOT, "%.3f", nonDelRatio);
} }
}; };
} }

@ -17,6 +17,8 @@ package org.apache.lucene.search.similarities;
* limitations under the License. * limitations under the License.
*/ */
import java.util.Locale;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
/** /**
@ -92,6 +94,6 @@ public class LMDirichletSimilarity extends LMSimilarity {
@Override @Override
public String getName() { public String getName() {
return String.format("Dirichlet(%f)", getMu()); return String.format(Locale.ROOT, "Dirichlet(%f)", getMu());
} }
} }

@ -17,6 +17,8 @@ package org.apache.lucene.search.similarities;
* limitations under the License. * limitations under the License.
*/ */
import java.util.Locale;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
/** /**
@ -72,6 +74,6 @@ public class LMJelinekMercerSimilarity extends LMSimilarity {
@Override @Override
public String getName() { public String getName() {
return String.format("Jelinek-Mercer(%f)", getLambda()); return String.format(Locale.ROOT, "Jelinek-Mercer(%f)", getLambda());
} }
} }

@ -17,6 +17,8 @@ package org.apache.lucene.search.similarities;
* limitations under the License. * limitations under the License.
*/ */
import java.util.Locale;
import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.TermStatistics; import org.apache.lucene.search.TermStatistics;
@ -91,9 +93,9 @@ public abstract class LMSimilarity extends SimilarityBase {
public String toString() { public String toString() {
String coll = collectionModel.getName(); String coll = collectionModel.getName();
if (coll != null) { if (coll != null) {
return String.format("LM %s - %s", getName(), coll); return String.format(Locale.ROOT, "LM %s - %s", getName(), coll);
} else { } else {
return String.format("LM %s", getName()); return String.format(Locale.ROOT, "LM %s", getName());
} }
} }
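The similarity names above are built with %f, whose decimal separator follows the format locale; pinning Locale.ROOT keeps getName() stable for logs and cache keys. A sketch of the difference (values illustrative):

    import java.util.Locale;

    public class FormatDemo {
        public static void main(String[] args) {
            float mu = 2000f;
            // Always "Dirichlet(2000.000000)".
            System.out.println(String.format(Locale.ROOT, "Dirichlet(%f)", mu));
            // French default locale would give "Dirichlet(2000,000000)".
            System.out.println(String.format(Locale.FRANCE, "Dirichlet(%f)", mu));
        }
    }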

@ -17,6 +17,7 @@ package org.apache.lucene.util;
* limitations under the License. * limitations under the License.
*/ */
import java.util.Locale;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
@ -43,7 +44,7 @@ public class NamedThreadFactory implements ThreadFactory {
final SecurityManager s = System.getSecurityManager(); final SecurityManager s = System.getSecurityManager();
group = (s != null) ? s.getThreadGroup() : Thread.currentThread() group = (s != null) ? s.getThreadGroup() : Thread.currentThread()
.getThreadGroup(); .getThreadGroup();
this.threadNamePrefix = String.format(NAME_PATTERN, this.threadNamePrefix = String.format(Locale.ROOT, NAME_PATTERN,
checkPrefix(threadNamePrefix), threadPoolNumber.getAndIncrement()); checkPrefix(threadNamePrefix), threadPoolNumber.getAndIncrement());
} }
@ -57,7 +58,7 @@ public class NamedThreadFactory implements ThreadFactory {
* @see java.util.concurrent.ThreadFactory#newThread(java.lang.Runnable) * @see java.util.concurrent.ThreadFactory#newThread(java.lang.Runnable)
*/ */
public Thread newThread(Runnable r) { public Thread newThread(Runnable r) {
final Thread t = new Thread(group, r, String.format("%s-%d", final Thread t = new Thread(group, r, String.format(Locale.ROOT, "%s-%d",
this.threadNamePrefix, threadNumber.getAndIncrement()), 0); this.threadNamePrefix, threadNumber.getAndIncrement()), 0);
t.setDaemon(false); t.setDaemon(false);
t.setPriority(Thread.NORM_PRIORITY); t.setPriority(Thread.NORM_PRIORITY);

@ -559,7 +559,7 @@ public final class RamUsageEstimator {
*/ */
public static String humanReadableUnits(long bytes) { public static String humanReadableUnits(long bytes) {
return humanReadableUnits(bytes, return humanReadableUnits(bytes,
new DecimalFormat("0.#", DecimalFormatSymbols.getInstance(Locale.ENGLISH))); new DecimalFormat("0.#", DecimalFormatSymbols.getInstance(Locale.ROOT)));
} }
/** /**
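DecimalFormat keeps its pattern syntax fixed but renders digits and separators from DecimalFormatSymbols, which otherwise default to the host locale; the humanReadableUnits change pins them. A brief sketch under the same assumption:

    import java.text.DecimalFormat;
    import java.text.DecimalFormatSymbols;
    import java.util.Locale;

    public class UnitsDemo {
        public static void main(String[] args) {
            double mb = 1536 / 1024.0;
            // "1.5 MB" on every host; default symbols on a German JVM would print "1,5 MB".
            DecimalFormat df = new DecimalFormat("0.#", DecimalFormatSymbols.getInstance(Locale.ROOT));
            System.out.println(df.format(mb) + " MB");
        }
    }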

@ -73,7 +73,7 @@ public enum Version {
} }
public static Version parseLeniently(String version) { public static Version parseLeniently(String version) {
String parsedMatchVersion = version.toUpperCase(Locale.ENGLISH); String parsedMatchVersion = version.toUpperCase(Locale.ROOT);
return Version.valueOf(parsedMatchVersion.replaceFirst("^(\\d)\\.(\\d)$", "LUCENE_$1$2")); return Version.valueOf(parsedMatchVersion.replaceFirst("^(\\d)\\.(\\d)$", "LUCENE_$1$2"));
} }
} }

@ -121,7 +121,7 @@ def main():
w('package org.apache.lucene.util.automaton;') w('package org.apache.lucene.util.automaton;')
w('') w('')
w('/**') w('/*')
w(' * Licensed to the Apache Software Foundation (ASF) under one or more') w(' * Licensed to the Apache Software Foundation (ASF) under one or more')
w(' * contributor license agreements. See the NOTICE file distributed with') w(' * contributor license agreements. See the NOTICE file distributed with')
w(' * this work for additional information regarding copyright ownership.') w(' * this work for additional information regarding copyright ownership.')

@ -159,7 +159,7 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {
public void testAppendableInterface() { public void testAppendableInterface() {
CharTermAttributeImpl t = new CharTermAttributeImpl(); CharTermAttributeImpl t = new CharTermAttributeImpl();
Formatter formatter = new Formatter(t, Locale.US); Formatter formatter = new Formatter(t, Locale.ROOT);
formatter.format("%d", 1234); formatter.format("%d", 1234);
assertEquals("1234", t.toString()); assertEquals("1234", t.toString());
formatter.format("%d", 5678); formatter.format("%d", 5678);

@ -71,7 +71,7 @@ public class Test10KPulsings extends LuceneTestCase {
Field field = newField("field", "", ft); Field field = newField("field", "", ft);
document.add(field); document.add(field);
NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ENGLISH)); NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ROOT));
for (int i = 0; i < 10050; i++) { for (int i = 0; i < 10050; i++) {
field.setStringValue(df.format(i)); field.setStringValue(df.format(i));
@ -122,7 +122,7 @@ public class Test10KPulsings extends LuceneTestCase {
Field field = newField("field", "", ft); Field field = newField("field", "", ft);
document.add(field); document.add(field);
NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ENGLISH)); NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ROOT));
final int freq = freqCutoff + 1; final int freq = freqCutoff + 1;

@ -37,7 +37,7 @@ public class TestBinaryDocument extends LuceneTestCase {
{ {
FieldType ft = new FieldType(); FieldType ft = new FieldType();
ft.setStored(true); ft.setStored(true);
StoredField binaryFldStored = new StoredField("binaryStored", binaryValStored.getBytes()); StoredField binaryFldStored = new StoredField("binaryStored", binaryValStored.getBytes("UTF-8"));
Field stringFldStored = new Field("stringStored", binaryValStored, ft); Field stringFldStored = new Field("stringStored", binaryValStored, ft);
Document doc = new Document(); Document doc = new Document();
@ -62,7 +62,7 @@ public class TestBinaryDocument extends LuceneTestCase {
/** fetch the binary stored field and compare it's content with the original one */ /** fetch the binary stored field and compare it's content with the original one */
BytesRef bytes = docFromReader.getBinaryValue("binaryStored"); BytesRef bytes = docFromReader.getBinaryValue("binaryStored");
assertNotNull(bytes); assertNotNull(bytes);
String binaryFldStoredTest = new String(bytes.bytes, bytes.offset, bytes.length); String binaryFldStoredTest = new String(bytes.bytes, bytes.offset, bytes.length, "UTF-8");
assertTrue(binaryFldStoredTest.equals(binaryValStored)); assertTrue(binaryFldStoredTest.equals(binaryValStored));
/** fetch the string field and compare it's content with the original one */ /** fetch the string field and compare it's content with the original one */
@ -75,7 +75,7 @@ public class TestBinaryDocument extends LuceneTestCase {
} }
public void testCompressionTools() throws Exception { public void testCompressionTools() throws Exception {
StoredField binaryFldCompressed = new StoredField("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes())); StoredField binaryFldCompressed = new StoredField("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes("UTF-8")));
StoredField stringFldCompressed = new StoredField("stringCompressed", CompressionTools.compressString(binaryValCompressed)); StoredField stringFldCompressed = new StoredField("stringCompressed", CompressionTools.compressString(binaryValCompressed));
Document doc = new Document(); Document doc = new Document();
@ -94,7 +94,7 @@ public class TestBinaryDocument extends LuceneTestCase {
assertTrue(docFromReader != null); assertTrue(docFromReader != null);
/** fetch the binary compressed field and compare it's content with the original one */ /** fetch the binary compressed field and compare it's content with the original one */
String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed"))); String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed")), "UTF-8");
assertTrue(binaryFldCompressedTest.equals(binaryValCompressed)); assertTrue(binaryFldCompressedTest.equals(binaryValCompressed));
assertTrue(CompressionTools.decompressString(docFromReader.getBinaryValue("stringCompressed")).equals(binaryValCompressed)); assertTrue(CompressionTools.decompressString(docFromReader.getBinaryValue("stringCompressed")).equals(binaryValCompressed));
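String.getBytes() and the new String(byte[], ...) constructors with no charset both bind the platform default, so a stored field written on a UTF-8 host may not round-trip elsewhere; the test now names the charset on both sides. A standalone sketch of that round trip (sample text is illustrative):

    import java.io.UnsupportedEncodingException;

    public class RoundTripDemo {
        public static void main(String[] args) throws UnsupportedEncodingException {
            String original = "binärer Wert";            // non-ASCII on purpose
            byte[] stored = original.getBytes("UTF-8");  // encode explicitly
            String restored = new String(stored, 0, stored.length, "UTF-8");
            System.out.println(original.equals(restored)); // true on every host
        }
    }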

@ -61,12 +61,12 @@ public class TestDateTools extends LuceneTestCase {
public void testStringtoTime() throws ParseException { public void testStringtoTime() throws ParseException {
long time = DateTools.stringToTime("197001010000"); long time = DateTools.stringToTime("197001010000");
Calendar cal = new GregorianCalendar(); // we use default locale since LuceneTestCase randomizes it
Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"), Locale.getDefault());
cal.clear(); cal.clear();
cal.set(1970, 0, 1, // year=1970, month=january, day=1 cal.set(1970, 0, 1, // year=1970, month=january, day=1
0, 0, 0); // hour, minute, second 0, 0, 0); // hour, minute, second
cal.set(Calendar.MILLISECOND, 0); cal.set(Calendar.MILLISECOND, 0);
cal.setTimeZone(TimeZone.getTimeZone("GMT"));
assertEquals(cal.getTime().getTime(), time); assertEquals(cal.getTime().getTime(), time);
cal.set(1980, 1, 2, // year=1980, month=february, day=2 cal.set(1980, 1, 2, // year=1980, month=february, day=2
11, 5, 0); // hour, minute, second 11, 5, 0); // hour, minute, second
@ -76,9 +76,9 @@ public class TestDateTools extends LuceneTestCase {
} }
public void testDateAndTimetoString() throws ParseException { public void testDateAndTimetoString() throws ParseException {
Calendar cal = new GregorianCalendar(); // we use default locale since LuceneTestCase randomizes it
Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"), Locale.getDefault());
cal.clear(); cal.clear();
cal.setTimeZone(TimeZone.getTimeZone("GMT"));
cal.set(2004, 1, 3, // year=2004, month=february(!), day=3 cal.set(2004, 1, 3, // year=2004, month=february(!), day=3
22, 8, 56); // hour, minute, second 22, 8, 56); // hour, minute, second
cal.set(Calendar.MILLISECOND, 333); cal.set(Calendar.MILLISECOND, 333);
@ -141,9 +141,9 @@ public class TestDateTools extends LuceneTestCase {
} }
public void testRound() { public void testRound() {
Calendar cal = new GregorianCalendar(); // we use default locale since LuceneTestCase randomizes it
Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"), Locale.getDefault());
cal.clear(); cal.clear();
cal.setTimeZone(TimeZone.getTimeZone("GMT"));
cal.set(2004, 1, 3, // year=2004, month=february(!), day=3 cal.set(2004, 1, 3, // year=2004, month=february(!), day=3
22, 8, 56); // hour, minute, second 22, 8, 56); // hour, minute, second
cal.set(Calendar.MILLISECOND, 333); cal.set(Calendar.MILLISECOND, 333);
@ -180,7 +180,7 @@ public class TestDateTools extends LuceneTestCase {
} }
private String isoFormat(Date date) { private String isoFormat(Date date) {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS", Locale.US); SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS", Locale.ROOT);
sdf.setTimeZone(TimeZone.getTimeZone("GMT")); sdf.setTimeZone(TimeZone.getTimeZone("GMT"));
return sdf.format(date); return sdf.format(date);
} }
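The TestDateTools fixture previously relied on new GregorianCalendar(), which binds both the host time zone and locale; the rewrite pins GMT and keeps the randomized locale, which is safe because only numeric fields are set. A sketch of why the zone must be pinned:

    import java.util.Calendar;
    import java.util.GregorianCalendar;
    import java.util.Locale;
    import java.util.TimeZone;

    public class EpochDemo {
        public static void main(String[] args) {
            Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"), Locale.getDefault());
            cal.clear();
            cal.set(1970, 0, 1, 0, 0, 0); // 1970-01-01 00:00:00
            cal.set(Calendar.MILLISECOND, 0);
            // Prints 0 with GMT pinned; with the host zone this would be its UTC offset in millis.
            System.out.println(cal.getTime().getTime());
        }
    }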

@ -220,10 +220,10 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir); CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos)); checker.setInfoStream(new PrintStream(bos, false, "UTF-8"));
CheckIndex.Status indexStatus = checker.checkIndex(); CheckIndex.Status indexStatus = checker.checkIndex();
assertFalse(indexStatus.clean); assertFalse(indexStatus.clean);
assertTrue(bos.toString().contains(IndexFormatTooOldException.class.getName())); assertTrue(bos.toString("UTF-8").contains(IndexFormatTooOldException.class.getName()));
dir.close(); dir.close();
_TestUtil.rmDir(oldIndxeDir); _TestUtil.rmDir(oldIndxeDir);
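Note the pairing in this test: the PrintStream that fills the buffer and the toString() that drains it now name the same charset, since a default-charset encoder feeding a UTF-8 decoder garbles any non-ASCII message. A minimal sketch (message text is illustrative):

    import java.io.ByteArrayOutputStream;
    import java.io.PrintStream;

    public class InfoStreamCapture {
        public static void main(String[] args) throws Exception {
            ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
            // Encoder and decoder agree on UTF-8, so the capture is lossless.
            PrintStream infoStream = new PrintStream(bos, false, "UTF-8");
            infoStream.println("WARNING: ümlauts survive this round trip");
            infoStream.flush();
            System.out.println(bos.toString("UTF-8").contains("WARNING"));
        }
    }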

@ -52,12 +52,12 @@ public class TestCheckIndex extends LuceneTestCase {
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir); CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos)); checker.setInfoStream(new PrintStream(bos, false, "UTF-8"));
if (VERBOSE) checker.setInfoStream(System.out); if (VERBOSE) checker.setInfoStream(System.out);
CheckIndex.Status indexStatus = checker.checkIndex(); CheckIndex.Status indexStatus = checker.checkIndex();
if (indexStatus.clean == false) { if (indexStatus.clean == false) {
System.out.println("CheckIndex failed"); System.out.println("CheckIndex failed");
System.out.println(bos.toString()); System.out.println(bos.toString("UTF-8"));
fail(); fail();
} }

@ -17,11 +17,14 @@ package org.apache.lucene.index;
* limitations under the License. * limitations under the License.
*/ */
import java.io.File; import java.io.File;
import java.io.FileReader; import java.io.FileInputStream;
import java.io.FileWriter; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.io.StringWriter; import java.io.StringWriter;
import java.io.Writer;
import java.util.Collection; import java.util.Collection;
import java.util.HashSet; import java.util.HashSet;
import java.util.LinkedList; import java.util.LinkedList;
@ -78,14 +81,14 @@ public class TestDoc extends LuceneTestCase {
} }
private File createOutput(String name, String text) throws IOException { private File createOutput(String name, String text) throws IOException {
FileWriter fw = null; Writer fw = null;
PrintWriter pw = null; PrintWriter pw = null;
try { try {
File f = new File(workDir, name); File f = new File(workDir, name);
if (f.exists()) f.delete(); if (f.exists()) f.delete();
fw = new FileWriter(f); fw = new OutputStreamWriter(new FileOutputStream(f), "UTF-8");
pw = new PrintWriter(fw); pw = new PrintWriter(fw);
pw.println(text); pw.println(text);
return f; return f;
@ -182,9 +185,11 @@ public class TestDoc extends LuceneTestCase {
{ {
File file = new File(workDir, fileName); File file = new File(workDir, fileName);
Document doc = new Document(); Document doc = new Document();
doc.add(new TextField("contents", new FileReader(file), Field.Store.NO)); InputStreamReader is = new InputStreamReader(new FileInputStream(file), "UTF-8");
doc.add(new TextField("contents", is, Field.Store.NO));
writer.addDocument(doc); writer.addDocument(doc);
writer.commit(); writer.commit();
is.close();
return writer.newestSegment(); return writer.newestSegment();
} }
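FileWriter and FileReader take no charset parameter at all, which is why TestDoc switches to OutputStreamWriter/InputStreamReader over raw file streams. A compact sketch of the replacement idiom:

    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import java.io.InputStreamReader;
    import java.io.OutputStreamWriter;
    import java.io.PrintWriter;
    import java.io.Reader;
    import java.io.Writer;

    public class CharsetSafeFileIO {
        public static void main(String[] args) throws Exception {
            File f = File.createTempFile("testdoc", ".txt");
            // Write: wrap the byte stream with an explicit UTF-8 encoder.
            Writer fw = new OutputStreamWriter(new FileOutputStream(f), "UTF-8");
            PrintWriter pw = new PrintWriter(fw);
            pw.println("This is the first test file");
            pw.close();
            // Read: wrap the byte stream with the matching UTF-8 decoder.
            Reader in = new InputStreamReader(new FileInputStream(f), "UTF-8");
            StringBuilder sb = new StringBuilder();
            int c;
            while ((c = in.read()) != -1) sb.append((char) c);
            in.close();
            System.out.print(sb);
            f.delete();
        }
    }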

@ -43,9 +43,8 @@ public class TestPayloads extends LuceneTestCase {
// Simple tests to test the Payload class // Simple tests to test the Payload class
public void testPayload() throws Exception { public void testPayload() throws Exception {
byte[] testData = "This is a test!".getBytes(); BytesRef payload = new BytesRef("This is a test!");
BytesRef payload = new BytesRef(testData); assertEquals("Wrong payload length.", "This is a test!".length(), payload.length);
assertEquals("Wrong payload length.", testData.length, payload.length);
BytesRef clone = payload.clone(); BytesRef clone = payload.clone();
assertEquals(payload.length, clone.length); assertEquals(payload.length, clone.length);
@ -73,7 +72,7 @@ public class TestPayloads extends LuceneTestCase {
// enabled in only some documents // enabled in only some documents
d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO)); d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO));
// only add payload data for field f2 // only add payload data for field f2
analyzer.setPayloadData("f2", "somedata".getBytes(), 0, 1); analyzer.setPayloadData("f2", "somedata".getBytes("UTF-8"), 0, 1);
writer.addDocument(d); writer.addDocument(d);
// flush // flush
writer.close(); writer.close();
@ -96,8 +95,8 @@ public class TestPayloads extends LuceneTestCase {
d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO)); d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO));
d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO)); d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO));
// add payload data for field f2 and f3 // add payload data for field f2 and f3
analyzer.setPayloadData("f2", "somedata".getBytes(), 0, 1); analyzer.setPayloadData("f2", "somedata".getBytes("UTF-8"), 0, 1);
analyzer.setPayloadData("f3", "somedata".getBytes(), 0, 3); analyzer.setPayloadData("f3", "somedata".getBytes("UTF-8"), 0, 3);
writer.addDocument(d); writer.addDocument(d);
// force merge // force merge
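The payload rewrite leans on BytesRef's CharSequence constructor, which encodes the text as UTF-8; that is what makes dropping the intermediate getBytes() call safe. A sketch of the equivalence, using the BytesRef class from this tree:

    import org.apache.lucene.util.BytesRef;

    public class PayloadBytes {
        public static void main(String[] args) throws Exception {
            // The CharSequence constructor encodes as UTF-8 ...
            BytesRef payload = new BytesRef("This is a test!");
            // ... so it matches an explicit UTF-8 encoding, never the platform default.
            BytesRef explicit = new BytesRef("This is a test!".getBytes("UTF-8"));
            System.out.println(payload.equals(explicit)); // true
            System.out.println(payload.length);           // 15
        }
    }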

@ -29,6 +29,8 @@ import org.junit.AfterClass;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import java.text.DecimalFormat; import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.Locale;
import java.util.Random; import java.util.Random;
/** Test that BooleanQuery.setMinimumNumberShouldMatch works. /** Test that BooleanQuery.setMinimumNumberShouldMatch works.
@ -378,7 +380,7 @@ public class TestBooleanMinShouldMatch extends LuceneTestCase {
System.err.println("------- " + test + " -------"); System.err.println("------- " + test + " -------");
DecimalFormat f = new DecimalFormat("0.000000"); DecimalFormat f = new DecimalFormat("0.000000", DecimalFormatSymbols.getInstance(Locale.ROOT));
for (int i = 0; i < h.length; i++) { for (int i = 0; i < h.length; i++) {
StoredDocument d = searcher.doc(h[i].doc); StoredDocument d = searcher.doc(h[i].doc);

@ -19,8 +19,10 @@ package org.apache.lucene.search;
import java.io.IOException; import java.io.IOException;
import java.util.Calendar; import java.util.Calendar;
import java.util.GregorianCalendar; import java.util.GregorianCalendar;
import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.Random; import java.util.Random;
import java.util.TimeZone;
import java.util.TreeMap; import java.util.TreeMap;
import org.apache.lucene.document.DateTools; import org.apache.lucene.document.DateTools;
@ -230,10 +232,12 @@ public class TestCustomSearcherSort extends LuceneTestCase {
private class RandomGen { private class RandomGen {
RandomGen(Random random) { RandomGen(Random random) {
this.random = random; this.random = random;
base.set(1980, 1, 1);
} }
private Random random; private Random random;
private Calendar base = new GregorianCalendar(1980, 1, 1); // we use the default Locale/TZ since LuceneTestCase randomizes it
private Calendar base = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
// Just to generate some different Lucene Date strings // Just to generate some different Lucene Date strings
private String getLuceneDate() { private String getLuceneDate() {

@ -36,6 +36,8 @@ import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import java.text.DecimalFormat; import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.Locale;
import java.io.IOException; import java.io.IOException;
/** /**
@ -486,7 +488,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
System.err.println("------- " + test + " -------"); System.err.println("------- " + test + " -------");
DecimalFormat f = new DecimalFormat("0.000000000"); DecimalFormat f = new DecimalFormat("0.000000000", DecimalFormatSymbols.getInstance(Locale.ROOT));
for (int i = 0; i < h.length; i++) { for (int i = 0; i < h.length; i++) {
StoredDocument d = searcher.doc(h[i].doc); StoredDocument d = searcher.doc(h[i].doc);

@ -23,6 +23,7 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.LinkedHashSet; import java.util.LinkedHashSet;
import java.util.List; import java.util.List;
import java.util.Locale;
import java.util.concurrent.CyclicBarrier; import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
@ -117,10 +118,10 @@ public class TestFieldCache extends LuceneTestCase {
try { try {
FieldCache cache = FieldCache.DEFAULT; FieldCache cache = FieldCache.DEFAULT;
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
cache.setInfoStream(new PrintStream(bos)); cache.setInfoStream(new PrintStream(bos, false, "UTF-8"));
cache.getDoubles(reader, "theDouble", false); cache.getDoubles(reader, "theDouble", false);
cache.getFloats(reader, "theDouble", false); cache.getFloats(reader, "theDouble", false);
assertTrue(bos.toString().indexOf("WARNING") != -1); assertTrue(bos.toString("UTF-8").indexOf("WARNING") != -1);
} finally { } finally {
FieldCache.DEFAULT.purgeAllCaches(); FieldCache.DEFAULT.purgeAllCaches();
} }
@ -261,7 +262,7 @@ public class TestFieldCache extends LuceneTestCase {
if (chunk == 0) { if (chunk == 0) {
for (int ord = 0; ord < values.size(); ord++) { for (int ord = 0; ord < values.size(); ord++) {
BytesRef term = values.get(ord); BytesRef term = values.get(ord);
assertNull(String.format("Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term); assertNull(String.format(Locale.ROOT, "Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term);
} }
break; break;
} }
@ -275,7 +276,7 @@ public class TestFieldCache extends LuceneTestCase {
reuse = termOrds.lookup(i, reuse); reuse = termOrds.lookup(i, reuse);
reuse.read(buffer); reuse.read(buffer);
} }
assertTrue(String.format("Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual)); assertTrue(String.format(Locale.ROOT, "Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual));
} }
if (chunk <= buffer.length) { if (chunk <= buffer.length) {

@ -44,7 +44,7 @@ public class TestMultiValuedNumericRangeQuery extends LuceneTestCase {
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())) newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
.setMaxBufferedDocs(_TestUtil.nextInt(random(), 50, 1000))); .setMaxBufferedDocs(_TestUtil.nextInt(random(), 50, 1000)));
DecimalFormat format = new DecimalFormat("00000000000", new DecimalFormatSymbols(Locale.US)); DecimalFormat format = new DecimalFormat("00000000000", new DecimalFormatSymbols(Locale.ROOT));
int num = atLeast(500); int num = atLeast(500);
for (int l = 0; l < num; l++) { for (int l = 0; l < num; l++) {

@ -58,7 +58,7 @@ public class TestRegexpRandom extends LuceneTestCase {
Field field = newField("field", "", customType); Field field = newField("field", "", customType);
doc.add(field); doc.add(field);
NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ENGLISH)); NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
for (int i = 0; i < 1000; i++) { for (int i = 0; i < 1000; i++) {
field.setStringValue(df.format(i)); field.setStringValue(df.format(i));
writer.addDocument(doc); writer.addDocument(doc);

@ -54,7 +54,7 @@ public class TestWildcardRandom extends LuceneTestCase {
Field field = newStringField("field", "", Field.Store.NO); Field field = newStringField("field", "", Field.Store.NO);
doc.add(field); doc.add(field);
NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ENGLISH)); NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
for (int i = 0; i < 1000; i++) { for (int i = 0; i < 1000; i++) {
field.setStringValue(df.format(i)); field.setStringValue(df.format(i));
writer.addDocument(doc); writer.addDocument(doc);

@ -81,7 +81,7 @@ public class TestBasics extends LuceneTestCase {
@Override @Override
public boolean incrementToken() throws IOException { public boolean incrementToken() throws IOException {
if (input.incrementToken()) { if (input.incrementToken()) {
payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes())); payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes("UTF-8")));
pos++; pos++;
return true; return true;
} else { } else {
@ -411,7 +411,7 @@ public class TestBasics extends LuceneTestCase {
@Test @Test
public void testSpanPayloadCheck() throws Exception { public void testSpanPayloadCheck() throws Exception {
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five")); SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five"));
BytesRef pay = new BytesRef(("pos: " + 5).getBytes()); BytesRef pay = new BytesRef(("pos: " + 5).getBytes("UTF-8"));
SpanQuery query = new SpanPayloadCheckQuery(term1, Collections.singletonList(pay.bytes)); SpanQuery query = new SpanPayloadCheckQuery(term1, Collections.singletonList(pay.bytes));
checkHits(query, new int[] checkHits(query, new int[]
{1125, 1135, 1145, 1155, 1165, 1175, 1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345, 1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495, 1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665, 1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835, 1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985, 1995}); {1125, 1135, 1145, 1155, 1165, 1175, 1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345, 1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495, 1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665, 1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835, 1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985, 1995});
@ -426,8 +426,8 @@ public class TestBasics extends LuceneTestCase {
clauses[0] = term1; clauses[0] = term1;
clauses[1] = term2; clauses[1] = term2;
snq = new SpanNearQuery(clauses, 0, true); snq = new SpanNearQuery(clauses, 0, true);
pay = new BytesRef(("pos: " + 0).getBytes()); pay = new BytesRef(("pos: " + 0).getBytes("UTF-8"));
pay2 = new BytesRef(("pos: " + 1).getBytes()); pay2 = new BytesRef(("pos: " + 1).getBytes("UTF-8"));
list = new ArrayList<byte[]>(); list = new ArrayList<byte[]>();
list.add(pay.bytes); list.add(pay.bytes);
list.add(pay2.bytes); list.add(pay2.bytes);
@ -439,9 +439,9 @@ public class TestBasics extends LuceneTestCase {
clauses[1] = term2; clauses[1] = term2;
clauses[2] = new SpanTermQuery(new Term("field", "five")); clauses[2] = new SpanTermQuery(new Term("field", "five"));
snq = new SpanNearQuery(clauses, 0, true); snq = new SpanNearQuery(clauses, 0, true);
pay = new BytesRef(("pos: " + 0).getBytes()); pay = new BytesRef(("pos: " + 0).getBytes("UTF-8"));
pay2 = new BytesRef(("pos: " + 1).getBytes()); pay2 = new BytesRef(("pos: " + 1).getBytes("UTF-8"));
BytesRef pay3 = new BytesRef(("pos: " + 2).getBytes()); BytesRef pay3 = new BytesRef(("pos: " + 2).getBytes("UTF-8"));
list = new ArrayList<byte[]>(); list = new ArrayList<byte[]>();
list.add(pay.bytes); list.add(pay.bytes);
list.add(pay2.bytes); list.add(pay2.bytes);
@ -470,10 +470,10 @@ public class TestBasics extends LuceneTestCase {
checkHits(query, new int[]{1103, 1203,1303,1403,1503,1603,1703,1803,1903}); checkHits(query, new int[]{1103, 1203,1303,1403,1503,1603,1703,1803,1903});
Collection<byte[]> payloads = new ArrayList<byte[]>(); Collection<byte[]> payloads = new ArrayList<byte[]>();
BytesRef pay = new BytesRef(("pos: " + 0).getBytes()); BytesRef pay = new BytesRef(("pos: " + 0).getBytes("UTF-8"));
BytesRef pay2 = new BytesRef(("pos: " + 1).getBytes()); BytesRef pay2 = new BytesRef(("pos: " + 1).getBytes("UTF-8"));
BytesRef pay3 = new BytesRef(("pos: " + 3).getBytes()); BytesRef pay3 = new BytesRef(("pos: " + 3).getBytes("UTF-8"));
BytesRef pay4 = new BytesRef(("pos: " + 4).getBytes()); BytesRef pay4 = new BytesRef(("pos: " + 4).getBytes("UTF-8"));
payloads.add(pay.bytes); payloads.add(pay.bytes);
payloads.add(pay2.bytes); payloads.add(pay2.bytes);
payloads.add(pay3.bytes); payloads.add(pay3.bytes);

@ -276,7 +276,7 @@ public class TestPayloadSpans extends LuceneTestCase {
Collection<byte[]> payloads = spans.getPayload(); Collection<byte[]> payloads = spans.getPayload();
for (final byte [] payload : payloads) { for (final byte [] payload : payloads) {
payloadSet.add(new String(payload)); payloadSet.add(new String(payload, "UTF-8"));
} }
} }
} }
@ -311,7 +311,7 @@ public class TestPayloadSpans extends LuceneTestCase {
while (spans.next()) { while (spans.next()) {
Collection<byte[]> payloads = spans.getPayload(); Collection<byte[]> payloads = spans.getPayload();
for (final byte[] payload : payloads) { for (final byte[] payload : payloads) {
payloadSet.add(new String(payload)); payloadSet.add(new String(payload, "UTF-8"));
} }
} }
} }
@ -347,7 +347,7 @@ public class TestPayloadSpans extends LuceneTestCase {
Collection<byte[]> payloads = spans.getPayload(); Collection<byte[]> payloads = spans.getPayload();
for (final byte [] payload : payloads) { for (final byte [] payload : payloads) {
payloadSet.add(new String(payload)); payloadSet.add(new String(payload, "UTF-8"));
} }
} }
} }
@ -383,7 +383,7 @@ public class TestPayloadSpans extends LuceneTestCase {
System.out.println("Num payloads:" + payloads.size()); System.out.println("Num payloads:" + payloads.size());
for (final byte [] bytes : payloads) { for (final byte [] bytes : payloads) {
if(VERBOSE) if(VERBOSE)
System.out.println(new String(bytes)); System.out.println(new String(bytes, "UTF-8"));
} }
reader.close(); reader.close();
directory.close(); directory.close();
@ -456,7 +456,7 @@ public class TestPayloadSpans extends LuceneTestCase {
for (final byte [] bytes : payload) { for (final byte [] bytes : payload) {
if(VERBOSE) if(VERBOSE)
System.out.println("doc:" + spans.doc() + " s:" + spans.start() + " e:" + spans.end() + " " System.out.println("doc:" + spans.doc() + " s:" + spans.start() + " e:" + spans.end() + " "
+ new String(bytes)); + new String(bytes, "UTF-8"));
} }
assertEquals(numPayloads[cnt],payload.size()); assertEquals(numPayloads[cnt],payload.size());
@ -505,9 +505,9 @@ public class TestPayloadSpans extends LuceneTestCase {
if (!nopayload.contains(token)) { if (!nopayload.contains(token)) {
if (entities.contains(token)) { if (entities.contains(token)) {
payloadAtt.setPayload(new BytesRef((token + ":Entity:"+ pos ).getBytes())); payloadAtt.setPayload(new BytesRef(token + ":Entity:"+ pos ));
} else { } else {
payloadAtt.setPayload(new BytesRef((token + ":Noise:" + pos ).getBytes())); payloadAtt.setPayload(new BytesRef(token + ":Noise:" + pos ));
} }
} }
pos += posIncrAtt.getPositionIncrement(); pos += posIncrAtt.getPositionIncrement();
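Producer and consumer sides now agree on UTF-8: payloads are built from strings via BytesRef and decoded with an explicit charset. A sketch of the paired encode/decode, assuming the BytesRef bytes/offset/length layout from this tree (token value illustrative):

    import org.apache.lucene.util.BytesRef;

    public class PayloadRoundTrip {
        public static void main(String[] args) throws Exception {
            int pos = 3;
            // Producer: BytesRef(CharSequence) encodes the payload text as UTF-8.
            BytesRef ref = new BytesRef("café:Entity:" + pos);
            byte[] payload = new byte[ref.length];
            System.arraycopy(ref.bytes, ref.offset, payload, 0, ref.length);
            // Consumer: decode with the same charset, not the platform default.
            System.out.println(new String(payload, "UTF-8")); // café:Entity:3
        }
    }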
