LUCENE-3312: Merge up to trunk HEAD. There was a really huge change (LUCENE-4199).

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3312@1359283 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2012-07-09 17:04:57 +00:00
commit 27aa2f6a28
261 changed files with 1795 additions and 1774 deletions

View File

@ -62,6 +62,12 @@ Build
* LUCENE-4115: JAR resolution/ cleanup should be done automatically for ant
clean/ eclipse/ resolve (Dawid Weiss)
* LUCENE-4199: Add a new target "check-forbidden-apis" that parses all
generated .class files for use of APIs that rely on the default charset,
default locale, or default timezone, and fails the build if violations
are found. This ensures that Lucene / Solr is independent of local
configuration options. (Uwe Schindler, Robert Muir, Dawid Weiss)
Documentation
* LUCENE-4195: Added package documentation and examples for
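The class of bug the new check-forbidden-apis target catches is exactly what the rest of this merge fixes by hand: calls that silently pick up JVM defaults. A minimal illustration (not code from this commit) pairing each forbidden default with its explicit replacement:

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.Calendar;
import java.util.Locale;
import java.util.TimeZone;

public class ForbiddenApiExamples {
  public static void main(String[] args) throws Exception {
    // Forbidden: "FOO".toLowerCase() uses the JVM default locale.
    String s = "FOO".toLowerCase(Locale.ROOT);

    // Forbidden: new InputStreamReader(in) decodes with the default charset.
    InputStream in = new ByteArrayInputStream(s.getBytes("UTF-8"));
    Reader r = new InputStreamReader(in, Charset.forName("UTF-8"));
    r.close();

    // Forbidden: Calendar.getInstance() uses the default timezone and locale.
    Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT);
    System.out.println(s + " / " + cal.getTimeZone().getID());
  }
}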

View File

@ -61,50 +61,50 @@
executable="${python.exe}" failonerror="true" logerror="true">
<arg value="htmlentity.py"/>
</exec>
<fixcrlf file="src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex" encoding="UTF-8"/>
</target>
<target name="jflex-wiki-tokenizer" depends="init,jflex-check" if="jflex.present">
<taskdef classname="jflex.anttask.JFlexTask" name="jflex">
<classpath refid="jflex.classpath"/>
</taskdef>
<jflex file="src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex"
outdir="src/java/org/apache/lucene/analysis/wikipedia"
nobak="on"/>
<run-jflex dir="src/java/org/apache/lucene/analysis/wikipedia" name="WikipediaTokenizerImpl"/>
</target>
<target name="jflex-StandardAnalyzer" depends="init,jflex-check" if="jflex.present">
<taskdef classname="jflex.anttask.JFlexTask" name="jflex">
<classpath refid="jflex.classpath"/>
</taskdef>
<jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex"
outdir="src/java/org/apache/lucene/analysis/standard"
nobak="on" />
<jflex file="src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex"
outdir="src/java/org/apache/lucene/analysis/standard"
nobak="on" />
<jflex file="src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex"
outdir="src/java/org/apache/lucene/analysis/standard/std31"
nobak="on" />
<run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="StandardTokenizerImpl"/>
<run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="ClassicTokenizerImpl"/>
</target>
<target name="jflex-UAX29URLEmailTokenizer" depends="jflex-check" if="jflex.present">
<taskdef classname="jflex.anttask.JFlexTask" name="jflex">
<classpath refid="jflex.classpath"/>
</taskdef>
<jflex file="src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex"
outdir="src/java/org/apache/lucene/analysis/standard"
nobak="on" />
<jflex file="src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.jflex"
outdir="src/java/org/apache/lucene/analysis/standard/std31"
nobak="on" />
<jflex file="src/java/org/apache/lucene/analysis/standard/std34/UAX29URLEmailTokenizerImpl34.jflex"
outdir="src/java/org/apache/lucene/analysis/standard/std34"
nobak="on" />
<run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="UAX29URLEmailTokenizerImpl"/>
</target>
<!-- Remove the inappropriate JFlex-generated constructor -->
<macrodef name="run-jflex">
<attribute name="dir"/>
<attribute name="name"/>
<sequential>
<jflex file="@{dir}/@{name}.jflex"
outdir="@{dir}"
nobak="on" />
<replaceregexp file="@{dir}/@{name}.java"
match="/\*\*\s*\*\s*Creates a new scanner\..*this\(new java\.io\.InputStreamReader\(in\)\);\s*\}"
replace="" flags="sg"/>
</sequential>
</macrodef>
<target name="clean-jflex">
<delete>
<fileset dir="src/java/org/apache/lucene/analysis/charfilter" includes="*.java">
<containsregexp expression="generated.*by.*JFlex"/>
</fileset>
<fileset dir="src/java/org/apache/lucene/analysis/wikipedia" includes="*.java">
<containsregexp expression="generated.*by.*JFlex"/>
</fileset>
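The run-jflex macro above regenerates a scanner, then uses replaceregexp to delete the JFlex-generated InputStream constructor it matches: that constructor wraps the stream in new java.io.InputStreamReader(in) with no charset argument, so it decodes with the platform default. With the constructor removed, callers must build the Reader themselves. A sketch of the safe pattern, assuming UTF-8 input:

import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;

class ScannerInput {
  // Equivalent intent to the deleted constructor, but with the
  // charset chosen explicitly by the caller instead of the platform.
  static Reader forScanner(InputStream in) {
    return new InputStreamReader(in, Charset.forName("UTF-8"));
  }
}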

View File

@ -1,5 +1,7 @@
package org.apache.lucene.analysis.br;
import java.util.Locale;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -21,6 +23,7 @@ package org.apache.lucene.analysis.br;
* A stemmer for Brazilian Portuguese words.
*/
public class BrazilianStemmer {
private static final Locale locale = new Locale("pt", "BR");
/**
* Changed term
@ -243,7 +246,7 @@ public class BrazilianStemmer {
return null ;
}
value = value.toLowerCase() ;
value = value.toLowerCase(locale) ;
for (j=0 ; j < value.length() ; j++) {
if ((value.charAt(j) == 'á') ||
(value.charAt(j) == 'â') ||
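Pinning the pt-BR Locale matters because the no-argument String.toLowerCase() uses whatever default locale the JVM starts with, and some locales change the case mapping entirely. The classic failure mode, as a small sketch:

import java.util.Locale;

public class TurkishLowercase {
  public static void main(String[] args) {
    Locale.setDefault(new Locale("tr", "TR"));
    // Under a Turkish default locale, 'I' lowercases to dotless i (U+0131),
    // so the stemmer's character checks above would see unexpected input.
    System.out.println("INDEX".toLowerCase());                       // \u0131ndex
    System.out.println("INDEX".toLowerCase(new Locale("pt", "BR"))); // index
  }
}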

View File

@ -1,6 +1,6 @@
package org.apache.lucene.analysis.charfilter;
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.

View File

@ -1,4 +1,7 @@
package org.apache.lucene.analysis.de;
import java.util.Locale;
// This file is encoded in UTF-8
/*
@ -38,6 +41,8 @@ public class GermanStemmer
*/
private int substCount = 0;
private static final Locale locale = new Locale("de", "DE");
/**
* Stems the given term to a unique <tt>discriminator</tt>.
*
@ -47,7 +52,7 @@ public class GermanStemmer
protected String stem( String term )
{
// Use lowercase for medium stemming.
term = term.toLowerCase();
term = term.toLowerCase(locale);
if ( !isStemmable( term ) )
return term;
// Reset the StringBuilder.

View File

@ -252,7 +252,7 @@ public class HunspellDictionary {
}
String condition = ruleArgs[4];
affix.setCondition(condition, String.format(conditionPattern, condition));
affix.setCondition(condition, String.format(Locale.ROOT, conditionPattern, condition));
affix.setCrossProduct(crossProduct);
List<HunspellAffix> list = affixes.get(affix.getAppend());
@ -376,7 +376,7 @@ public class HunspellDictionary {
Arrays.sort(wordForm.getFlags());
entry = line.substring(0, flagSep);
if(ignoreCase) {
entry = entry.toLowerCase(Locale.ENGLISH);
entry = entry.toLowerCase(Locale.ROOT);
}
}
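String.format without a Locale argument is locale-sensitive as well: numeric conversions use the default locale's separators and digits, which is why every format call in this merge gains an explicit Locale.ROOT (harmless for pure %s patterns, essential once numbers appear). A minimal demonstration of the drift this prevents:

import java.util.Locale;

public class FormatLocale {
  public static void main(String[] args) {
    Locale.setDefault(Locale.GERMANY);
    System.out.println(String.format("%.2f", 1.5));              // 1,50
    System.out.println(String.format(Locale.ROOT, "%.2f", 1.5)); // 1.50
  }
}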

View File

@ -20,6 +20,7 @@ package org.apache.lucene.analysis.hunspell;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
@ -330,7 +331,7 @@ public class HunspellStemmer {
HunspellStemmer stemmer = new HunspellStemmer(dictionary);
Scanner scanner = new Scanner(System.in);
Scanner scanner = new Scanner(System.in, Charset.defaultCharset().name());
System.out.print("> ");
while (scanner.hasNextLine()) {
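This one is subtler: for an interactive command-line tool, decoding System.in with the default charset is the desired behavior. Passing Charset.defaultCharset().name() keeps that behavior but states the dependency explicitly, so the forbidden-apis scan can tell it is deliberate rather than accidental. The same pattern in isolation:

import java.nio.charset.Charset;
import java.util.Scanner;

public class ConsoleEcho {
  public static void main(String[] args) {
    // Naming the default charset documents the intentional dependency
    // on the console's encoding.
    Scanner scanner = new Scanner(System.in, Charset.defaultCharset().name());
    System.out.print("> ");
    while (scanner.hasNextLine()) {
      System.out.println(scanner.nextLine());
      System.out.print("> ");
    }
  }
}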

View File

@ -20,6 +20,7 @@ package org.apache.lucene.analysis.sinks;
import java.text.DateFormat;
import java.text.ParseException;
import java.util.Date;
import java.util.Locale;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeSource;
@ -37,10 +38,12 @@ public class DateRecognizerSinkFilter extends TeeSinkTokenFilter.SinkFilter {
protected CharTermAttribute termAtt;
/**
* Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object.
* Uses {@link java.text.DateFormat#getDateInstance(int, Locale)
* DateFormat#getDateInstance(DateFormat.DEFAULT, Locale.ROOT)} as
* the {@link java.text.DateFormat} object.
*/
public DateRecognizerSinkFilter() {
this(DateFormat.getDateInstance());
this(DateFormat.getDateInstance(DateFormat.DEFAULT, Locale.ROOT));
}
public DateRecognizerSinkFilter(DateFormat dateFormat) {
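DateFormat.getDateInstance() builds a formatter for the default locale, so the same sink filter would recognize different date strings on different machines. Fixing it to Locale.ROOT makes recognition deterministic. A sketch of the difference:

import java.text.DateFormat;
import java.text.ParseException;
import java.util.Locale;

public class DateParseLocale {
  public static void main(String[] args) throws ParseException {
    String text = "Jul 9, 2012";
    DateFormat us = DateFormat.getDateInstance(DateFormat.DEFAULT, Locale.US);
    System.out.println(us.parse(text));  // parses under a US locale
    // The equivalent German formatter expects "09.07.2012" and would
    // throw ParseException on the same input:
    DateFormat de = DateFormat.getDateInstance(DateFormat.DEFAULT, Locale.GERMANY);
    System.out.println(de.format(us.parse(text)));  // 09.07.2012
  }
}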

View File

@ -1,8 +1,8 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/30/11 12:10 PM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 16:59 */
package org.apache.lucene.analysis.standard;
/*
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@ -33,8 +33,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
* on 9/30/11 12:10 PM from the specification file
* <tt>/lucene/jflex/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
* on 08.07.12 16:59 from the specification file
* <tt>C:/Users/Uwe Schindler/Projects/lucene/lucene4199/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
*/
class ClassicTokenizerImpl implements StandardTokenizerInterface {
@ -383,15 +383,7 @@ public final void getText(CharTermAttribute t) {
this.zzReader = in;
}
/**
* Creates a new scanner.
* There is also java.io.Reader version of this constructor.
*
* @param in the java.io.Inputstream to read input from.
*/
ClassicTokenizerImpl(java.io.InputStream in) {
this(new java.io.InputStreamReader(in));
}
/**
* Unpacks the compressed character translation table.

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
// Generated using ICU4J 4.8.0.0 on Friday, September 30, 2011 4:10:42 PM UTC
// Generated using ICU4J 4.8.1.1 on Sunday, July 8, 2012 2:59:49 PM UTC
// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros

View File

@ -1,8 +1,8 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/30/11 12:10 PM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 16:59 */
package org.apache.lucene.analysis.standard;
/*
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@ -759,15 +759,7 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
this.zzReader = in;
}
/**
* Creates a new scanner.
* There is also java.io.Reader version of this constructor.
*
* @param in the java.io.Inputstream to read input from.
*/
public StandardTokenizerImpl(java.io.InputStream in) {
this(new java.io.InputStreamReader(in));
}
/**
* Unpacks the compressed character translation table.

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 3/18/12 12:05 PM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 17:00 */
package org.apache.lucene.analysis.standard;
@ -3844,15 +3844,7 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf
this.zzReader = in;
}
/**
* Creates a new scanner.
* There is also java.io.Reader version of this constructor.
*
* @param in the java.io.Inputstream to read input from.
*/
public UAX29URLEmailTokenizerImpl(java.io.InputStream in) {
this(new java.io.InputStreamReader(in));
}
/**
* Unpacks the compressed character translation table.

View File

@ -1,6 +1,6 @@
package org.apache.lucene.analysis.standard;
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.

View File

@ -1,8 +1,8 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 1/22/12 10:26 PM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 17:00 */
package org.apache.lucene.analysis.wikipedia;
/*
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
* on 1/22/12 10:26 PM from the specification file
* <tt>/home/rmuir/workspace/lucene-clean-trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
* on 08.07.12 17:00 from the specification file
* <tt>C:/Users/Uwe Schindler/Projects/lucene/lucene4199/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
*/
class WikipediaTokenizerImpl {
@ -519,15 +519,7 @@ final void reset() {
this.zzReader = in;
}
/**
* Creates a new scanner.
* There is also java.io.Reader version of this constructor.
*
* @param in the java.io.Inputstream to read input from.
*/
WikipediaTokenizerImpl(java.io.InputStream in) {
this(new java.io.InputStreamReader(in));
}
/**
* Unpacks the compressed character translation table.

View File

@ -79,7 +79,7 @@ public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
final String term = termAtt.toString().toLowerCase(Locale.ENGLISH);
final String term = termAtt.toString().toLowerCase(Locale.ROOT);
termAtt.setEmpty().append(term);
}
return true;

View File

@ -27,7 +27,7 @@ import org.apache.lucene.analysis.MockTokenizer;
public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase {
public void test() throws IOException {
DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.US));
DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.ROOT));
String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006 The dogs finally reacted on 7/12/2006";
TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
TeeSinkTokenFilter.SinkTokenStream sink = tee.newSinkTokenStream(sinkFilter);

View File

@ -18,6 +18,7 @@ package org.apache.lucene.analysis.sinks;
import java.io.IOException;
import java.io.StringReader;
import java.util.Locale;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.core.LowerCaseFilter;
@ -164,7 +165,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
TokenStream lowerCasing = new LowerCaseFilter(TEST_VERSION_CURRENT, source1);
String[] lowerCaseTokens = new String[tokens1.length];
for (int i = 0; i < tokens1.length; i++)
lowerCaseTokens[i] = tokens1[i].toLowerCase();
lowerCaseTokens[i] = tokens1[i].toLowerCase(Locale.ROOT);
assertTokenStreamContents(lowerCasing, lowerCaseTokens);
}
@ -180,7 +181,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
StringBuilder buffer = new StringBuilder();
System.out.println("-----Tokens: " + tokCount[k] + "-----");
for (int i = 0; i < tokCount[k]; i++) {
buffer.append(English.intToEnglish(i).toUpperCase()).append(' ');
buffer.append(English.intToEnglish(i).toUpperCase(Locale.ROOT)).append(' ');
}
//make sure we produce the same tokens
TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString()))));

View File

@ -32,7 +32,8 @@ public class TestCharArrayIterator extends LuceneTestCase {
}
public void testConsumeWordInstance() {
BreakIterator bi = BreakIterator.getWordInstance();
// we use the default locale, as it's randomized by LuceneTestCase
BreakIterator bi = BreakIterator.getWordInstance(Locale.getDefault());
CharArrayIterator ci = CharArrayIterator.newWordInstance();
for (int i = 0; i < 10000; i++) {
char text[] = _TestUtil.randomUnicodeString(random()).toCharArray();
@ -43,7 +44,8 @@ public class TestCharArrayIterator extends LuceneTestCase {
/* run this to test if your JRE is buggy
public void testWordInstanceJREBUG() {
BreakIterator bi = BreakIterator.getWordInstance();
// we use the default locale, as it's randomized by LuceneTestCase
BreakIterator bi = BreakIterator.getWordInstance(Locale.getDefault());
Segment ci = new Segment();
for (int i = 0; i < 10000; i++) {
char text[] = _TestUtil.randomUnicodeString(random).toCharArray();
@ -60,7 +62,8 @@ public class TestCharArrayIterator extends LuceneTestCase {
}
public void testConsumeSentenceInstance() {
BreakIterator bi = BreakIterator.getSentenceInstance();
// we use the default locale, as it's randomized by LuceneTestCase
BreakIterator bi = BreakIterator.getSentenceInstance(Locale.getDefault());
CharArrayIterator ci = CharArrayIterator.newSentenceInstance();
for (int i = 0; i < 10000; i++) {
char text[] = _TestUtil.randomUnicodeString(random()).toCharArray();
@ -71,7 +74,8 @@ public class TestCharArrayIterator extends LuceneTestCase {
/* run this to test if your JRE is buggy
public void testSentenceInstanceJREBUG() {
BreakIterator bi = BreakIterator.getSentenceInstance();
// we use the default locale, as it's randomized by LuceneTestCase
BreakIterator bi = BreakIterator.getSentenceInstance(Locale.getDefault());
Segment ci = new Segment();
for (int i = 0; i < 10000; i++) {
char text[] = _TestUtil.randomUnicodeString(random).toCharArray();
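These test changes go the opposite direction from the rest of the merge: the default locale is wanted here, because LuceneTestCase randomizes it per run. Writing BreakIterator.getWordInstance(Locale.getDefault()) preserves that behavior while making the locale dependency visible instead of hiding it in a no-arg overload. A minimal sketch of the iterator the tests exercise:

import java.text.BreakIterator;
import java.util.Locale;

public class WordBoundaries {
  public static void main(String[] args) {
    // Same behavior as the no-arg getWordInstance(), but explicit.
    BreakIterator bi = BreakIterator.getWordInstance(Locale.getDefault());
    bi.setText("consume word instance test");
    int start = bi.first();
    for (int end = bi.next(); end != BreakIterator.DONE; start = end, end = bi.next()) {
      System.out.println("[" + start + "," + end + ")");
    }
  }
}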

View File

@ -36,7 +36,7 @@ public class TestCharArrayMap extends LuceneTestCase {
key[j] = (char)random().nextInt(127);
}
String keyStr = new String(key);
String hmapKey = ignoreCase ? keyStr.toLowerCase(Locale.ENGLISH) : keyStr;
String hmapKey = ignoreCase ? keyStr.toLowerCase(Locale.ROOT) : keyStr;
int val = random().nextInt();

View File

@ -208,16 +208,16 @@ public class TestCharArraySet extends LuceneTestCase {
set.add(upper);
}
for (int i = 0; i < upperArr.length; i++) {
assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i]));
assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
}
set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), false);
for (String upper : upperArr) {
set.add(upper);
}
for (int i = 0; i < upperArr.length; i++) {
assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
assertFalse(String.format(Locale.ROOT, falsePos, lowerArr[i]), set.contains(lowerArr[i]));
}
}
@ -235,8 +235,8 @@ public class TestCharArraySet extends LuceneTestCase {
set.add(upper);
}
for (int i = 0; i < upperArr.length; i++) {
assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i]));
assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
}
set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS),
false);
@ -244,8 +244,8 @@ public class TestCharArraySet extends LuceneTestCase {
set.add(upper);
}
for (int i = 0; i < upperArr.length; i++) {
assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
assertFalse(String.format(falsePos, upperArr[i]), set
assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
assertFalse(String.format(Locale.ROOT, falsePos, upperArr[i]), set
.contains(lowerArr[i]));
}
}
@ -258,7 +258,7 @@ public class TestCharArraySet extends LuceneTestCase {
List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
List<String> stopwordsUpper = new ArrayList<String>();
for (String string : stopwords) {
stopwordsUpper.add(string.toUpperCase());
stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
}
setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
setIngoreCase.add(Integer.valueOf(1));
@ -305,7 +305,7 @@ public class TestCharArraySet extends LuceneTestCase {
List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
List<String> stopwordsUpper = new ArrayList<String>();
for (String string : stopwords) {
stopwordsUpper.add(string.toUpperCase());
stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
}
setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
setIngoreCase.add(Integer.valueOf(1));
@ -351,7 +351,7 @@ public class TestCharArraySet extends LuceneTestCase {
List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
List<String> stopwordsUpper = new ArrayList<String>();
for (String string : stopwords) {
stopwordsUpper.add(string.toUpperCase());
stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
}
set.addAll(Arrays.asList(TEST_STOP_WORDS));

View File

@ -20,6 +20,7 @@ package org.apache.lucene.analysis.util;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -53,7 +54,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
// internal buffer size is 1024 make sure we have a surrogate pair right at the border
builder.insert(1023, "\ud801\udc1c");
Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString()));
assertTokenStreamContents(tokenizer, builder.toString().toLowerCase().split(" "));
assertTokenStreamContents(tokenizer, builder.toString().toLowerCase(Locale.ROOT).split(" "));
}
/*
@ -70,7 +71,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
}
builder.append("\ud801\udc1cabc");
Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString()));
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase()});
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT)});
}
}
@ -84,7 +85,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
builder.append("A");
}
Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
}
/*
@ -98,7 +99,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
}
builder.append("\ud801\udc1c");
Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
}
// LUCENE-3642: normalize SMP->BMP and check that offsets are correct

View File

@ -123,11 +123,11 @@ public class GenerateJflexTLDMacros {
while (null != (line = reader.readLine())) {
Matcher matcher = TLD_PATTERN_1.matcher(line);
if (matcher.matches()) {
TLDs.add(matcher.group(1).toLowerCase(Locale.US));
TLDs.add(matcher.group(1).toLowerCase(Locale.ROOT));
} else {
matcher = TLD_PATTERN_2.matcher(line);
if (matcher.matches()) {
TLDs.add(matcher.group(1).toLowerCase(Locale.US));
TLDs.add(matcher.group(1).toLowerCase(Locale.ROOT));
}
}
}
@ -146,7 +146,7 @@ public class GenerateJflexTLDMacros {
*/
private void writeOutput(SortedSet<String> ASCIITLDs) throws IOException {
final DateFormat dateFormat = DateFormat.getDateTimeInstance
(DateFormat.FULL, DateFormat.FULL, Locale.US);
(DateFormat.FULL, DateFormat.FULL, Locale.ROOT);
dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
final Writer writer = new OutputStreamWriter
(new FileOutputStream(outputFile), "UTF-8");

View File

@ -64,7 +64,7 @@ public class TestICUCollationKeyAnalyzer extends CollationTestBase {
//
public void testCollationKeySort() throws Exception {
Analyzer usAnalyzer = new ICUCollationKeyAnalyzer
(TEST_VERSION_CURRENT, Collator.getInstance(Locale.US));
(TEST_VERSION_CURRENT, Collator.getInstance(Locale.ROOT));
Analyzer franceAnalyzer = new ICUCollationKeyAnalyzer
(TEST_VERSION_CURRENT, Collator.getInstance(Locale.FRANCE));
Analyzer swedenAnalyzer = new ICUCollationKeyAnalyzer
@ -73,7 +73,7 @@ public class TestICUCollationKeyAnalyzer extends CollationTestBase {
(TEST_VERSION_CURRENT, Collator.getInstance(new Locale("da", "dk")));
// The ICU Collator and java.text.Collator implementations differ in their
// orderings - "BFJHD" is the ordering for the ICU Collator for Locale.US.
// orderings - "BFJHD" is the ordering for the ICU Collator for Locale.ROOT.
testCollationKeySort
(usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
"BFJHD", "ECAGI", "BJDFH", "BJDHF");

View File

@ -29,7 +29,7 @@ public class GenerateHTMLStripCharFilterSupplementaryMacros {
private static final UnicodeSet BMP = new UnicodeSet("[\u0000-\uFFFF]");
private static final String NL = System.getProperty("line.separator");
private static final DateFormat DATE_FORMAT = DateFormat.getDateTimeInstance
(DateFormat.FULL, DateFormat.FULL, Locale.US);
(DateFormat.FULL, DateFormat.FULL, Locale.ROOT);
static {
DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
}

View File

@ -32,7 +32,7 @@ public class GenerateJFlexSupplementaryMacros {
private static final UnicodeSet BMP = new UnicodeSet("[\u0000-\uFFFF]");
private static final String NL = System.getProperty("line.separator");
private static final DateFormat DATE_FORMAT = DateFormat.getDateTimeInstance
(DateFormat.FULL, DateFormat.FULL, Locale.US);
(DateFormat.FULL, DateFormat.FULL, Locale.ROOT);
static {
DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
}

View File

@ -607,7 +607,7 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
private void doTestBocchan(int numIterations) throws Exception {
LineNumberReader reader = new LineNumberReader(new InputStreamReader(
this.getClass().getResourceAsStream("bocchan.utf-8")));
this.getClass().getResourceAsStream("bocchan.utf-8"), "UTF-8"));
String line = reader.readLine();
reader.close();

View File

@ -65,7 +65,7 @@ public class StempelStemmer {
DataInputStream in = null;
try {
in = new DataInputStream(new BufferedInputStream(stemmerTable));
String method = in.readUTF().toUpperCase(Locale.ENGLISH);
String method = in.readUTF().toUpperCase(Locale.ROOT);
if (method.indexOf('M') < 0) {
return new org.egothor.stemmer.Trie(in);
} else {

View File

@ -63,6 +63,7 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.util.Locale;
import java.util.StringTokenizer;
/**
@ -89,7 +90,7 @@ public class Compile {
return;
}
args[0].toUpperCase();
args[0].toUpperCase(Locale.ROOT);
backward = args[0].charAt(0) == '-';
int qq = (backward) ? 1 : 0;
@ -127,7 +128,7 @@ public class Compile {
new FileInputStream(args[i]), charset)));
for (String line = in.readLine(); line != null; line = in.readLine()) {
try {
line = line.toLowerCase();
line = line.toLowerCase(Locale.ROOT);
StringTokenizer st = new StringTokenizer(line);
String stem = st.nextToken();
if (storeorig) {

View File

@ -55,9 +55,11 @@
package org.egothor.stemmer;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.util.Locale;
import java.util.StringTokenizer;
/**
@ -95,10 +97,11 @@ public class DiffIt {
// System.out.println("[" + args[i] + "]");
Diff diff = new Diff(ins, del, rep, nop);
try {
in = new LineNumberReader(new BufferedReader(new FileReader(args[i])));
String charset = System.getProperty("egothor.stemmer.charset", "UTF-8");
in = new LineNumberReader(new BufferedReader(new InputStreamReader(new FileInputStream(args[i]), charset)));
for (String line = in.readLine(); line != null; line = in.readLine()) {
try {
line = line.toLowerCase();
line = line.toLowerCase(Locale.ROOT);
StringTokenizer st = new StringTokenizer(line);
String stem = st.nextToken();
System.out.println(stem + " -a");

View File

@ -60,12 +60,14 @@ import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.net.URI;
import java.util.Locale;
import java.util.StringTokenizer;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
public class TestCompile extends LuceneTestCase {
@ -107,7 +109,7 @@ public class TestCompile extends LuceneTestCase {
Trie trie;
DataInputStream is = new DataInputStream(new BufferedInputStream(
new FileInputStream(path)));
String method = is.readUTF().toUpperCase();
String method = is.readUTF().toUpperCase(Locale.ROOT);
if (method.indexOf('M') < 0) {
trie = new Trie(is);
} else {
@ -120,11 +122,11 @@ public class TestCompile extends LuceneTestCase {
private static void assertTrie(Trie trie, String file, boolean usefull,
boolean storeorig) throws Exception {
LineNumberReader in = new LineNumberReader(new BufferedReader(
new FileReader(file)));
new InputStreamReader(new FileInputStream(file), IOUtils.CHARSET_UTF_8)));
for (String line = in.readLine(); line != null; line = in.readLine()) {
try {
line = line.toLowerCase();
line = line.toLowerCase(Locale.ROOT);
StringTokenizer st = new StringTokenizer(line);
String stem = st.nextToken();
if (storeorig) {
@ -132,7 +134,7 @@ public class TestCompile extends LuceneTestCase {
.getLastOnPath(stem);
StringBuilder stm = new StringBuilder(stem);
Diff.apply(stm, cmd);
assertEquals(stem.toLowerCase(), stm.toString().toLowerCase());
assertEquals(stem.toLowerCase(Locale.ROOT), stm.toString().toLowerCase(Locale.ROOT));
}
while (st.hasMoreTokens()) {
String token = st.nextToken();
@ -143,7 +145,7 @@ public class TestCompile extends LuceneTestCase {
.getLastOnPath(token);
StringBuilder stm = new StringBuilder(token);
Diff.apply(stm, cmd);
assertEquals(stem.toLowerCase(), stm.toString().toLowerCase());
assertEquals(stem.toLowerCase(Locale.ROOT), stm.toString().toLowerCase(Locale.ROOT));
}
} catch (java.util.NoSuchElementException x) {
// no base token (stem) on a line

View File

@ -262,9 +262,11 @@
<target name="init" depends="module-build.init,resolve-icu,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet"/>
<target name="clean-javacc">
<fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">
<containsregexp expression="Generated.*By.*JavaCC"/>
</fileset>
<delete>
<fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">
<containsregexp expression="Generated.*By.*JavaCC"/>
</fileset>
</delete>
</target>
<target name="javacc" depends="init,javacc-check" if="javacc.present">

View File

@ -23,6 +23,7 @@ import java.io.Reader;
import org.apache.lucene.benchmark.byTask.utils.Algorithm;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.util.IOUtils;
/**
@ -106,7 +107,7 @@ public class Benchmark {
Benchmark benchmark = null;
try {
benchmark = new Benchmark(new FileReader(algFile));
benchmark = new Benchmark(IOUtils.getDecodingReader(algFile, IOUtils.CHARSET_UTF_8));
} catch (Exception e) {
e.printStackTrace();
System.exit(1);

View File

@ -18,12 +18,14 @@ package org.apache.lucene.benchmark.byTask.feeds;
*/
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.util.IOUtils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.DateFormat;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
@ -161,7 +163,7 @@ public class DirContentSource extends ContentSource {
dfi = new DateFormatInfo();
dfi.pos = new ParsePosition(0);
// date format: 30-MAR-1987 14:22:36.87
dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS", Locale.US);
dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS", Locale.ROOT);
dfi.df.setLenient(true);
dateFormat.set(dfi);
}
@ -198,7 +200,7 @@ public class DirContentSource extends ContentSource {
name = f.getCanonicalPath()+"_"+iteration;
}
BufferedReader reader = new BufferedReader(new FileReader(f));
BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), IOUtils.CHARSET_UTF_8));
String line = null;
//First line is the date, 3rd is the title, rest is body
String dateStr = reader.readLine();

View File

@ -29,6 +29,7 @@ import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
import java.util.TimeZone;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.benchmark.byTask.utils.Config;
@ -182,8 +183,8 @@ public class DocMaker implements Closeable {
private boolean storeBytes = false;
private static class DateUtil {
public SimpleDateFormat parser = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss", Locale.US);
public Calendar cal = Calendar.getInstance();
public SimpleDateFormat parser = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss", Locale.ROOT);
public Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT);
public ParsePosition pos = new ParsePosition(0);
public DateUtil() {
parser.setLenient(true);
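DocMaker's DateUtil now pins all three defaults at once: the parser's locale, and the calendar's timezone and locale. Without a fixed-zone calendar, the same date string can map to different epoch millis depending on the host timezone. A self-contained sketch of the deterministic setup (timezone on the parser added here for completeness):

import java.text.ParsePosition;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;

public class DeterministicDates {
  public static void main(String[] args) {
    SimpleDateFormat parser =
        new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss", Locale.ROOT);
    parser.setLenient(true);
    parser.setTimeZone(TimeZone.getTimeZone("GMT"));
    Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT);
    Date d = parser.parse("30-MAR-1987 14:22:36", new ParsePosition(0));
    cal.setTime(d);
    // Epoch millis independent of the host machine's timezone:
    System.out.println(d.getTime());
  }
}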

View File

@ -25,6 +25,7 @@ import java.io.InputStreamReader;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import org.apache.lucene.benchmark.byTask.utils.Config;
@ -146,7 +147,7 @@ public class EnwikiContentSource extends ContentSource {
case BODY:
body = contents.toString();
//workaround that startswith doesn't have an ignore case option, get at least 20 chars.
String startsWith = body.substring(0, Math.min(10, contents.length())).toLowerCase();
String startsWith = body.substring(0, Math.min(10, contents.length())).toLowerCase(Locale.ROOT);
if (startsWith.startsWith("#redirect")) {
body = null;
}

View File

@ -5,6 +5,7 @@ import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
import java.io.*;
@ -59,13 +60,14 @@ public class FileBasedQueryMaker extends AbstractQueryMaker implements QueryMake
{
File file = new File(fileName);
Reader reader = null;
// note: we use a decoding reader, so if your queries are screwed up, you'll know
if (file.exists()) {
reader = new FileReader(file);
reader = IOUtils.getDecodingReader(file, IOUtils.CHARSET_UTF_8);
} else {
//see if we can find it as a resource
InputStream asStream = FileBasedQueryMaker.class.getClassLoader().getResourceAsStream(fileName);
if (asStream != null) {
reader = new InputStreamReader(asStream);
reader = IOUtils.getDecodingReader(asStream, IOUtils.CHARSET_UTF_8);
}
}
if (reader != null) {
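IOUtils.getDecodingReader is the strict counterpart of a bare new InputStreamReader(stream): it fixes the charset (UTF-8 here) and, per the comment above, reports malformed bytes instead of silently substituting replacement characters. A rough plain-JDK equivalent, as a sketch:

import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;

public class StrictUtf8Reader {
  public static Reader open(String path) throws Exception {
    CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)        // fail fast
        .onUnmappableCharacter(CodingErrorAction.REPORT);  // no silent '?'
    return new InputStreamReader(new FileInputStream(path), decoder);
  }
}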

View File

@ -35,7 +35,7 @@ public class LongToEnglishContentSource extends ContentSource{
}
// TODO: we could take param to specify locale...
private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ENGLISH,
private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ROOT,
RuleBasedNumberFormat.SPELLOUT);
@Override
public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {

View File

@ -37,7 +37,7 @@ public class LongToEnglishQueryMaker implements QueryMaker {
protected QueryParser parser;
// TODO: we could take param to specify locale...
private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ENGLISH,
private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ROOT,
RuleBasedNumberFormat.SPELLOUT);
public Query makeQuery(int size) throws Exception {

View File

@ -19,8 +19,9 @@ package org.apache.lucene.benchmark.byTask.feeds;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.text.DateFormat;
import java.text.ParsePosition;
import java.text.SimpleDateFormat;
@ -29,6 +30,7 @@ import java.util.Date;
import java.util.Locale;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.util.IOUtils;
/**
* A {@link ContentSource} reading from the Reuters collection.
@ -74,7 +76,7 @@ public class ReutersContentSource extends ContentSource {
if (dfi == null) {
dfi = new DateFormatInfo();
// date format: 30-MAR-1987 14:22:36.87
dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.ROOT);
dfi.df.setLenient(true);
dfi.pos = new ParsePosition(0);
dateFormat.set(dfi);
@ -112,7 +114,7 @@ public class ReutersContentSource extends ContentSource {
name = f.getCanonicalPath() + "_" + iteration;
}
BufferedReader reader = new BufferedReader(new FileReader(f));
BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), IOUtils.CHARSET_UTF_8));
try {
// First line is the date, 3rd is the title, rest is body
String dateStr = reader.readLine();

View File

@ -108,7 +108,7 @@ public class TrecContentSource extends ContentSource {
dfi = new DateFormatInfo();
dfi.dfs = new SimpleDateFormat[DATE_FORMATS.length];
for (int i = 0; i < dfi.dfs.length; i++) {
dfi.dfs[i] = new SimpleDateFormat(DATE_FORMATS[i], Locale.US);
dfi.dfs[i] = new SimpleDateFormat(DATE_FORMATS[i], Locale.ROOT);
dfi.dfs[i].setLenient(true);
}
dfi.pos = new ParsePosition(0);

View File

@ -47,7 +47,7 @@ public abstract class TrecDocParser {
static final Map<String,ParsePathType> pathName2Type = new HashMap<String,ParsePathType>();
static {
for (ParsePathType ppt : ParsePathType.values()) {
pathName2Type.put(ppt.name().toUpperCase(Locale.ENGLISH),ppt);
pathName2Type.put(ppt.name().toUpperCase(Locale.ROOT),ppt);
}
}
@ -60,7 +60,7 @@ public abstract class TrecDocParser {
public static ParsePathType pathType(File f) {
int pathLength = 0;
while (f != null && ++pathLength < MAX_PATH_LENGTH) {
ParsePathType ppt = pathName2Type.get(f.getName().toUpperCase(Locale.ENGLISH));
ParsePathType ppt = pathName2Type.get(f.getName().toUpperCase(Locale.ROOT));
if (ppt!=null) {
return ppt;
}

View File

@ -0,0 +1,112 @@
/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */
/* JavaCCOptions:STATIC=false */
package org.apache.lucene.benchmark.byTask.feeds.demohtml;
/**
* This interface describes a character stream that maintains line and
* column number positions of the characters. It also has the capability
* to backup the stream to some extent. An implementation of this
* interface is used in the TokenManager implementation generated by
* JavaCCParser.
*
* All the methods except backup can be implemented in any fashion. backup
* needs to be implemented correctly for the correct operation of the lexer.
* Rest of the methods are all used to get information like line number,
* column number and the String that constitutes a token and are not used
* by the lexer. Hence their implementation won't affect the generated lexer's
* operation.
*/
public interface CharStream {
/**
* Returns the next character from the selected input. The method
* of selecting the input is the responsibility of the class
* implementing this interface. Can throw any java.io.IOException.
*/
char readChar() throws java.io.IOException;
/**
* Returns the column position of the character last read.
* @deprecated
* @see #getEndColumn
*/
int getColumn();
/**
* Returns the line number of the character last read.
* @deprecated
* @see #getEndLine
*/
int getLine();
/**
* Returns the column number of the last character for current token (being
* matched after the last call to BeginTOken).
*/
int getEndColumn();
/**
* Returns the line number of the last character for current token (being
* matched after the last call to BeginTOken).
*/
int getEndLine();
/**
* Returns the column number of the first character for current token (being
* matched after the last call to BeginTOken).
*/
int getBeginColumn();
/**
* Returns the line number of the first character for current token (being
* matched after the last call to BeginTOken).
*/
int getBeginLine();
/**
* Backs up the input stream by amount steps. Lexer calls this method if it
* had already read some characters, but could not use them to match a
* (longer) token. So, they will be used again as the prefix of the next
* token and it is the implemetation's responsibility to do this right.
*/
void backup(int amount);
/**
* Returns the next character that marks the beginning of the next token.
* All characters must remain in the buffer between two successive calls
* to this method to implement backup correctly.
*/
char BeginToken() throws java.io.IOException;
/**
* Returns a string made up of characters from the marked token beginning
* to the current buffer position. Implementations have the choice of returning
* anything that they want to. For example, for efficiency, one might decide
* to just return null, which is a valid implementation.
*/
String GetImage();
/**
* Returns an array of characters that make up the suffix of length 'len' for
* the currently matched token. This is used to build up the matched string
* for use in actions in the case of MORE. A simple and inefficient
* implementation of this is as follows :
*
* {
* String t = GetImage();
* return t.substring(t.length() - len, t.length()).toCharArray();
* }
*/
char[] GetSuffix(int len);
/**
* The lexer calls this function to indicate that it is done with the stream
* and hence implementations can free any resources held by this class.
* Again, the body of this function can be just empty and it will not
* affect the lexer's operation.
*/
void Done();
}
/* JavaCC - OriginalChecksum=e26d9399cd34335f985e19c1fa86c11b (do not edit this line) */

View File

@ -0,0 +1,123 @@
// FastCharStream.java
package org.apache.lucene.benchmark.byTask.feeds.demohtml;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
import java.io.*;
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
* this does not do line-number counting, but instead keeps track of the
* character position of the token in the input, as required by Lucene's {@link
* org.apache.lucene.analysis.Token} API.
* */
public final class FastCharStream implements CharStream {
char[] buffer = null;
int bufferLength = 0; // end of valid chars
int bufferPosition = 0; // next char to read
int tokenStart = 0; // offset in buffer
int bufferStart = 0; // position in file of buffer
Reader input; // source of chars
/** Constructs from a Reader. */
public FastCharStream(Reader r) {
input = r;
}
public final char readChar() throws IOException {
if (bufferPosition >= bufferLength)
refill();
return buffer[bufferPosition++];
}
private final void refill() throws IOException {
int newPosition = bufferLength - tokenStart;
if (tokenStart == 0) { // token won't fit in buffer
if (buffer == null) { // first time: alloc buffer
buffer = new char[2048];
} else if (bufferLength == buffer.length) { // grow buffer
char[] newBuffer = new char[buffer.length*2];
System.arraycopy(buffer, 0, newBuffer, 0, bufferLength);
buffer = newBuffer;
}
} else { // shift token to front
System.arraycopy(buffer, tokenStart, buffer, 0, newPosition);
}
bufferLength = newPosition; // update state
bufferPosition = newPosition;
bufferStart += tokenStart;
tokenStart = 0;
int charsRead = // fill space in buffer
input.read(buffer, newPosition, buffer.length-newPosition);
if (charsRead == -1)
throw new IOException("read past eof");
else
bufferLength += charsRead;
}
public final char BeginToken() throws IOException {
tokenStart = bufferPosition;
return readChar();
}
public final void backup(int amount) {
bufferPosition -= amount;
}
public final String GetImage() {
return new String(buffer, tokenStart, bufferPosition - tokenStart);
}
public final char[] GetSuffix(int len) {
char[] value = new char[len];
System.arraycopy(buffer, bufferPosition - len, value, 0, len);
return value;
}
public final void Done() {
try {
input.close();
} catch (IOException e) {
}
}
public final int getColumn() {
return bufferStart + bufferPosition;
}
public final int getLine() {
return 1;
}
public final int getEndColumn() {
return bufferStart + bufferPosition;
}
public final int getEndLine() {
return 1;
}
public final int getBeginColumn() {
return bufferStart + tokenStart;
}
public final int getBeginLine() {
return 1;
}
}
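FastCharStream is what lets the parser drop its InputStream constructors: the generated SimpleCharStream offered InputStream-based constructors that decoded with the default charset, whereas FastCharStream only accepts a Reader, so the caller has already made the charset decision (see USER_CHAR_STREAM=true in the .jj options below). A hedged usage sketch, assuming the class sits alongside org.apache.lucene.benchmark.byTask.feeds.demohtml.HTMLParser:

import java.io.Reader;
import java.io.StringReader;

public class ParseHtmlSketch {
  public static void main(String[] args) {
    // A String source sidesteps charsets entirely; the Reader-based
    // HTMLParser constructor wraps it in a FastCharStream internally.
    Reader reader = new StringReader("<html><title>t</title></html>");
    HTMLParser parser = new HTMLParser(reader);
  }
}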

View File

@ -29,6 +29,10 @@ public class HTMLParser implements HTMLParserConstants {
private MyPipedInputStream pipeInStream = null;
private PipedOutputStream pipeOutStream = null;
public HTMLParser(Reader reader) {
this(new FastCharStream(reader));
}
private class MyPipedInputStream extends PipedInputStream{
public MyPipedInputStream(){
@ -227,7 +231,7 @@ InterruptedException {
Token t1, t2;
boolean inImg = false;
t1 = jj_consume_token(TagName);
String tagName = t1.image.toLowerCase(Locale.ENGLISH);
String tagName = t1.image.toLowerCase(Locale.ROOT);
if(Tags.WS_ELEMS.contains(tagName) ) {
addSpace();
}
@ -264,7 +268,7 @@ InterruptedException {
)
&& t2 != null)
{
currentMetaTag=t2.image.toLowerCase(Locale.ENGLISH);
currentMetaTag=t2.image.toLowerCase(Locale.ROOT);
if(currentMetaTag != null && currentMetaContent != null) {
addMetaTag();
}
@ -272,7 +276,7 @@ InterruptedException {
if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 !=
null)
{
currentMetaContent=t2.image.toLowerCase(Locale.ENGLISH);
currentMetaContent=t2.image.toLowerCase(Locale.ROOT);
if(currentMetaTag != null && currentMetaContent != null) {
addMetaTag();
}
@ -464,7 +468,6 @@ null)
/** Generated Token Manager. */
public HTMLParserTokenManager token_source;
SimpleCharStream jj_input_stream;
/** Current token. */
public Token token;
/** Next token. */
@ -485,14 +488,9 @@ null)
private boolean jj_rescan = false;
private int jj_gc = 0;
/** Constructor with InputStream. */
public HTMLParser(java.io.InputStream stream) {
this(stream, null);
}
/** Constructor with InputStream and supplied encoding */
public HTMLParser(java.io.InputStream stream, String encoding) {
try { jj_input_stream = new SimpleCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
token_source = new HTMLParserTokenManager(jj_input_stream);
/** Constructor with user supplied CharStream. */
public HTMLParser(CharStream stream) {
token_source = new HTMLParserTokenManager(stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
@ -501,35 +499,8 @@ null)
}
/** Reinitialise. */
public void ReInit(java.io.InputStream stream) {
ReInit(stream, null);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream stream, String encoding) {
try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
token_source.ReInit(jj_input_stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 14; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
/** Constructor. */
public HTMLParser(java.io.Reader stream) {
jj_input_stream = new SimpleCharStream(stream, 1, 1);
token_source = new HTMLParserTokenManager(jj_input_stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 14; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
/** Reinitialise. */
public void ReInit(java.io.Reader stream) {
jj_input_stream.ReInit(stream, 1, 1);
token_source.ReInit(jj_input_stream);
public void ReInit(CharStream stream) {
token_source.ReInit(stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
@ -631,7 +602,7 @@ null)
return (jj_ntk = jj_nt.kind);
}
private java.util.List<int[]> jj_expentries = new java.util.ArrayList<int[]>();
private java.util.List jj_expentries = new java.util.ArrayList();
private int[] jj_expentry;
private int jj_kind = -1;
private int[] jj_lasttokens = new int[100];
@ -691,7 +662,7 @@ null)
jj_add_error_token(0, 0);
int[][] exptokseq = new int[jj_expentries.size()][];
for (int i = 0; i < jj_expentries.size(); i++) {
exptokseq[i] = jj_expentries.get(i);
exptokseq[i] = (int[])jj_expentries.get(i);
}
return new ParseException(token, exptokseq, tokenImage);
}

View File

@ -22,6 +22,7 @@ options {
//DEBUG_LOOKAHEAD = true;
//DEBUG_TOKEN_MANAGER = true;
UNICODE_INPUT = true;
USER_CHAR_STREAM=true;
}
PARSER_BEGIN(HTMLParser)
@ -56,6 +57,10 @@ public class HTMLParser {
private MyPipedInputStream pipeInStream = null;
private PipedOutputStream pipeOutStream = null;
public HTMLParser(Reader reader) {
this(new FastCharStream(reader));
}
private class MyPipedInputStream extends PipedInputStream{
public MyPipedInputStream(){
@ -227,7 +232,7 @@ void Tag() throws IOException :
}
{
t1=<TagName> {
String tagName = t1.image.toLowerCase(Locale.ENGLISH);
String tagName = t1.image.toLowerCase(Locale.ROOT);
if(Tags.WS_ELEMS.contains(tagName) ) {
addSpace();
}
@ -249,7 +254,7 @@ void Tag() throws IOException :
)
&& t2 != null)
{
currentMetaTag=t2.image.toLowerCase(Locale.ENGLISH);
currentMetaTag=t2.image.toLowerCase(Locale.ROOT);
if(currentMetaTag != null && currentMetaContent != null) {
addMetaTag();
}
@ -257,7 +262,7 @@ void Tag() throws IOException :
if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 !=
null)
{
currentMetaContent=t2.image.toLowerCase(Locale.ENGLISH);
currentMetaContent=t2.image.toLowerCase(Locale.ROOT);
if(currentMetaTag != null && currentMetaContent != null) {
addMetaTag();
}

View File

@ -464,7 +464,7 @@ private int jjMoveNfa_0(int startState, int curPos)
}
else
{
int hiByte = (curChar >> 8);
int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
@ -569,7 +569,7 @@ private int jjMoveNfa_5(int startState, int curPos)
}
else
{
int hiByte = (curChar >> 8);
int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
@ -670,7 +670,7 @@ private int jjMoveNfa_7(int startState, int curPos)
}
else
{
int hiByte = (curChar >> 8);
int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
@ -766,7 +766,7 @@ private int jjMoveNfa_4(int startState, int curPos)
}
else
{
int hiByte = (curChar >> 8);
int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
@ -892,7 +892,7 @@ private int jjMoveNfa_3(int startState, int curPos)
}
else
{
int hiByte = (curChar >> 8);
int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
@ -1061,7 +1061,7 @@ private int jjMoveNfa_6(int startState, int curPos)
}
else
{
int hiByte = (curChar >> 8);
int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
@ -1205,7 +1205,7 @@ private int jjMoveNfa_1(int startState, int curPos)
}
else
{
int hiByte = (curChar >> 8);
int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
@ -1361,7 +1361,7 @@ private int jjMoveNfa_2(int startState, int curPos)
}
else
{
int hiByte = (curChar >> 8);
int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
@ -1441,25 +1441,23 @@ static final long[] jjtoToken = {
static final long[] jjtoSkip = {
0x400000L,
};
protected SimpleCharStream input_stream;
protected CharStream input_stream;
private final int[] jjrounds = new int[28];
private final int[] jjstateSet = new int[56];
protected char curChar;
/** Constructor. */
public HTMLParserTokenManager(SimpleCharStream stream){
if (SimpleCharStream.staticFlag)
throw new Error("ERROR: Cannot use a static CharStream class with a non-static lexical analyzer.");
public HTMLParserTokenManager(CharStream stream){
input_stream = stream;
}
/** Constructor. */
public HTMLParserTokenManager(SimpleCharStream stream, int lexState){
public HTMLParserTokenManager(CharStream stream, int lexState){
this(stream);
SwitchTo(lexState);
}
/** Reinitialise parser. */
public void ReInit(SimpleCharStream stream)
public void ReInit(CharStream stream)
{
jjmatchedPos = jjnewStateCnt = 0;
curLexState = defaultLexState;
@ -1475,7 +1473,7 @@ private void ReInitRounds()
}
/** Reinitialise parser. */
public void ReInit(SimpleCharStream stream, int lexState)
public void ReInit(CharStream stream, int lexState)
{
ReInit(stream);
SwitchTo(lexState);

View File

@ -195,4 +195,4 @@ public class ParseException extends Exception {
}
}
/* JavaCC - OriginalChecksum=e5376178619291bc9d2c0c6647dc3cef (do not edit this line) */
/* JavaCC - OriginalChecksum=e449d0e43f3d85deb1260a88b7e90fcd (do not edit this line) */

View File

@ -1,472 +0,0 @@
/* Generated By:JavaCC: Do not edit this line. SimpleCharStream.java Version 4.1 */
/* JavaCCOptions:STATIC=false */
package org.apache.lucene.benchmark.byTask.feeds.demohtml;
/**
* An implementation of interface CharStream, where the stream is assumed to
* contain only ASCII characters (without unicode processing).
*/
public class SimpleCharStream
{
/** Whether parser is static. */
public static final boolean staticFlag = false;
int bufsize;
int available;
int tokenBegin;
/** Position in buffer. */
public int bufpos = -1;
protected int bufline[];
protected int bufcolumn[];
protected int column = 0;
protected int line = 1;
protected boolean prevCharIsCR = false;
protected boolean prevCharIsLF = false;
protected java.io.Reader inputStream;
protected char[] buffer;
protected int maxNextCharInd = 0;
protected int inBuf = 0;
protected int tabSize = 8;
protected void setTabSize(int i) { tabSize = i; }
protected int getTabSize(int i) { return tabSize; }
protected void ExpandBuff(boolean wrapAround)
{
char[] newbuffer = new char[bufsize + 2048];
int newbufline[] = new int[bufsize + 2048];
int newbufcolumn[] = new int[bufsize + 2048];
try
{
if (wrapAround)
{
System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
System.arraycopy(buffer, 0, newbuffer,
bufsize - tokenBegin, bufpos);
buffer = newbuffer;
System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
System.arraycopy(bufline, 0, newbufline, bufsize - tokenBegin, bufpos);
bufline = newbufline;
System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
System.arraycopy(bufcolumn, 0, newbufcolumn, bufsize - tokenBegin, bufpos);
bufcolumn = newbufcolumn;
maxNextCharInd = (bufpos += (bufsize - tokenBegin));
}
else
{
System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
buffer = newbuffer;
System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
bufline = newbufline;
System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
bufcolumn = newbufcolumn;
maxNextCharInd = (bufpos -= tokenBegin);
}
}
catch (Throwable t)
{
throw new Error(t.getMessage());
}
bufsize += 2048;
available = bufsize;
tokenBegin = 0;
}
protected void FillBuff() throws java.io.IOException
{
if (maxNextCharInd == available)
{
if (available == bufsize)
{
if (tokenBegin > 2048)
{
bufpos = maxNextCharInd = 0;
available = tokenBegin;
}
else if (tokenBegin < 0)
bufpos = maxNextCharInd = 0;
else
ExpandBuff(false);
}
else if (available > tokenBegin)
available = bufsize;
else if ((tokenBegin - available) < 2048)
ExpandBuff(true);
else
available = tokenBegin;
}
int i;
try {
if ((i = inputStream.read(buffer, maxNextCharInd,
available - maxNextCharInd)) == -1)
{
inputStream.close();
throw new java.io.IOException();
}
else
maxNextCharInd += i;
return;
}
catch(java.io.IOException e) {
--bufpos;
backup(0);
if (tokenBegin == -1)
tokenBegin = bufpos;
throw e;
}
}
/** Start. */
public char BeginToken() throws java.io.IOException
{
tokenBegin = -1;
char c = readChar();
tokenBegin = bufpos;
return c;
}
protected void UpdateLineColumn(char c)
{
column++;
if (prevCharIsLF)
{
prevCharIsLF = false;
line += (column = 1);
}
else if (prevCharIsCR)
{
prevCharIsCR = false;
if (c == '\n')
{
prevCharIsLF = true;
}
else
line += (column = 1);
}
switch (c)
{
case '\r' :
prevCharIsCR = true;
break;
case '\n' :
prevCharIsLF = true;
break;
case '\t' :
column--;
column += (tabSize - (column % tabSize));
break;
default :
break;
}
bufline[bufpos] = line;
bufcolumn[bufpos] = column;
}
/** Read a character. */
public char readChar() throws java.io.IOException
{
if (inBuf > 0)
{
--inBuf;
if (++bufpos == bufsize)
bufpos = 0;
return buffer[bufpos];
}
if (++bufpos >= maxNextCharInd)
FillBuff();
char c = buffer[bufpos];
UpdateLineColumn(c);
return c;
}
/**
* @deprecated
* @see #getEndColumn
*/
public int getColumn() {
return bufcolumn[bufpos];
}
/**
* @deprecated
* @see #getEndLine
*/
public int getLine() {
return bufline[bufpos];
}
/** Get token end column number. */
public int getEndColumn() {
return bufcolumn[bufpos];
}
/** Get token end line number. */
public int getEndLine() {
return bufline[bufpos];
}
/** Get token beginning column number. */
public int getBeginColumn() {
return bufcolumn[tokenBegin];
}
/** Get token beginning line number. */
public int getBeginLine() {
return bufline[tokenBegin];
}
/** Backup a number of characters. */
public void backup(int amount) {
inBuf += amount;
if ((bufpos -= amount) < 0)
bufpos += bufsize;
}
/** Constructor. */
public SimpleCharStream(java.io.Reader dstream, int startline,
int startcolumn, int buffersize)
{
inputStream = dstream;
line = startline;
column = startcolumn - 1;
available = bufsize = buffersize;
buffer = new char[buffersize];
bufline = new int[buffersize];
bufcolumn = new int[buffersize];
}
/** Constructor. */
public SimpleCharStream(java.io.Reader dstream, int startline,
int startcolumn)
{
this(dstream, startline, startcolumn, 4096);
}
/** Constructor. */
public SimpleCharStream(java.io.Reader dstream)
{
this(dstream, 1, 1, 4096);
}
/** Reinitialise. */
public void ReInit(java.io.Reader dstream, int startline,
int startcolumn, int buffersize)
{
inputStream = dstream;
line = startline;
column = startcolumn - 1;
if (buffer == null || buffersize != buffer.length)
{
available = bufsize = buffersize;
buffer = new char[buffersize];
bufline = new int[buffersize];
bufcolumn = new int[buffersize];
}
prevCharIsLF = prevCharIsCR = false;
tokenBegin = inBuf = maxNextCharInd = 0;
bufpos = -1;
}
/** Reinitialise. */
public void ReInit(java.io.Reader dstream, int startline,
int startcolumn)
{
ReInit(dstream, startline, startcolumn, 4096);
}
/** Reinitialise. */
public void ReInit(java.io.Reader dstream)
{
ReInit(dstream, 1, 1, 4096);
}
/** Constructor. */
public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
{
this(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
}
/** Constructor. */
public SimpleCharStream(java.io.InputStream dstream, int startline,
int startcolumn, int buffersize)
{
this(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
}
/** Constructor. */
public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
int startcolumn) throws java.io.UnsupportedEncodingException
{
this(dstream, encoding, startline, startcolumn, 4096);
}
/** Constructor. */
public SimpleCharStream(java.io.InputStream dstream, int startline,
int startcolumn)
{
this(dstream, startline, startcolumn, 4096);
}
/** Constructor. */
public SimpleCharStream(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
{
this(dstream, encoding, 1, 1, 4096);
}
/** Constructor. */
public SimpleCharStream(java.io.InputStream dstream)
{
this(dstream, 1, 1, 4096);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream dstream, String encoding, int startline,
int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
{
ReInit(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream dstream, int startline,
int startcolumn, int buffersize)
{
ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
{
ReInit(dstream, encoding, 1, 1, 4096);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream dstream)
{
ReInit(dstream, 1, 1, 4096);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream dstream, String encoding, int startline,
int startcolumn) throws java.io.UnsupportedEncodingException
{
ReInit(dstream, encoding, startline, startcolumn, 4096);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream dstream, int startline,
int startcolumn)
{
ReInit(dstream, startline, startcolumn, 4096);
}
/** Get token literal value. */
public String GetImage()
{
if (bufpos >= tokenBegin)
return new String(buffer, tokenBegin, bufpos - tokenBegin + 1);
else
return new String(buffer, tokenBegin, bufsize - tokenBegin) +
new String(buffer, 0, bufpos + 1);
}
/** Get the suffix. */
public char[] GetSuffix(int len)
{
char[] ret = new char[len];
if ((bufpos + 1) >= len)
System.arraycopy(buffer, bufpos - len + 1, ret, 0, len);
else
{
System.arraycopy(buffer, bufsize - (len - bufpos - 1), ret, 0,
len - bufpos - 1);
System.arraycopy(buffer, 0, ret, len - bufpos - 1, bufpos + 1);
}
return ret;
}
/** Reset buffer when finished. */
public void Done()
{
buffer = null;
bufline = null;
bufcolumn = null;
}
/**
* Method to adjust line and column numbers for the start of a token.
*/
public void adjustBeginLineColumn(int newLine, int newCol)
{
int start = tokenBegin;
int len;
if (bufpos >= tokenBegin)
{
len = bufpos - tokenBegin + inBuf + 1;
}
else
{
len = bufsize - tokenBegin + bufpos + 1 + inBuf;
}
int i = 0, j = 0, k = 0;
int nextColDiff = 0, columnDiff = 0;
while (i < len &&
bufline[j = start % bufsize] == bufline[k = ++start % bufsize])
{
bufline[j] = newLine;
nextColDiff = columnDiff + bufcolumn[k] - bufcolumn[j];
bufcolumn[j] = newCol + columnDiff;
columnDiff = nextColDiff;
i++;
}
if (i < len)
{
bufline[j] = newLine++;
bufcolumn[j] = newCol + columnDiff;
while (i++ < len)
{
if (bufline[j = start % bufsize] != bufline[++start % bufsize])
bufline[j] = newLine++;
else
bufline[j] = newLine;
}
}
line = bufline[j];
column = bufcolumn[j];
}
}
/* JavaCC - OriginalChecksum=7c2e625567f11c3058995b779d0149ad (do not edit this line) */
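
The whole generated SimpleCharStream class above is removed because its InputStream-based constructors and ReInit overloads wrap the stream in new java.io.InputStreamReader(dstream) with no charset, decoding with whatever the local platform defaults to, which is exactly the dependency LUCENE-4199 bans. The parsers are regenerated against the CharStream interface instead (see the ReInit signature change above). A minimal sketch of the safe pattern; open() is a hypothetical helper name, not part of the patch:

static java.io.Reader open(java.io.InputStream in) throws java.io.UnsupportedEncodingException {
  // new java.io.InputStreamReader(in) would decode with the platform default charset (forbidden);
  // naming the charset makes decoding identical on every JVM:
  return new java.io.InputStreamReader(in, "UTF-8");
}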


@ -121,4 +121,4 @@ public class Token {
}
}
/* JavaCC - OriginalChecksum=e49c2a0c10d50ff2ebd0639552330ce7 (do not edit this line) */
/* JavaCC - OriginalChecksum=24643dc85fd6daeec42ceba20b46ee61 (do not edit this line) */


@ -138,4 +138,4 @@ public class TokenMgrError extends Error
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
/* JavaCC - OriginalChecksum=3aee554f696e5d7a18b1ad330c1de53f (do not edit this line) */
/* JavaCC - OriginalChecksum=538f0da130356fcc0bc7db621ab0389d (do not edit this line) */


@ -18,6 +18,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
*/
import java.text.NumberFormat;
import java.util.Locale;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
@ -61,7 +62,7 @@ public class AddDocTask extends PerfTask {
@Override
protected String getLogMessage(int recsCount) {
return String.format("added %9d docs",recsCount);
return String.format(Locale.ROOT, "added %9d docs",recsCount);
}
@Override
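
Why the added Locale.ROOT argument: String.format(String, Object...) formats with Locale.getDefault(), so the same call can render digits, grouping, and decimal separators differently from machine to machine; Locale.ROOT pins a neutral rendering for log messages. A small illustration (the values are made up):

String a = String.format("%,.1f MB", 1234.5);                        // "1,234.5 MB" or "1.234,5 MB", depending on the default locale
String b = String.format(java.util.Locale.ROOT, "%,.1f MB", 1234.5); // "1,234.5 MB" on every JVM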


@ -40,6 +40,7 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.charset.Charset;
/**
* Create an index. <br>
@ -182,7 +183,7 @@ public class CreateIndexTask extends PerfTask {
iwc.setInfoStream(System.err);
} else {
File f = new File(infoStreamVal).getAbsoluteFile();
iwc.setInfoStream(new PrintStream(new BufferedOutputStream(new FileOutputStream(f))));
iwc.setInfoStream(new PrintStream(new BufferedOutputStream(new FileOutputStream(f)), false, Charset.defaultCharset().name()));
}
}
IndexWriter writer = new IndexWriter(runData.getDirectory(), iwc);
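
Note the shape of this fix: new PrintStream(OutputStream) encodes with the implicit default charset and is on the forbidden list, but an infoStream file is genuinely meant to follow local configuration, so the patch names Charset.defaultCharset() explicitly rather than switching to UTF-8. Behavior is unchanged; the choice just becomes visible to the checker. A sketch, with explicitDefault() as a hypothetical helper name:

static java.io.PrintStream explicitDefault(java.io.OutputStream os) throws java.io.UnsupportedEncodingException {
  // writes the same bytes as new java.io.PrintStream(os), but the charset choice is now auditable:
  return new java.io.PrintStream(os, false, java.nio.charset.Charset.defaultCharset().name());
}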


@ -17,6 +17,8 @@ package org.apache.lucene.benchmark.byTask.tasks;
* limitations under the License.
*/
import java.util.Locale;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.stats.Points;
import org.apache.lucene.benchmark.byTask.stats.TaskStats;
@ -266,7 +268,7 @@ public abstract class PerfTask implements Cloneable {
public void tearDown() throws Exception {
if (++logStepCount % logStep == 0) {
double time = (System.currentTimeMillis() - runData.getStartTimeMillis()) / 1000.0;
System.out.println(String.format("%7.2f",time) + " sec --> "
System.out.println(String.format(Locale.ROOT, "%7.2f",time) + " sec --> "
+ Thread.currentThread().getName() + " " + getLogMessage(logStepCount));
}
}


@ -77,7 +77,7 @@ public class SearchWithSortTask extends ReadTask {
} else {
throw new RuntimeException("You must specify the sort type ie page:int,subject:string");
}
sortField0 = new SortField(fieldName, SortField.Type.valueOf(typeString.toUpperCase(Locale.ENGLISH)));
sortField0 = new SortField(fieldName, SortField.Type.valueOf(typeString.toUpperCase(Locale.ROOT)));
}
sortFields[upto++] = sortField0;
}
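
Locale.ENGLISH already dodged the classic casing trap here; the patch simply standardizes on Locale.ROOT. The trap itself: with no locale argument, String casing follows the default locale, and under the Turkish locale the letter i maps to dotted/dotless variants, breaking enum lookups such as SortField.Type.valueOf. Sketch:

// Under a Turkish default locale, "int".toUpperCase() yields "\u0130NT" (dotted capital I),
// and SortField.Type.valueOf(...) would then throw IllegalArgumentException:
String type = "int".toUpperCase(java.util.Locale.ROOT); // "INT" on every JVM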


@ -19,6 +19,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.text.NumberFormat;
import org.apache.lucene.benchmark.byTask.PerfRunData;
@ -428,7 +429,7 @@ public class TaskSequence extends PerfTask {
sb.append(padd);
sb.append(!letChildReport ? ">" : (parallel ? "]" : "}"));
if (fixedTime) {
sb.append(" " + NumberFormat.getNumberInstance().format(runTimeSec) + "s");
sb.append(" " + NumberFormat.getNumberInstance(Locale.ROOT).format(runTimeSec) + "s");
} else if (repetitions>1) {
sb.append(" * " + repetitions);
} else if (repetitions==REPEAT_EXHAUST) {
@ -487,7 +488,7 @@ public class TaskSequence extends PerfTask {
if (rate>0) {
seqName += "_" + rate + (perMin?"/min":"/sec");
}
if (parallel && seqName.toLowerCase().indexOf("par")<0) {
if (parallel && seqName.toLowerCase(Locale.ROOT).indexOf("par")<0) {
seqName += "_Par";
}
}


@ -22,6 +22,7 @@ import java.io.StringReader;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Locale;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
@ -159,7 +160,7 @@ public class Algorithm {
} else {
stok.nextToken();
if (stok.ttype!=StreamTokenizer.TT_WORD) throw new Exception("expected rate unit: 'min' or 'sec' - "+stok.toString());
String unit = stok.sval.toLowerCase();
String unit = stok.sval.toLowerCase(Locale.ROOT);
if ("min".equals(unit)) {
((TaskSequence)prevTask).setRate((int)stok.nval,true); // set rate per min
} else if ("sec".equals(unit)) {


@ -18,6 +18,7 @@ package org.apache.lucene.benchmark.byTask.utils;
*/
import java.text.NumberFormat;
import java.util.Locale;
/**
* Formatting utilities (for reports).
@ -25,9 +26,9 @@ import java.text.NumberFormat;
public class Format {
private static NumberFormat numFormat [] = {
NumberFormat.getInstance(),
NumberFormat.getInstance(),
NumberFormat.getInstance(),
NumberFormat.getInstance(Locale.ROOT),
NumberFormat.getInstance(Locale.ROOT),
NumberFormat.getInstance(Locale.ROOT),
};
private static final String padd = " ";
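
NumberFormat.getInstance() with no argument is locale-sensitive: grouping and decimal separators vary ("1,000,000" vs "1.000.000" vs "1 000 000"), so benchmark reports would not be byte-for-byte comparable across machines. Locale.ROOT fixes the rendering. For instance:

java.text.NumberFormat nf = java.text.NumberFormat.getInstance(java.util.Locale.ROOT);
String s = nf.format(1000000); // "1,000,000" on every JVM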


@ -99,7 +99,7 @@ public class StreamUtils {
String fileName = file.getName();
int idx = fileName.lastIndexOf('.');
if (idx != -1) {
type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ENGLISH));
type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ROOT));
}
return type==null ? Type.PLAIN : type;
}


@ -19,6 +19,7 @@ package org.apache.lucene.benchmark.quality;
import java.io.PrintWriter;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Locale;
/**
* Results of quality benchmark run for a single query or for a set of queries.
@ -141,7 +142,7 @@ public class QualityStats {
logger.println(title);
}
prefix = prefix==null ? "" : prefix;
NumberFormat nf = NumberFormat.getInstance();
NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
nf.setMaximumFractionDigits(3);
nf.setMinimumFractionDigits(3);
nf.setGroupingUsed(true);


@ -24,11 +24,13 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.IOUtils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.util.HashSet;
import java.util.Set;
@ -51,7 +53,7 @@ public class QueryDriver {
File topicsFile = new File(args[0]);
File qrelsFile = new File(args[1]);
SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2]), "lucene");
SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2], "UTF-8"), "lucene");
FSDirectory dir = FSDirectory.open(new File(args[3]));
String fieldSpec = args.length == 5 ? args[4] : "T"; // default to Title-only if not specified.
IndexReader reader = DirectoryReader.open(dir);
@ -60,14 +62,14 @@ public class QueryDriver {
int maxResults = 1000;
String docNameField = "docname";
PrintWriter logger = new PrintWriter(System.out, true);
PrintWriter logger = new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()), true);
// use trec utilities to read trec topics into quality queries
TrecTopicsReader qReader = new TrecTopicsReader();
QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile)));
QualityQuery qqs[] = qReader.readQueries(new BufferedReader(IOUtils.getDecodingReader(topicsFile, IOUtils.CHARSET_UTF_8)));
// prepare judge, with trec utilities that read from a QRels file
Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile)));
Judge judge = new TrecJudge(new BufferedReader(IOUtils.getDecodingReader(qrelsFile, IOUtils.CHARSET_UTF_8)));
// validate topics & judgments match each other
judge.validateData(qqs, logger);
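
FileReader cannot be fixed in place: it has no constructor taking a charset, so every use decodes with the platform default. The replacement goes through Lucene's IOUtils.getDecodingReader helper, which to a first approximation (it also, as far as its usual behavior goes, configures strict reporting of malformed input) stands in for a sketch like this; utf8Reader() is a hypothetical name:

static java.io.Reader utf8Reader(java.io.File f) throws java.io.IOException {
  return new java.io.InputStreamReader(new java.io.FileInputStream(f), "UTF-8");
}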


@ -19,6 +19,7 @@ package org.apache.lucene.benchmark.quality.utils;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.NumberFormat;
import java.util.Locale;
import org.apache.lucene.benchmark.quality.QualityQuery;
import org.apache.lucene.search.ScoreDoc;
@ -45,7 +46,7 @@ public class SubmissionReport {
public SubmissionReport (PrintWriter logger, String name) {
this.logger = logger;
this.name = name;
nf = NumberFormat.getInstance();
nf = NumberFormat.getInstance(Locale.ROOT);
nf.setMaximumFractionDigits(4);
nf.setMinimumFractionDigits(4);
}


@ -19,12 +19,18 @@ package org.apache.lucene.benchmark.utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.util.IOUtils;
/**
* Split the Reuters SGML documents into Simple Text files containing: Title, Date, Dateline, Body
@ -73,7 +79,7 @@ public class ExtractReuters {
*/
protected void extractFile(File sgmFile) {
try {
BufferedReader reader = new BufferedReader(new FileReader(sgmFile));
BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(sgmFile), IOUtils.CHARSET_UTF_8));
StringBuilder buffer = new StringBuilder(1024);
StringBuilder outBuffer = new StringBuilder(1024);
@ -107,7 +113,7 @@ public class ExtractReuters {
File outFile = new File(outputDir, sgmFile.getName() + "-"
+ (docNumber++) + ".txt");
// System.out.println("Writing " + outFile);
FileWriter writer = new FileWriter(outFile);
OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(outFile), IOUtils.CHARSET_UTF_8);
writer.write(out);
writer.close();
outBuffer.setLength(0);
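
FileWriter is the writer-side twin of FileReader: it offers no charset parameter at all, so the mechanical rewrite is an OutputStreamWriter over a FileOutputStream. The patch passes Lucene's IOUtils.CHARSET_UTF_8 Charset constant; the sketch below uses the plain charset name and a hypothetical utf8Writer() helper:

static java.io.Writer utf8Writer(java.io.File f) throws java.io.IOException {
  return new java.io.OutputStreamWriter(new java.io.FileOutputStream(f), "UTF-8");
}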


@ -18,8 +18,10 @@ package org.apache.lucene.benchmark.utils;
*/
import java.io.File;
import java.io.FileWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Properties;
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
@ -28,6 +30,7 @@ import org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource;
import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.document.Document;
import org.apache.lucene.util.IOUtils;
/**
* Extract the downloaded Wikipedia dump into separate files for indexing.
@ -83,7 +86,7 @@ public class ExtractWikipedia {
contents.append("\n");
try {
FileWriter writer = new FileWriter(f);
Writer writer = new OutputStreamWriter(new FileOutputStream(f), IOUtils.CHARSET_UTF_8);
writer.write(contents.toString());
writer.close();
} catch (IOException ioe) {


@ -166,7 +166,7 @@ public class DocMakerTest extends BenchmarkTestCase {
// DocMaker did not close its ContentSource if resetInputs was called twice,
// leading to a file handle leak.
File f = new File(getWorkDir(), "docMakerLeak.txt");
PrintStream ps = new PrintStream(f);
PrintStream ps = new PrintStream(f, "UTF-8");
ps.println("one title\t" + System.currentTimeMillis() + "\tsome content");
ps.close();


@ -20,6 +20,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.PrintStream;
import java.nio.charset.Charset;
import java.util.Properties;
import org.apache.lucene.benchmark.BenchmarkTestCase;
@ -50,7 +51,7 @@ public class CreateIndexTaskTest extends BenchmarkTestCase {
PrintStream curOut = System.out;
ByteArrayOutputStream baos = new ByteArrayOutputStream();
System.setOut(new PrintStream(baos));
System.setOut(new PrintStream(baos, false, Charset.defaultCharset().name()));
try {
PerfRunData runData = createPerfRunData("SystemOut");
CreateIndexTask cit = new CreateIndexTask(runData);
@ -63,7 +64,7 @@ public class CreateIndexTaskTest extends BenchmarkTestCase {
PrintStream curErr = System.err;
baos.reset();
System.setErr(new PrintStream(baos));
System.setErr(new PrintStream(baos, false, Charset.defaultCharset().name()));
try {
PerfRunData runData = createPerfRunData("SystemErr");
CreateIndexTask cit = new CreateIndexTask(runData);


@ -31,6 +31,7 @@ import java.io.OutputStreamWriter;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.lucene.benchmark.BenchmarkTestCase;
import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util._TestUtil;
import org.junit.After;
import org.junit.Before;
@ -88,7 +89,7 @@ public class StreamUtilsTest extends BenchmarkTestCase {
private File rawTextFile(String ext) throws Exception {
File f = new File(testDir,"testfile." + ext);
BufferedWriter w = new BufferedWriter(new FileWriter(f));
BufferedWriter w = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), IOUtils.CHARSET_UTF_8));
w.write(TEXT);
w.newLine();
w.close();
@ -117,7 +118,7 @@ public class StreamUtilsTest extends BenchmarkTestCase {
}
private void writeText(OutputStream os) throws IOException {
BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os, IOUtils.CHARSET_UTF_8));
w.write(TEXT);
w.newLine();
w.close();
@ -125,7 +126,7 @@ public class StreamUtilsTest extends BenchmarkTestCase {
private void assertReadText(File f) throws Exception {
InputStream ir = StreamUtils.inputStream(f);
InputStreamReader in = new InputStreamReader(ir);
InputStreamReader in = new InputStreamReader(ir, IOUtils.CHARSET_UTF_8);
BufferedReader r = new BufferedReader(in);
String line = r.readLine();
assertEquals("Wrong text found in "+f.getName(), TEXT, line);


@ -31,7 +31,9 @@ import java.io.BufferedReader;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
/**
* Test that quality run does its job.
@ -55,7 +57,7 @@ public class TestQualityRun extends BenchmarkTestCase {
int maxResults = 1000;
String docNameField = "doctitle"; // orig docID is in the linedoc format title
PrintWriter logger = VERBOSE ? new PrintWriter(System.out,true) : null;
PrintWriter logger = VERBOSE ? new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()),true) : null;
// prepare topics
InputStream topics = getClass().getResourceAsStream("trecTopics.txt");


@ -169,11 +169,19 @@
</clover-report>
</target>
<!-- Validate once from top-level. -->
<target name="validate" depends="compile-tools,resolve" description="Validate legal stuff.">
<!-- Validation (license/notice/api checks). -->
<target name="validate" depends="check-licenses,check-forbidden-apis" description="Validate stuff." />
<target name="check-licenses" depends="compile-tools,resolve,load-custom-tasks" description="Validate license stuff.">
<license-check-macro dir="${basedir}" />
</target>
<target name="check-forbidden-apis" depends="compile-tools,compile-test,load-custom-tasks" description="Check forbidden API calls in compiled class files.">
<forbidden-apis apiFile="${custom-tasks.dir}/forbiddenApis/jdk.txt">
<fileset dir="${basedir}/build" includes="**/*.class" />
</forbidden-apis>
</target>
<target name="resolve">
<sequential>
<ant dir="test-framework" target="resolve" inheritall="false">


@ -68,6 +68,7 @@
executable="${python.exe}" failonerror="true">
<arg line="createLevAutomata.py @{n} False"/>
</exec>
<fixcrlf srcdir="src/java/org/apache/lucene/util/automaton" includes="*ParametricDescription.java" encoding="UTF-8"/>
</sequential>
</macrodef>


@ -20,8 +20,10 @@ package org.apache.lucene.codecs;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Locale;
import java.util.TreeMap;
import org.apache.lucene.index.DocsAndPositionsEnum;
@ -345,7 +347,12 @@ public class BlockTreeTermsReader extends FieldsProducer {
@Override
public String toString() {
final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
final PrintStream out = new PrintStream(bos);
PrintStream out;
try {
out = new PrintStream(bos, false, "UTF-8");
} catch (UnsupportedEncodingException bogus) {
throw new RuntimeException(bogus);
}
out.println(" index FST:");
out.println(" " + indexNodeCount + " nodes");
@ -353,7 +360,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
out.println(" " + indexNumBytes + " bytes");
out.println(" terms:");
out.println(" " + totalTermCount + " terms");
out.println(" " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format("%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : ""));
out.println(" " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : ""));
out.println(" blocks:");
out.println(" " + totalBlockCount + " blocks");
out.println(" " + termsOnlyBlockCount + " terms-only blocks");
@ -362,9 +369,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
out.println(" " + floorBlockCount + " floor blocks");
out.println(" " + (totalBlockCount-floorSubBlockCount) + " non-floor blocks");
out.println(" " + floorSubBlockCount + " floor sub-blocks");
out.println(" " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format("%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : ""));
out.println(" " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? " (" + String.format("%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : ""));
out.println(" " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format("%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : ""));
out.println(" " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : ""));
out.println(" " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : ""));
out.println(" " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : ""));
if (totalBlockCount != 0) {
out.println(" by prefix length:");
int total = 0;
@ -372,13 +379,17 @@ public class BlockTreeTermsReader extends FieldsProducer {
final int blockCount = blockCountByPrefixLen[prefix];
total += blockCount;
if (blockCount != 0) {
out.println(" " + String.format("%2d", prefix) + ": " + blockCount);
out.println(" " + String.format(Locale.ROOT, "%2d", prefix) + ": " + blockCount);
}
}
assert totalBlockCount == total;
}
return bos.toString();
try {
return bos.toString("UTF-8");
} catch (UnsupportedEncodingException bogus) {
throw new RuntimeException(bogus);
}
}
}
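
The toString() rewrite shows the full round-trip discipline: the PrintStream encodes into the ByteArrayOutputStream as UTF-8, and bos.toString("UTF-8") decodes with the same charset, so the stats render identically everywhere. Since every JVM is required to support UTF-8, the checked UnsupportedEncodingException really is bogus, hence the RuntimeException wrapper. Condensed sketch, with render() as a hypothetical method name:

static String render() throws java.io.UnsupportedEncodingException {
  java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
  java.io.PrintStream out = new java.io.PrintStream(bos, false, "UTF-8");
  out.println("terms: 42");
  out.flush();
  return bos.toString("UTF-8"); // decode with the same charset that encoded
}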


@ -53,7 +53,7 @@ public class DateTools {
private static final ThreadLocal<Calendar> TL_CAL = new ThreadLocal<Calendar>() {
@Override
protected Calendar initialValue() {
return Calendar.getInstance(GMT, Locale.US);
return Calendar.getInstance(GMT, Locale.ROOT);
}
};
@ -194,7 +194,7 @@ public class DateTools {
this.formatLen = formatLen;
// formatLen 10's place: 11111111
// formatLen 1's place: 12345678901234567
this.format = new SimpleDateFormat("yyyyMMddHHmmssSSS".substring(0,formatLen),Locale.US);
this.format = new SimpleDateFormat("yyyyMMddHHmmssSSS".substring(0,formatLen),Locale.ROOT);
this.format.setTimeZone(GMT);
}
@ -202,7 +202,7 @@ public class DateTools {
* in lowercase (for backwards compatibility) */
@Override
public String toString() {
return super.toString().toLowerCase(Locale.ENGLISH);
return super.toString().toLowerCase(Locale.ROOT);
}
}
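
With the timezone pinned to GMT and the locale now pinned to ROOT, a SimpleDateFormat over a purely numeric pattern is fully deterministic; Locale.US already behaved the same for these patterns, so this is a consistency change rather than a bug fix. For example:

java.text.SimpleDateFormat df =
    new java.text.SimpleDateFormat("yyyyMMddHHmmss", java.util.Locale.ROOT);
df.setTimeZone(java.util.TimeZone.getTimeZone("GMT"));
String s = df.format(new java.util.Date(0L)); // "19700101000000" on every JVM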


@ -25,6 +25,7 @@ import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.lucene.codecs.BlockTreeTermsReader;
@ -341,7 +342,7 @@ public class CheckIndex {
* you only call this when the index is not opened by any
* writer. */
public Status checkIndex(List<String> onlySegments) throws IOException {
NumberFormat nf = NumberFormat.getInstance();
NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
SegmentInfos sis = new SegmentInfos();
Status result = new Status();
result.dir = dir;


@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.text.NumberFormat;
import java.util.HashSet;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
@ -181,7 +182,7 @@ class DocumentsWriterPerThread {
private int flushedDocCount;
DocumentsWriterDeleteQueue deleteQueue;
DeleteSlice deleteSlice;
private final NumberFormat nf = NumberFormat.getInstance();
private final NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
final Allocator byteBlockAllocator;


@ -27,6 +27,7 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
@ -3610,7 +3611,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
// lost...
if (infoStream.isEnabled("IW")) {
infoStream.message("IW", String.format("merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.));
infoStream.message("IW", String.format(Locale.ROOT, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.));
}
final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer();


@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@ -535,7 +536,7 @@ public abstract class LogMergePolicy extends MergePolicy {
if (size >= maxMergeSize) {
extra += " [skip: too large]";
}
message("seg=" + writer.get().segString(info) + " level=" + infoLevel.level + " size=" + String.format("%.3f MB", segBytes/1024/1024.) + extra);
message("seg=" + writer.get().segString(info) + " level=" + infoLevel.level + " size=" + String.format(Locale.ROOT, "%.3f MB", segBytes/1024/1024.) + extra);
}
}


@ -18,6 +18,7 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
import java.util.Locale;
import java.util.Map;
import java.util.Collection;
import java.util.Collections;
@ -289,7 +290,7 @@ public class TieredMergePolicy extends MergePolicy {
} else if (segBytes < floorSegmentBytes) {
extra += " [floored]";
}
message(" seg=" + writer.get().segString(info) + " size=" + String.format("%.3f", segBytes/1024/1024.) + " MB" + extra);
message(" seg=" + writer.get().segString(info) + " size=" + String.format(Locale.ROOT, "%.3f", segBytes/1024/1024.) + " MB" + extra);
}
minSegmentBytes = Math.min(segBytes, minSegmentBytes);
@ -388,7 +389,7 @@ public class TieredMergePolicy extends MergePolicy {
final MergeScore score = score(candidate, hitTooLarge, mergingBytes);
if (verbose()) {
message(" maybe=" + writer.get().segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format("%.3f MB", totAfterMergeBytes/1024./1024.));
message(" maybe=" + writer.get().segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format(Locale.ROOT, "%.3f MB", totAfterMergeBytes/1024./1024.));
}
// If we are already running a max sized merge
@ -413,7 +414,7 @@ public class TieredMergePolicy extends MergePolicy {
}
if (verbose()) {
message(" add merge=" + writer.get().segString(merge.segments) + " size=" + String.format("%.3f MB", bestMergeBytes/1024./1024.) + " score=" + String.format("%.3f", bestScore.getScore()) + " " + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : ""));
message(" add merge=" + writer.get().segString(merge.segments) + " size=" + String.format(Locale.ROOT, "%.3f MB", bestMergeBytes/1024./1024.) + " score=" + String.format(Locale.ROOT, "%.3f", bestScore.getScore()) + " " + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : ""));
}
} else {
return spec;
@ -475,7 +476,7 @@ public class TieredMergePolicy extends MergePolicy {
@Override
public String getExplanation() {
return "skew=" + String.format("%.3f", skew) + " nonDelRatio=" + String.format("%.3f", nonDelRatio);
return "skew=" + String.format(Locale.ROOT, "%.3f", skew) + " nonDelRatio=" + String.format(Locale.ROOT, "%.3f", nonDelRatio);
}
};
}


@ -17,6 +17,8 @@ package org.apache.lucene.search.similarities;
* limitations under the License.
*/
import java.util.Locale;
import org.apache.lucene.search.Explanation;
/**
@ -92,6 +94,6 @@ public class LMDirichletSimilarity extends LMSimilarity {
@Override
public String getName() {
return String.format("Dirichlet(%f)", getMu());
return String.format(Locale.ROOT, "Dirichlet(%f)", getMu());
}
}


@ -17,6 +17,8 @@ package org.apache.lucene.search.similarities;
* limitations under the License.
*/
import java.util.Locale;
import org.apache.lucene.search.Explanation;
/**
@ -72,6 +74,6 @@ public class LMJelinekMercerSimilarity extends LMSimilarity {
@Override
public String getName() {
return String.format("Jelinek-Mercer(%f)", getLambda());
return String.format(Locale.ROOT, "Jelinek-Mercer(%f)", getLambda());
}
}


@ -17,6 +17,8 @@ package org.apache.lucene.search.similarities;
* limitations under the License.
*/
import java.util.Locale;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.TermStatistics;
@ -91,9 +93,9 @@ public abstract class LMSimilarity extends SimilarityBase {
public String toString() {
String coll = collectionModel.getName();
if (coll != null) {
return String.format("LM %s - %s", getName(), coll);
return String.format(Locale.ROOT, "LM %s - %s", getName(), coll);
} else {
return String.format("LM %s", getName());
return String.format(Locale.ROOT, "LM %s", getName());
}
}


@ -17,6 +17,7 @@ package org.apache.lucene.util;
* limitations under the License.
*/
import java.util.Locale;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicInteger;
@ -43,7 +44,7 @@ public class NamedThreadFactory implements ThreadFactory {
final SecurityManager s = System.getSecurityManager();
group = (s != null) ? s.getThreadGroup() : Thread.currentThread()
.getThreadGroup();
this.threadNamePrefix = String.format(NAME_PATTERN,
this.threadNamePrefix = String.format(Locale.ROOT, NAME_PATTERN,
checkPrefix(threadNamePrefix), threadPoolNumber.getAndIncrement());
}
@ -57,7 +58,7 @@ public class NamedThreadFactory implements ThreadFactory {
* @see java.util.concurrent.ThreadFactory#newThread(java.lang.Runnable)
*/
public Thread newThread(Runnable r) {
final Thread t = new Thread(group, r, String.format("%s-%d",
final Thread t = new Thread(group, r, String.format(Locale.ROOT, "%s-%d",
this.threadNamePrefix, threadNumber.getAndIncrement()), 0);
t.setDaemon(false);
t.setPriority(Thread.NORM_PRIORITY);


@ -559,7 +559,7 @@ public final class RamUsageEstimator {
*/
public static String humanReadableUnits(long bytes) {
return humanReadableUnits(bytes,
new DecimalFormat("0.#", DecimalFormatSymbols.getInstance(Locale.ENGLISH)));
new DecimalFormat("0.#", DecimalFormatSymbols.getInstance(Locale.ROOT)));
}
/**


@ -73,7 +73,7 @@ public enum Version {
}
public static Version parseLeniently(String version) {
String parsedMatchVersion = version.toUpperCase(Locale.ENGLISH);
String parsedMatchVersion = version.toUpperCase(Locale.ROOT);
return Version.valueOf(parsedMatchVersion.replaceFirst("^(\\d)\\.(\\d)$", "LUCENE_$1$2"));
}
}


@ -121,7 +121,7 @@ def main():
w('package org.apache.lucene.util.automaton;')
w('')
w('/**')
w('/*')
w(' * Licensed to the Apache Software Foundation (ASF) under one or more')
w(' * contributor license agreements. See the NOTICE file distributed with')
w(' * this work for additional information regarding copyright ownership.')


@ -159,7 +159,7 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {
public void testAppendableInterface() {
CharTermAttributeImpl t = new CharTermAttributeImpl();
Formatter formatter = new Formatter(t, Locale.US);
Formatter formatter = new Formatter(t, Locale.ROOT);
formatter.format("%d", 1234);
assertEquals("1234", t.toString());
formatter.format("%d", 5678);


@ -71,7 +71,7 @@ public class Test10KPulsings extends LuceneTestCase {
Field field = newField("field", "", ft);
document.add(field);
NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ENGLISH));
NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ROOT));
for (int i = 0; i < 10050; i++) {
field.setStringValue(df.format(i));
@ -122,7 +122,7 @@ public class Test10KPulsings extends LuceneTestCase {
Field field = newField("field", "", ft);
document.add(field);
NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ENGLISH));
NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ROOT));
final int freq = freqCutoff + 1;


@ -37,7 +37,7 @@ public class TestBinaryDocument extends LuceneTestCase {
{
FieldType ft = new FieldType();
ft.setStored(true);
StoredField binaryFldStored = new StoredField("binaryStored", binaryValStored.getBytes());
StoredField binaryFldStored = new StoredField("binaryStored", binaryValStored.getBytes("UTF-8"));
Field stringFldStored = new Field("stringStored", binaryValStored, ft);
Document doc = new Document();
@ -62,7 +62,7 @@ public class TestBinaryDocument extends LuceneTestCase {
/** fetch the binary stored field and compare its content with the original one */
BytesRef bytes = docFromReader.getBinaryValue("binaryStored");
assertNotNull(bytes);
String binaryFldStoredTest = new String(bytes.bytes, bytes.offset, bytes.length);
String binaryFldStoredTest = new String(bytes.bytes, bytes.offset, bytes.length, "UTF-8");
assertTrue(binaryFldStoredTest.equals(binaryValStored));
/** fetch the string field and compare its content with the original one */
@ -75,7 +75,7 @@ public class TestBinaryDocument extends LuceneTestCase {
}
public void testCompressionTools() throws Exception {
StoredField binaryFldCompressed = new StoredField("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes()));
StoredField binaryFldCompressed = new StoredField("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes("UTF-8")));
StoredField stringFldCompressed = new StoredField("stringCompressed", CompressionTools.compressString(binaryValCompressed));
Document doc = new Document();
@ -94,7 +94,7 @@ public class TestBinaryDocument extends LuceneTestCase {
assertTrue(docFromReader != null);
/** fetch the binary compressed field and compare its content with the original one */
String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed")));
String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed")), "UTF-8");
assertTrue(binaryFldCompressedTest.equals(binaryValCompressed));
assertTrue(CompressionTools.decompressString(docFromReader.getBinaryValue("stringCompressed")).equals(binaryValCompressed));


@ -61,12 +61,12 @@ public class TestDateTools extends LuceneTestCase {
public void testStringtoTime() throws ParseException {
long time = DateTools.stringToTime("197001010000");
Calendar cal = new GregorianCalendar();
// we use default locale since LuceneTestCase randomizes it
Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"), Locale.getDefault());
cal.clear();
cal.set(1970, 0, 1, // year=1970, month=january, day=1
0, 0, 0); // hour, minute, second
cal.set(Calendar.MILLISECOND, 0);
cal.setTimeZone(TimeZone.getTimeZone("GMT"));
assertEquals(cal.getTime().getTime(), time);
cal.set(1980, 1, 2, // year=1980, month=february, day=2
11, 5, 0); // hour, minute, second
@ -76,9 +76,9 @@ public class TestDateTools extends LuceneTestCase {
}
public void testDateAndTimetoString() throws ParseException {
Calendar cal = new GregorianCalendar();
// we use default locale since LuceneTestCase randomizes it
Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"), Locale.getDefault());
cal.clear();
cal.setTimeZone(TimeZone.getTimeZone("GMT"));
cal.set(2004, 1, 3, // year=2004, month=february(!), day=3
22, 8, 56); // hour, minute, second
cal.set(Calendar.MILLISECOND, 333);
@ -141,9 +141,9 @@ public class TestDateTools extends LuceneTestCase {
}
public void testRound() {
Calendar cal = new GregorianCalendar();
// we use default locale since LuceneTestCase randomizes it
Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"), Locale.getDefault());
cal.clear();
cal.setTimeZone(TimeZone.getTimeZone("GMT"));
cal.set(2004, 1, 3, // year=2004, month=february(!), day=3
22, 8, 56); // hour, minute, second
cal.set(Calendar.MILLISECOND, 333);
@ -180,7 +180,7 @@ public class TestDateTools extends LuceneTestCase {
}
private String isoFormat(Date date) {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS", Locale.US);
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS", Locale.ROOT);
sdf.setTimeZone(TimeZone.getTimeZone("GMT"));
return sdf.format(date);
}
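
Background for these test changes: LuceneTestCase randomizes the default locale and timezone per run, so the tests pin an explicit GMT zone wherever they assert absolute instants, while deliberately keeping the randomized default locale to prove locale independence. The constructor they switch to:

java.util.Calendar cal = new java.util.GregorianCalendar(
    java.util.TimeZone.getTimeZone("GMT"),  // pinned: the assertions encode absolute instants
    java.util.Locale.getDefault());         // left randomized by the test framework on purpose
cal.clear();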


@ -220,10 +220,10 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos));
checker.setInfoStream(new PrintStream(bos, false, "UTF-8"));
CheckIndex.Status indexStatus = checker.checkIndex();
assertFalse(indexStatus.clean);
assertTrue(bos.toString().contains(IndexFormatTooOldException.class.getName()));
assertTrue(bos.toString("UTF-8").contains(IndexFormatTooOldException.class.getName()));
dir.close();
_TestUtil.rmDir(oldIndxeDir);


@ -52,12 +52,12 @@ public class TestCheckIndex extends LuceneTestCase {
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(new PrintStream(bos));
checker.setInfoStream(new PrintStream(bos, false, "UTF-8"));
if (VERBOSE) checker.setInfoStream(System.out);
CheckIndex.Status indexStatus = checker.checkIndex();
if (indexStatus.clean == false) {
System.out.println("CheckIndex failed");
System.out.println(bos.toString());
System.out.println(bos.toString("UTF-8"));
fail();
}


@ -17,11 +17,14 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedList;
@ -78,14 +81,14 @@ public class TestDoc extends LuceneTestCase {
}
private File createOutput(String name, String text) throws IOException {
FileWriter fw = null;
Writer fw = null;
PrintWriter pw = null;
try {
File f = new File(workDir, name);
if (f.exists()) f.delete();
fw = new FileWriter(f);
fw = new OutputStreamWriter(new FileOutputStream(f), "UTF-8");
pw = new PrintWriter(fw);
pw.println(text);
return f;
@ -182,9 +185,11 @@ public class TestDoc extends LuceneTestCase {
{
File file = new File(workDir, fileName);
Document doc = new Document();
doc.add(new TextField("contents", new FileReader(file), Field.Store.NO));
InputStreamReader is = new InputStreamReader(new FileInputStream(file), "UTF-8");
doc.add(new TextField("contents", is, Field.Store.NO));
writer.addDocument(doc);
writer.commit();
is.close();
return writer.newestSegment();
}


@ -43,9 +43,8 @@ public class TestPayloads extends LuceneTestCase {
// Simple tests to test the Payload class
public void testPayload() throws Exception {
byte[] testData = "This is a test!".getBytes();
BytesRef payload = new BytesRef(testData);
assertEquals("Wrong payload length.", testData.length, payload.length);
BytesRef payload = new BytesRef("This is a test!");
assertEquals("Wrong payload length.", "This is a test!".length(), payload.length);
BytesRef clone = payload.clone();
assertEquals(payload.length, clone.length);
@ -73,7 +72,7 @@ public class TestPayloads extends LuceneTestCase {
// enabled in only some documents
d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO));
// only add payload data for field f2
analyzer.setPayloadData("f2", "somedata".getBytes(), 0, 1);
analyzer.setPayloadData("f2", "somedata".getBytes("UTF-8"), 0, 1);
writer.addDocument(d);
// flush
writer.close();
@ -96,8 +95,8 @@ public class TestPayloads extends LuceneTestCase {
d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO));
d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO));
// add payload data for field f2 and f3
analyzer.setPayloadData("f2", "somedata".getBytes(), 0, 1);
analyzer.setPayloadData("f3", "somedata".getBytes(), 0, 3);
analyzer.setPayloadData("f2", "somedata".getBytes("UTF-8"), 0, 1);
analyzer.setPayloadData("f3", "somedata".getBytes("UTF-8"), 0, 3);
writer.addDocument(d);
// force merge
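
Two fixes sit side by side in this test: raw byte[] payloads built via String.getBytes() gain an explicit "UTF-8", and where a BytesRef is wanted the String-typed constructor is used directly, since BytesRef(CharSequence) always encodes UTF-8 internally. Sketch of the three forms (assumes org.apache.lucene.util.BytesRef is imported; payloadForms() is a hypothetical name):

static void payloadForms() throws java.io.UnsupportedEncodingException {
  BytesRef p1 = new BytesRef("pos: 1".getBytes());        // platform default charset: now forbidden
  BytesRef p2 = new BytesRef("pos: 1".getBytes("UTF-8")); // explicit charset: acceptable
  BytesRef p3 = new BytesRef("pos: 1");                   // simplest: BytesRef(CharSequence) encodes UTF-8 itself
}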


@ -29,6 +29,8 @@ import org.junit.AfterClass;
import org.junit.BeforeClass;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.Locale;
import java.util.Random;
/** Test that BooleanQuery.setMinimumNumberShouldMatch works.
@ -378,7 +380,7 @@ public class TestBooleanMinShouldMatch extends LuceneTestCase {
System.err.println("------- " + test + " -------");
DecimalFormat f = new DecimalFormat("0.000000");
DecimalFormat f = new DecimalFormat("0.000000", DecimalFormatSymbols.getInstance(Locale.ROOT));
for (int i = 0; i < h.length; i++) {
StoredDocument d = searcher.doc(h[i].doc);


@ -19,8 +19,10 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Calendar;
import java.util.GregorianCalendar;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.TimeZone;
import java.util.TreeMap;
import org.apache.lucene.document.DateTools;
@ -230,10 +232,12 @@ public class TestCustomSearcherSort extends LuceneTestCase {
private class RandomGen {
RandomGen(Random random) {
this.random = random;
base.set(1980, 1, 1);
}
private Random random;
private Calendar base = new GregorianCalendar(1980, 1, 1);
// we use the default Locale/TZ since LuceneTestCase randomizes it
private Calendar base = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
// Just to generate some different Lucene Date strings
private String getLuceneDate() {


@ -36,6 +36,8 @@ import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.Locale;
import java.io.IOException;
/**
@ -486,7 +488,7 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
System.err.println("------- " + test + " -------");
DecimalFormat f = new DecimalFormat("0.000000000");
DecimalFormat f = new DecimalFormat("0.000000000", DecimalFormatSymbols.getInstance(Locale.ROOT));
for (int i = 0; i < h.length; i++) {
StoredDocument d = searcher.doc(h[i].doc);


@ -23,6 +23,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
@ -117,10 +118,10 @@ public class TestFieldCache extends LuceneTestCase {
try {
FieldCache cache = FieldCache.DEFAULT;
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
cache.setInfoStream(new PrintStream(bos));
cache.setInfoStream(new PrintStream(bos, false, "UTF-8"));
cache.getDoubles(reader, "theDouble", false);
cache.getFloats(reader, "theDouble", false);
assertTrue(bos.toString().indexOf("WARNING") != -1);
assertTrue(bos.toString("UTF-8").indexOf("WARNING") != -1);
} finally {
FieldCache.DEFAULT.purgeAllCaches();
}
@ -261,7 +262,7 @@ public class TestFieldCache extends LuceneTestCase {
if (chunk == 0) {
for (int ord = 0; ord < values.size(); ord++) {
BytesRef term = values.get(ord);
assertNull(String.format("Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term);
assertNull(String.format(Locale.ROOT, "Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term);
}
break;
}
@ -275,7 +276,7 @@ public class TestFieldCache extends LuceneTestCase {
reuse = termOrds.lookup(i, reuse);
reuse.read(buffer);
}
assertTrue(String.format("Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual));
assertTrue(String.format(Locale.ROOT, "Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual));
}
if (chunk <= buffer.length) {


@ -44,7 +44,7 @@ public class TestMultiValuedNumericRangeQuery extends LuceneTestCase {
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
.setMaxBufferedDocs(_TestUtil.nextInt(random(), 50, 1000)));
DecimalFormat format = new DecimalFormat("00000000000", new DecimalFormatSymbols(Locale.US));
DecimalFormat format = new DecimalFormat("00000000000", new DecimalFormatSymbols(Locale.ROOT));
int num = atLeast(500);
for (int l = 0; l < num; l++) {


@ -58,7 +58,7 @@ public class TestRegexpRandom extends LuceneTestCase {
Field field = newField("field", "", customType);
doc.add(field);
NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ENGLISH));
NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
for (int i = 0; i < 1000; i++) {
field.setStringValue(df.format(i));
writer.addDocument(doc);


@ -54,7 +54,7 @@ public class TestWildcardRandom extends LuceneTestCase {
Field field = newStringField("field", "", Field.Store.NO);
doc.add(field);
NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ENGLISH));
NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
for (int i = 0; i < 1000; i++) {
field.setStringValue(df.format(i));
writer.addDocument(doc);


@ -81,7 +81,7 @@ public class TestBasics extends LuceneTestCase {
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes()));
payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes("UTF-8")));
pos++;
return true;
} else {
@ -411,7 +411,7 @@ public class TestBasics extends LuceneTestCase {
@Test
public void testSpanPayloadCheck() throws Exception {
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five"));
BytesRef pay = new BytesRef(("pos: " + 5).getBytes());
BytesRef pay = new BytesRef(("pos: " + 5).getBytes("UTF-8"));
SpanQuery query = new SpanPayloadCheckQuery(term1, Collections.singletonList(pay.bytes));
checkHits(query, new int[]
{1125, 1135, 1145, 1155, 1165, 1175, 1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345, 1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495, 1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665, 1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835, 1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985, 1995});
@ -426,8 +426,8 @@ public class TestBasics extends LuceneTestCase {
clauses[0] = term1;
clauses[1] = term2;
snq = new SpanNearQuery(clauses, 0, true);
pay = new BytesRef(("pos: " + 0).getBytes());
pay2 = new BytesRef(("pos: " + 1).getBytes());
pay = new BytesRef(("pos: " + 0).getBytes("UTF-8"));
pay2 = new BytesRef(("pos: " + 1).getBytes("UTF-8"));
list = new ArrayList<byte[]>();
list.add(pay.bytes);
list.add(pay2.bytes);
@ -439,9 +439,9 @@ public class TestBasics extends LuceneTestCase {
clauses[1] = term2;
clauses[2] = new SpanTermQuery(new Term("field", "five"));
snq = new SpanNearQuery(clauses, 0, true);
pay = new BytesRef(("pos: " + 0).getBytes());
pay2 = new BytesRef(("pos: " + 1).getBytes());
BytesRef pay3 = new BytesRef(("pos: " + 2).getBytes());
pay = new BytesRef(("pos: " + 0).getBytes("UTF-8"));
pay2 = new BytesRef(("pos: " + 1).getBytes("UTF-8"));
BytesRef pay3 = new BytesRef(("pos: " + 2).getBytes("UTF-8"));
list = new ArrayList<byte[]>();
list.add(pay.bytes);
list.add(pay2.bytes);
@ -470,10 +470,10 @@ public class TestBasics extends LuceneTestCase {
checkHits(query, new int[]{1103, 1203,1303,1403,1503,1603,1703,1803,1903});
Collection<byte[]> payloads = new ArrayList<byte[]>();
BytesRef pay = new BytesRef(("pos: " + 0).getBytes());
BytesRef pay2 = new BytesRef(("pos: " + 1).getBytes());
BytesRef pay3 = new BytesRef(("pos: " + 3).getBytes());
BytesRef pay4 = new BytesRef(("pos: " + 4).getBytes());
BytesRef pay = new BytesRef(("pos: " + 0).getBytes("UTF-8"));
BytesRef pay2 = new BytesRef(("pos: " + 1).getBytes("UTF-8"));
BytesRef pay3 = new BytesRef(("pos: " + 3).getBytes("UTF-8"));
BytesRef pay4 = new BytesRef(("pos: " + 4).getBytes("UTF-8"));
payloads.add(pay.bytes);
payloads.add(pay2.bytes);
payloads.add(pay3.bytes);


@ -276,7 +276,7 @@ public class TestPayloadSpans extends LuceneTestCase {
Collection<byte[]> payloads = spans.getPayload();
for (final byte [] payload : payloads) {
payloadSet.add(new String(payload));
payloadSet.add(new String(payload, "UTF-8"));
}
}
}
@ -311,7 +311,7 @@ public class TestPayloadSpans extends LuceneTestCase {
while (spans.next()) {
Collection<byte[]> payloads = spans.getPayload();
for (final byte[] payload : payloads) {
payloadSet.add(new String(payload));
payloadSet.add(new String(payload, "UTF-8"));
}
}
}
@ -347,7 +347,7 @@ public class TestPayloadSpans extends LuceneTestCase {
Collection<byte[]> payloads = spans.getPayload();
for (final byte [] payload : payloads) {
payloadSet.add(new String(payload));
payloadSet.add(new String(payload, "UTF-8"));
}
}
}
@ -383,7 +383,7 @@ public class TestPayloadSpans extends LuceneTestCase {
System.out.println("Num payloads:" + payloads.size());
for (final byte [] bytes : payloads) {
if(VERBOSE)
System.out.println(new String(bytes));
System.out.println(new String(bytes, "UTF-8"));
}
reader.close();
directory.close();
@ -456,7 +456,7 @@ public class TestPayloadSpans extends LuceneTestCase {
for (final byte [] bytes : payload) {
if(VERBOSE)
System.out.println("doc:" + spans.doc() + " s:" + spans.start() + " e:" + spans.end() + " "
+ new String(bytes));
+ new String(bytes, "UTF-8"));
}
assertEquals(numPayloads[cnt],payload.size());
@ -505,9 +505,9 @@ public class TestPayloadSpans extends LuceneTestCase {
if (!nopayload.contains(token)) {
if (entities.contains(token)) {
payloadAtt.setPayload(new BytesRef((token + ":Entity:"+ pos ).getBytes()));
payloadAtt.setPayload(new BytesRef(token + ":Entity:"+ pos ));
} else {
payloadAtt.setPayload(new BytesRef((token + ":Noise:" + pos ).getBytes()));
payloadAtt.setPayload(new BytesRef(token + ":Noise:" + pos ));
}
}
pos += posIncrAtt.getPositionIncrement();

Some files were not shown because too many files have changed in this diff.