Mirror of https://github.com/apache/lucene.git
LUCENE-4199: Add a new target "check-forbidden-apis" that parses all generated .class files for use of APIs that rely on the default charset, default locale, or default timezone, and fails the build if violations are found. This ensures that Lucene/Solr is independent of local configuration options.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1359202 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
commit
e3813f8030
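
The diff below is a long series of mechanical substitutions of this kind. As an illustration only (not code from this commit; the file name is hypothetical), the charset case looks roughly like this — a minimal sketch assuming UTF-8 data:

    import java.io.BufferedReader;
    import java.io.FileInputStream;
    import java.io.FileReader;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.nio.charset.StandardCharsets;

    public class ForbiddenApiExample {
      public static void main(String[] args) throws IOException {
        // Forbidden: FileReader decodes with the JVM default charset, so the
        // same file can yield different characters on differently set-up hosts.
        try (BufferedReader bad = new BufferedReader(new FileReader("stopwords.txt"))) {
          System.out.println(bad.readLine());
        }
        // Fixed: the charset is part of the code, not of the environment.
        try (BufferedReader good = new BufferedReader(new InputStreamReader(
            new FileInputStream("stopwords.txt"), StandardCharsets.UTF_8))) {
          System.out.println(good.readLine());
        }
      }
    }
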
@@ -62,6 +62,12 @@ Build
 * LUCENE-4115: JAR resolution/ cleanup should be done automatically for ant
   clean/ eclipse/ resolve (Dawid Weiss)

+* LUCENE-4199: Add a new target "check-forbidden-apis", that parses all
+  generated .class files for use of APIs that use default charset, default
+  locale, or default timezone and fail build if violations found. This
+  ensures, that Lucene / Solr is independent on local configuration options.
+  (Uwe Schindler, Robert Muir, Dawid Weiss)
+
 Documentation

 * LUCENE-4195: Added package documentation and examples for
@@ -67,44 +67,50 @@
     <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
       <classpath refid="jflex.classpath"/>
     </taskdef>
-    <jflex file="src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex"
-           outdir="src/java/org/apache/lucene/analysis/wikipedia"
-           nobak="on"/>
+    <run-jflex dir="src/java/org/apache/lucene/analysis/wikipedia" name="WikipediaTokenizerImpl"/>
   </target>

   <target name="jflex-StandardAnalyzer" depends="init,jflex-check" if="jflex.present">
     <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
       <classpath refid="jflex.classpath"/>
     </taskdef>

-    <jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex"
-           outdir="src/java/org/apache/lucene/analysis/standard"
-           nobak="on" />
-    <jflex file="src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex"
-           outdir="src/java/org/apache/lucene/analysis/standard"
-           nobak="on" />
-    <jflex file="src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex"
-           outdir="src/java/org/apache/lucene/analysis/standard/std31"
-           nobak="on" />
+    <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="StandardTokenizerImpl"/>
+    <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="ClassicTokenizerImpl"/>
+    <!--
+    <run-jflex dir="src/java/org/apache/lucene/analysis/standard/std31" name="StandardTokenizerImpl31"/>
+    -->
   </target>

   <target name="jflex-UAX29URLEmailTokenizer" depends="jflex-check" if="jflex.present">
     <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
       <classpath refid="jflex.classpath"/>
     </taskdef>
-    <jflex file="src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex"
-           outdir="src/java/org/apache/lucene/analysis/standard"
-           nobak="on" />
-    <jflex file="src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.jflex"
-           outdir="src/java/org/apache/lucene/analysis/standard/std31"
-           nobak="on" />
-    <jflex file="src/java/org/apache/lucene/analysis/standard/std34/UAX29URLEmailTokenizerImpl34.jflex"
-           outdir="src/java/org/apache/lucene/analysis/standard/std34"
-           nobak="on" />
+    <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="UAX29URLEmailTokenizerImpl"/>
+    <!--
+    <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="UAX29URLEmailTokenizerImpl31"/>
+    <run-jflex dir="src/java/org/apache/lucene/analysis/standard/std31" name="UAX29URLEmailTokenizerImpl34"/>
+    -->
   </target>

+  <!-- Remove the inappropriate JFlex-generated constructor -->
+  <macrodef name="run-jflex">
+    <attribute name="dir"/>
+    <attribute name="name"/>
+    <sequential>
+      <jflex file="@{dir}/@{name}.jflex"
+             outdir="@{dir}"
+             nobak="on" />
+      <replaceregexp file="@{dir}/@{name}.java"
+                     match="/\*\*\s*\*\s*Creates a new scanner\..*this\(new java\.io\.InputStreamReader\(in\)\);\s*\}"
+                     replace="" flags="sg"/>
+    </sequential>
+  </macrodef>
+
   <target name="clean-jflex">
     <delete>
+      <fileset dir="src/java/org/apache/lucene/analysis/charfilter" includes="*.java">
+        <containsregexp expression="generated.*by.*JFlex"/>
+      </fileset>
       <fileset dir="src/java/org/apache/lucene/analysis/wikipedia" includes="*.java">
         <containsregexp expression="generated.*by.*JFlex"/>
       </fileset>
@@ -1,5 +1,7 @@
 package org.apache.lucene.analysis.br;

+import java.util.Locale;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -21,6 +23,7 @@ package org.apache.lucene.analysis.br;
  * A stemmer for Brazilian Portuguese words.
  */
 public class BrazilianStemmer {
+  private static final Locale locale = new Locale("pt", "BR");

   /**
    * Changed term
@@ -243,7 +246,7 @@ public class BrazilianStemmer {
       return null ;
     }

-    value = value.toLowerCase() ;
+    value = value.toLowerCase(locale) ;
     for (j=0 ; j < value.length() ; j++) {
       if ((value.charAt(j) == 'á') ||
           (value.charAt(j) == 'â') ||
@@ -1,4 +1,7 @@
 package org.apache.lucene.analysis.de;

+import java.util.Locale;
+
 // This file is encoded in UTF-8
+
 /*
@@ -37,6 +40,8 @@ public class GermanStemmer
    * Amount of characters that are removed with <tt>substitute()</tt> while stemming.
    */
   private int substCount = 0;
+
+  private static final Locale locale = new Locale("de", "DE");

   /**
    * Stemms the given term to an unique <tt>discriminator</tt>.
@@ -47,7 +52,7 @@ public class GermanStemmer
   protected String stem( String term )
   {
     // Use lowercase for medium stemming.
-    term = term.toLowerCase();
+    term = term.toLowerCase(locale);
     if ( !isStemmable( term ) )
       return term;
     // Reset the StringBuilder.
@@ -252,7 +252,7 @@ public class HunspellDictionary {
     }

     String condition = ruleArgs[4];
-    affix.setCondition(condition, String.format(conditionPattern, condition));
+    affix.setCondition(condition, String.format(Locale.ROOT, conditionPattern, condition));
     affix.setCrossProduct(crossProduct);

     List<HunspellAffix> list = affixes.get(affix.getAppend());
@@ -376,7 +376,7 @@ public class HunspellDictionary {
       Arrays.sort(wordForm.getFlags());
       entry = line.substring(0, flagSep);
       if(ignoreCase) {
-        entry = entry.toLowerCase(Locale.ENGLISH);
+        entry = entry.toLowerCase(Locale.ROOT);
       }
     }

@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.hunspell;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.charset.Charset;
 import java.text.ParseException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -330,7 +331,7 @@ public class HunspellStemmer {

     HunspellStemmer stemmer = new HunspellStemmer(dictionary);

-    Scanner scanner = new Scanner(System.in);
+    Scanner scanner = new Scanner(System.in, Charset.defaultCharset().name());

     System.out.print("> ");
     while (scanner.hasNextLine()) {
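
The Scanner change above shows the other side of the policy: when the platform default is genuinely intended (an interactive console on System.in), it must be requested explicitly so the intent is visible to the checker. A hedged sketch of both directions, with a hypothetical dictionary file name:

    import java.io.FileInputStream;
    import java.io.IOException;
    import java.nio.charset.Charset;
    import java.util.Scanner;

    public class ScannerCharsets {
      public static void main(String[] args) throws IOException {
        // File input: pin the charset, never rely on the host default.
        try (Scanner dic = new Scanner(new FileInputStream("words.dic"), "UTF-8")) {
          while (dic.hasNextLine()) {
            System.out.println(dic.nextLine());
          }
        }
        // Console input: the default charset is correct here, but it is named
        // explicitly instead of being picked up silently by new Scanner(System.in).
        Scanner console = new Scanner(System.in, Charset.defaultCharset().name());
        console.close();
      }
    }
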
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.sinks;
 import java.text.DateFormat;
 import java.text.ParseException;
 import java.util.Date;
+import java.util.Locale;

 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.AttributeSource;
@@ -37,10 +38,11 @@ public class DateRecognizerSinkFilter extends TeeSinkTokenFilter.SinkFilter {
   protected CharTermAttribute termAtt;

   /**
-   * Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object.
+   * Uses {@link java.text.SimpleDateFormat#getDateInstance(DateFormat.DEFAULT, Locale.ROOT)} as
+   * the {@link java.text.DateFormat} object.
    */
   public DateRecognizerSinkFilter() {
-    this(DateFormat.getDateInstance());
+    this(DateFormat.getDateInstance(DateFormat.DEFAULT, Locale.ROOT));
   }

   public DateRecognizerSinkFilter(DateFormat dateFormat) {
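
For context (an illustration, not part of the patch): DateFormat.getDateInstance() with no arguments binds the pattern to the default locale, so the same Date prints and parses differently from machine to machine; passing Locale.ROOT pins a neutral format.

    import java.text.DateFormat;
    import java.util.Date;
    import java.util.Locale;

    public class DateFormatLocales {
      public static void main(String[] args) {
        Date now = new Date();
        // Locale-dependent: "Jul 8, 2012" on a US JVM, "08.07.2012" on a German one.
        System.out.println(DateFormat.getDateInstance().format(now));
        // Locale-neutral: identical output on every host.
        System.out.println(
            DateFormat.getDateInstance(DateFormat.DEFAULT, Locale.ROOT).format(now));
      }
    }
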
@@ -1,8 +1,8 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/30/11 12:10 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 16:59 */

 package org.apache.lucene.analysis.standard;

-/*
+/**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -33,8 +33,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 /**
  * This class is a scanner generated by
  * <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 9/30/11 12:10 PM from the specification file
- * <tt>/lucene/jflex/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
+ * on 08.07.12 16:59 from the specification file
+ * <tt>C:/Users/Uwe Schindler/Projects/lucene/lucene4199/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
  */
 class ClassicTokenizerImpl implements StandardTokenizerInterface {

@@ -383,15 +383,7 @@ public final void getText(CharTermAttribute t) {
     this.zzReader = in;
   }

-  /**
-   * Creates a new scanner.
-   * There is also java.io.Reader version of this constructor.
-   *
-   * @param in  the java.io.Inputstream to read input from.
-   */
-  ClassicTokenizerImpl(java.io.InputStream in) {
-    this(new java.io.InputStreamReader(in));
-  }

   /**
    * Unpacks the compressed character translation table.
@@ -14,7 +14,7 @@
  * limitations under the License.
  */

-// Generated using ICU4J 4.8.0.0 on Friday, September 30, 2011 4:10:42 PM UTC
+// Generated using ICU4J 4.8.1.1 on Sunday, July 8, 2012 2:59:49 PM UTC
 // by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros


@@ -1,8 +1,8 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/30/11 12:10 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 16:59 */

 package org.apache.lucene.analysis.standard;

-/*
+/**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -759,15 +759,7 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
     this.zzReader = in;
   }

-  /**
-   * Creates a new scanner.
-   * There is also java.io.Reader version of this constructor.
-   *
-   * @param in  the java.io.Inputstream to read input from.
-   */
-  public StandardTokenizerImpl(java.io.InputStream in) {
-    this(new java.io.InputStreamReader(in));
-  }

   /**
    * Unpacks the compressed character translation table.
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 3/18/12 12:05 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 17:00 */

 package org.apache.lucene.analysis.standard;

@@ -3844,15 +3844,7 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf
     this.zzReader = in;
   }

-  /**
-   * Creates a new scanner.
-   * There is also java.io.Reader version of this constructor.
-   *
-   * @param in  the java.io.Inputstream to read input from.
-   */
-  public UAX29URLEmailTokenizerImpl(java.io.InputStream in) {
-    this(new java.io.InputStreamReader(in));
-  }

   /**
    * Unpacks the compressed character translation table.
@@ -1,6 +1,6 @@
 package org.apache.lucene.analysis.standard;

-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -1,8 +1,8 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 1/22/12 10:26 PM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 17:00 */

 package org.apache.lucene.analysis.wikipedia;

-/*
+/**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 /**
  * This class is a scanner generated by
  * <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 1/22/12 10:26 PM from the specification file
- * <tt>/home/rmuir/workspace/lucene-clean-trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
+ * on 08.07.12 17:00 from the specification file
+ * <tt>C:/Users/Uwe Schindler/Projects/lucene/lucene4199/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
  */
 class WikipediaTokenizerImpl {

@@ -519,15 +519,7 @@ final void reset() {
     this.zzReader = in;
   }

-  /**
-   * Creates a new scanner.
-   * There is also java.io.Reader version of this constructor.
-   *
-   * @param in  the java.io.Inputstream to read input from.
-   */
-  WikipediaTokenizerImpl(java.io.InputStream in) {
-    this(new java.io.InputStreamReader(in));
-  }

   /**
    * Unpacks the compressed character translation table.
@@ -79,7 +79,7 @@ public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
     public boolean incrementToken() throws IOException {
       if (input.incrementToken()) {
         if (!keywordAttr.isKeyword()) {
-          final String term = termAtt.toString().toLowerCase(Locale.ENGLISH);
+          final String term = termAtt.toString().toLowerCase(Locale.ROOT);
           termAtt.setEmpty().append(term);
         }
         return true;
@@ -27,7 +27,7 @@ import org.apache.lucene.analysis.MockTokenizer;
 public class DateRecognizerSinkTokenizerTest extends BaseTokenStreamTestCase {

   public void test() throws IOException {
-    DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.US));
+    DateRecognizerSinkFilter sinkFilter = new DateRecognizerSinkFilter(new SimpleDateFormat("MM/dd/yyyy", Locale.ROOT));
     String test = "The quick red fox jumped over the lazy brown dogs on 7/11/2006 The dogs finally reacted on 7/12/2006";
     TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new MockTokenizer(new StringReader(test), MockTokenizer.WHITESPACE, false));
     TeeSinkTokenFilter.SinkTokenStream sink = tee.newSinkTokenStream(sinkFilter);
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.sinks;

 import java.io.IOException;
 import java.io.StringReader;
+import java.util.Locale;

 import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
@@ -164,7 +165,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
     TokenStream lowerCasing = new LowerCaseFilter(TEST_VERSION_CURRENT, source1);
     String[] lowerCaseTokens = new String[tokens1.length];
     for (int i = 0; i < tokens1.length; i++)
-      lowerCaseTokens[i] = tokens1[i].toLowerCase();
+      lowerCaseTokens[i] = tokens1[i].toLowerCase(Locale.ROOT);
     assertTokenStreamContents(lowerCasing, lowerCaseTokens);
   }

@@ -180,7 +181,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
       StringBuilder buffer = new StringBuilder();
       System.out.println("-----Tokens: " + tokCount[k] + "-----");
       for (int i = 0; i < tokCount[k]; i++) {
-        buffer.append(English.intToEnglish(i).toUpperCase()).append(' ');
+        buffer.append(English.intToEnglish(i).toUpperCase(Locale.ROOT)).append(' ');
       }
       //make sure we produce the same tokens
       TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(TEST_VERSION_CURRENT, new StandardTokenizer(TEST_VERSION_CURRENT, new StringReader(buffer.toString()))));
@@ -32,7 +32,8 @@ public class TestCharArrayIterator extends LuceneTestCase {
   }

   public void testConsumeWordInstance() {
-    BreakIterator bi = BreakIterator.getWordInstance();
+    // we use the default locale, as its randomized by LuceneTestCase
+    BreakIterator bi = BreakIterator.getWordInstance(Locale.getDefault());
     CharArrayIterator ci = CharArrayIterator.newWordInstance();
     for (int i = 0; i < 10000; i++) {
       char text[] = _TestUtil.randomUnicodeString(random()).toCharArray();
@@ -43,7 +44,8 @@ public class TestCharArrayIterator extends LuceneTestCase {

   /* run this to test if your JRE is buggy
   public void testWordInstanceJREBUG() {
-    BreakIterator bi = BreakIterator.getWordInstance();
+    // we use the default locale, as its randomized by LuceneTestCase
+    BreakIterator bi = BreakIterator.getWordInstance(Locale.getDefault());
     Segment ci = new Segment();
     for (int i = 0; i < 10000; i++) {
       char text[] = _TestUtil.randomUnicodeString(random).toCharArray();
@@ -60,7 +62,8 @@ public class TestCharArrayIterator extends LuceneTestCase {
   }

   public void testConsumeSentenceInstance() {
-    BreakIterator bi = BreakIterator.getSentenceInstance();
+    // we use the default locale, as its randomized by LuceneTestCase
+    BreakIterator bi = BreakIterator.getSentenceInstance(Locale.getDefault());
     CharArrayIterator ci = CharArrayIterator.newSentenceInstance();
     for (int i = 0; i < 10000; i++) {
       char text[] = _TestUtil.randomUnicodeString(random()).toCharArray();
@@ -71,7 +74,8 @@ public class TestCharArrayIterator extends LuceneTestCase {

   /* run this to test if your JRE is buggy
   public void testSentenceInstanceJREBUG() {
-    BreakIterator bi = BreakIterator.getSentenceInstance();
+    // we use the default locale, as its randomized by LuceneTestCase
+    BreakIterator bi = BreakIterator.getSentenceInstance(Locale.getDefault());
     Segment ci = new Segment();
     for (int i = 0; i < 10000; i++) {
       char text[] = _TestUtil.randomUnicodeString(random).toCharArray();
@@ -36,7 +36,7 @@ public class TestCharArrayMap extends LuceneTestCase {
       key[j] = (char)random().nextInt(127);
     }
     String keyStr = new String(key);
-    String hmapKey = ignoreCase ? keyStr.toLowerCase(Locale.ENGLISH) : keyStr;
+    String hmapKey = ignoreCase ? keyStr.toLowerCase(Locale.ROOT) : keyStr;

     int val = random().nextInt();

@@ -208,16 +208,16 @@ public class TestCharArraySet extends LuceneTestCase {
       set.add(upper);
     }
     for (int i = 0; i < upperArr.length; i++) {
-      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
-      assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i]));
+      assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
+      assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
     }
     set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS), false);
     for (String upper : upperArr) {
       set.add(upper);
     }
     for (int i = 0; i < upperArr.length; i++) {
-      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
-      assertFalse(String.format(falsePos, lowerArr[i]), set.contains(lowerArr[i]));
+      assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
+      assertFalse(String.format(Locale.ROOT, falsePos, lowerArr[i]), set.contains(lowerArr[i]));
     }
   }

@@ -235,8 +235,8 @@ public class TestCharArraySet extends LuceneTestCase {
       set.add(upper);
     }
     for (int i = 0; i < upperArr.length; i++) {
-      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
-      assertTrue(String.format(missing, lowerArr[i]), set.contains(lowerArr[i]));
+      assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
+      assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
     }
     set = new CharArraySet(TEST_VERSION_CURRENT, Arrays.asList(TEST_STOP_WORDS),
         false);
@@ -244,8 +244,8 @@ public class TestCharArraySet extends LuceneTestCase {
       set.add(upper);
     }
     for (int i = 0; i < upperArr.length; i++) {
-      assertTrue(String.format(missing, upperArr[i]), set.contains(upperArr[i]));
-      assertFalse(String.format(falsePos, upperArr[i]), set
+      assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
+      assertFalse(String.format(Locale.ROOT, falsePos, upperArr[i]), set
           .contains(lowerArr[i]));
     }
   }
@@ -258,7 +258,7 @@ public class TestCharArraySet extends LuceneTestCase {
     List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
     List<String> stopwordsUpper = new ArrayList<String>();
     for (String string : stopwords) {
-      stopwordsUpper.add(string.toUpperCase());
+      stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
     }
     setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
     setIngoreCase.add(Integer.valueOf(1));
@@ -305,7 +305,7 @@ public class TestCharArraySet extends LuceneTestCase {
     List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
     List<String> stopwordsUpper = new ArrayList<String>();
     for (String string : stopwords) {
-      stopwordsUpper.add(string.toUpperCase());
+      stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
     }
     setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
     setIngoreCase.add(Integer.valueOf(1));
@@ -351,7 +351,7 @@ public class TestCharArraySet extends LuceneTestCase {
     List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
     List<String> stopwordsUpper = new ArrayList<String>();
     for (String string : stopwords) {
-      stopwordsUpper.add(string.toUpperCase());
+      stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
     }
     set.addAll(Arrays.asList(TEST_STOP_WORDS));

@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.util;
 import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
+import java.util.Locale;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@@ -53,7 +54,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
     // internal buffer size is 1024 make sure we have a surrogate pair right at the border
     builder.insert(1023, "\ud801\udc1c");
     Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString()));
-    assertTokenStreamContents(tokenizer, builder.toString().toLowerCase().split(" "));
+    assertTokenStreamContents(tokenizer, builder.toString().toLowerCase(Locale.ROOT).split(" "));
   }

   /*
@@ -70,7 +71,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
     }
     builder.append("\ud801\udc1cabc");
     Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString()));
-    assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase()});
+    assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT)});
   }
 }

@@ -84,7 +85,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
       builder.append("A");
     }
     Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
-    assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
+    assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
   }

   /*
@@ -98,7 +99,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
     }
     builder.append("\ud801\udc1c");
     Tokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
-    assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
+    assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
   }

   // LUCENE-3642: normalize SMP->BMP and check that offsets are correct
@@ -123,11 +123,11 @@ public class GenerateJflexTLDMacros {
     while (null != (line = reader.readLine())) {
       Matcher matcher = TLD_PATTERN_1.matcher(line);
       if (matcher.matches()) {
-        TLDs.add(matcher.group(1).toLowerCase(Locale.US));
+        TLDs.add(matcher.group(1).toLowerCase(Locale.ROOT));
       } else {
         matcher = TLD_PATTERN_2.matcher(line);
         if (matcher.matches()) {
-          TLDs.add(matcher.group(1).toLowerCase(Locale.US));
+          TLDs.add(matcher.group(1).toLowerCase(Locale.ROOT));
         }
       }
     }
@@ -146,7 +146,7 @@ public class GenerateJflexTLDMacros {
    */
   private void writeOutput(SortedSet<String> ASCIITLDs) throws IOException {
     final DateFormat dateFormat = DateFormat.getDateTimeInstance
-      (DateFormat.FULL, DateFormat.FULL, Locale.US);
+      (DateFormat.FULL, DateFormat.FULL, Locale.ROOT);
     dateFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
     final Writer writer = new OutputStreamWriter
       (new FileOutputStream(outputFile), "UTF-8");
@@ -64,7 +64,7 @@ public class TestICUCollationKeyAnalyzer extends CollationTestBase {
   //
   public void testCollationKeySort() throws Exception {
     Analyzer usAnalyzer = new ICUCollationKeyAnalyzer
-      (TEST_VERSION_CURRENT, Collator.getInstance(Locale.US));
+      (TEST_VERSION_CURRENT, Collator.getInstance(Locale.ROOT));
     Analyzer franceAnalyzer = new ICUCollationKeyAnalyzer
       (TEST_VERSION_CURRENT, Collator.getInstance(Locale.FRANCE));
     Analyzer swedenAnalyzer = new ICUCollationKeyAnalyzer
@@ -73,7 +73,7 @@ public class TestICUCollationKeyAnalyzer extends CollationTestBase {
       (TEST_VERSION_CURRENT, Collator.getInstance(new Locale("da", "dk")));

     // The ICU Collator and java.text.Collator implementations differ in their
-    // orderings - "BFJHD" is the ordering for the ICU Collator for Locale.US.
+    // orderings - "BFJHD" is the ordering for the ICU Collator for Locale.ROOT.
     testCollationKeySort
       (usAnalyzer, franceAnalyzer, swedenAnalyzer, denmarkAnalyzer,
        "BFJHD", "ECAGI", "BJDFH", "BJDHF");
@@ -29,7 +29,7 @@ public class GenerateHTMLStripCharFilterSupplementaryMacros {
   private static final UnicodeSet BMP = new UnicodeSet("[\u0000-\uFFFF]");
   private static final String NL = System.getProperty("line.separator");
   private static final DateFormat DATE_FORMAT = DateFormat.getDateTimeInstance
-    (DateFormat.FULL, DateFormat.FULL, Locale.US);
+    (DateFormat.FULL, DateFormat.FULL, Locale.ROOT);
   static {
     DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
   }
@@ -32,7 +32,7 @@ public class GenerateJFlexSupplementaryMacros {
   private static final UnicodeSet BMP = new UnicodeSet("[\u0000-\uFFFF]");
   private static final String NL = System.getProperty("line.separator");
   private static final DateFormat DATE_FORMAT = DateFormat.getDateTimeInstance
-    (DateFormat.FULL, DateFormat.FULL, Locale.US);
+    (DateFormat.FULL, DateFormat.FULL, Locale.ROOT);
   static {
     DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
   }
@@ -607,7 +607,7 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {

   private void doTestBocchan(int numIterations) throws Exception {
     LineNumberReader reader = new LineNumberReader(new InputStreamReader(
-        this.getClass().getResourceAsStream("bocchan.utf-8")));
+        this.getClass().getResourceAsStream("bocchan.utf-8"), "UTF-8"));
     String line = reader.readLine();
     reader.close();

@@ -65,7 +65,7 @@ public class StempelStemmer {
     DataInputStream in = null;
     try {
       in = new DataInputStream(new BufferedInputStream(stemmerTable));
-      String method = in.readUTF().toUpperCase(Locale.ENGLISH);
+      String method = in.readUTF().toUpperCase(Locale.ROOT);
       if (method.indexOf('M') < 0) {
         return new org.egothor.stemmer.Trie(in);
       } else {
@@ -63,6 +63,7 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.LineNumberReader;
+import java.util.Locale;
 import java.util.StringTokenizer;

 /**
@@ -89,7 +90,7 @@ public class Compile {
       return;
     }

-    args[0].toUpperCase();
+    args[0].toUpperCase(Locale.ROOT);

     backward = args[0].charAt(0) == '-';
     int qq = (backward) ? 1 : 0;
@@ -127,7 +128,7 @@ public class Compile {
               new FileInputStream(args[i]), charset)));
       for (String line = in.readLine(); line != null; line = in.readLine()) {
         try {
-          line = line.toLowerCase();
+          line = line.toLowerCase(Locale.ROOT);
           StringTokenizer st = new StringTokenizer(line);
           String stem = st.nextToken();
           if (storeorig) {
@@ -55,9 +55,11 @@
 package org.egothor.stemmer;

 import java.io.BufferedReader;
-import java.io.FileReader;
+import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.io.LineNumberReader;
+import java.util.Locale;
 import java.util.StringTokenizer;

 /**
@@ -95,10 +97,11 @@ public class DiffIt {
       // System.out.println("[" + args[i] + "]");
       Diff diff = new Diff(ins, del, rep, nop);
       try {
-        in = new LineNumberReader(new BufferedReader(new FileReader(args[i])));
+        String charset = System.getProperty("egothor.stemmer.charset", "UTF-8");
+        in = new LineNumberReader(new BufferedReader(new InputStreamReader(new FileInputStream(args[i]), charset)));
         for (String line = in.readLine(); line != null; line = in.readLine()) {
           try {
-            line = line.toLowerCase();
+            line = line.toLowerCase(Locale.ROOT);
             StringTokenizer st = new StringTokenizer(line);
             String stem = st.nextToken();
             System.out.println(stem + " -a");
@@ -60,12 +60,14 @@ import java.io.BufferedReader;
 import java.io.DataInputStream;
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileReader;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.io.LineNumberReader;
 import java.net.URI;
+import java.util.Locale;
 import java.util.StringTokenizer;

+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;

 public class TestCompile extends LuceneTestCase {
@@ -107,7 +109,7 @@ public class TestCompile extends LuceneTestCase {
     Trie trie;
     DataInputStream is = new DataInputStream(new BufferedInputStream(
         new FileInputStream(path)));
-    String method = is.readUTF().toUpperCase();
+    String method = is.readUTF().toUpperCase(Locale.ROOT);
     if (method.indexOf('M') < 0) {
       trie = new Trie(is);
     } else {
@@ -120,11 +122,11 @@ public class TestCompile extends LuceneTestCase {
   private static void assertTrie(Trie trie, String file, boolean usefull,
       boolean storeorig) throws Exception {
     LineNumberReader in = new LineNumberReader(new BufferedReader(
-        new FileReader(file)));
+        new InputStreamReader(new FileInputStream(file), IOUtils.CHARSET_UTF_8)));

     for (String line = in.readLine(); line != null; line = in.readLine()) {
       try {
-        line = line.toLowerCase();
+        line = line.toLowerCase(Locale.ROOT);
         StringTokenizer st = new StringTokenizer(line);
         String stem = st.nextToken();
         if (storeorig) {
@@ -132,7 +134,7 @@ public class TestCompile extends LuceneTestCase {
           .getLastOnPath(stem);
       StringBuilder stm = new StringBuilder(stem);
       Diff.apply(stm, cmd);
-      assertEquals(stem.toLowerCase(), stm.toString().toLowerCase());
+      assertEquals(stem.toLowerCase(Locale.ROOT), stm.toString().toLowerCase(Locale.ROOT));
     }
     while (st.hasMoreTokens()) {
       String token = st.nextToken();
@@ -143,7 +145,7 @@ public class TestCompile extends LuceneTestCase {
           .getLastOnPath(token);
       StringBuilder stm = new StringBuilder(token);
       Diff.apply(stm, cmd);
-      assertEquals(stem.toLowerCase(), stm.toString().toLowerCase());
+      assertEquals(stem.toLowerCase(Locale.ROOT), stm.toString().toLowerCase(Locale.ROOT));
     }
   } catch (java.util.NoSuchElementException x) {
     // no base token (stem) on a line
@@ -262,9 +262,11 @@
   <target name="init" depends="module-build.init,resolve-icu,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet"/>

   <target name="clean-javacc">
-    <fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">
-      <containsregexp expression="Generated.*By.*JavaCC"/>
-    </fileset>
+    <delete>
+      <fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">
+        <containsregexp expression="Generated.*By.*JavaCC"/>
+      </fileset>
+    </delete>
   </target>

   <target name="javacc" depends="init,javacc-check" if="javacc.present">
@@ -23,6 +23,7 @@ import java.io.Reader;

 import org.apache.lucene.benchmark.byTask.utils.Algorithm;
 import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.util.IOUtils;


 /**
@@ -106,7 +107,7 @@ public class Benchmark {

     Benchmark benchmark = null;
     try {
-      benchmark = new Benchmark(new FileReader(algFile));
+      benchmark = new Benchmark(IOUtils.getDecodingReader(algFile, IOUtils.CHARSET_UTF_8));
     } catch (Exception e) {
       e.printStackTrace();
       System.exit(1);
@@ -18,12 +18,14 @@ package org.apache.lucene.benchmark.byTask.feeds;
  */

 import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.util.IOUtils;

 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileFilter;
-import java.io.FileReader;
+import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.text.DateFormat;
 import java.text.ParsePosition;
 import java.text.SimpleDateFormat;
@@ -161,7 +163,7 @@ public class DirContentSource extends ContentSource {
       dfi = new DateFormatInfo();
       dfi.pos = new ParsePosition(0);
       // date format: 30-MAR-1987 14:22:36.87
-      dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS", Locale.US);
+      dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS", Locale.ROOT);
       dfi.df.setLenient(true);
       dateFormat.set(dfi);
     }
@@ -198,7 +200,7 @@ public class DirContentSource extends ContentSource {
       name = f.getCanonicalPath()+"_"+iteration;
     }

-    BufferedReader reader = new BufferedReader(new FileReader(f));
+    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), IOUtils.CHARSET_UTF_8));
     String line = null;
     //First line is the date, 3rd is the title, rest is body
     String dateStr = reader.readLine();
@@ -29,6 +29,7 @@ import java.util.Locale;
 import java.util.Map;
 import java.util.Properties;
 import java.util.Random;
+import java.util.TimeZone;
 import java.util.concurrent.atomic.AtomicInteger;

 import org.apache.lucene.benchmark.byTask.utils.Config;
@@ -182,8 +183,8 @@ public class DocMaker implements Closeable {
   private boolean storeBytes = false;

   private static class DateUtil {
-    public SimpleDateFormat parser = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss", Locale.US);
-    public Calendar cal = Calendar.getInstance();
+    public SimpleDateFormat parser = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss", Locale.ROOT);
+    public Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT);
     public ParsePosition pos = new ParsePosition(0);
     public DateUtil() {
       parser.setLenient(true);
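
Calendar.getInstance() binds both the default time zone and the default locale, so the same epoch instant can decompose into different field values on different hosts. The illustration below (not from the patch) pins both, as the DocMaker change above does:

    import java.util.Calendar;
    import java.util.Locale;
    import java.util.TimeZone;

    public class CalendarPinned {
      public static void main(String[] args) {
        Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT);
        cal.setTimeInMillis(0L);
        // Always 1 Jan 1970 00:00 in GMT, regardless of where the JVM runs;
        // with the default zone, HOUR_OF_DAY would vary from host to host.
        System.out.println(cal.get(Calendar.YEAR) + "-"
            + (cal.get(Calendar.MONTH) + 1) + "-"
            + cal.get(Calendar.DAY_OF_MONTH) + " "
            + cal.get(Calendar.HOUR_OF_DAY) + ":00");
      }
    }
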
@@ -25,6 +25,7 @@ import java.io.InputStreamReader;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CodingErrorAction;
 import java.util.HashMap;
+import java.util.Locale;
 import java.util.Map;

 import org.apache.lucene.benchmark.byTask.utils.Config;
@@ -146,7 +147,7 @@ public class EnwikiContentSource extends ContentSource {
           case BODY:
             body = contents.toString();
             //workaround that startswith doesn't have an ignore case option, get at least 20 chars.
-            String startsWith = body.substring(0, Math.min(10, contents.length())).toLowerCase();
+            String startsWith = body.substring(0, Math.min(10, contents.length())).toLowerCase(Locale.ROOT);
             if (startsWith.startsWith("#redirect")) {
               body = null;
             }
@@ -5,6 +5,7 @@ import org.apache.lucene.queryparser.classic.ParseException;
 import org.apache.lucene.queryparser.classic.QueryParser;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.Version;

 import java.io.*;
@@ -59,13 +60,14 @@ public class FileBasedQueryMaker extends AbstractQueryMaker implements QueryMake
     {
       File file = new File(fileName);
       Reader reader = null;
+      // note: we use a decoding reader, so if your queries are screwed up you know
       if (file.exists()) {
-        reader = new FileReader(file);
+        reader = IOUtils.getDecodingReader(file, IOUtils.CHARSET_UTF_8);
       } else {
         //see if we can find it as a resource
         InputStream asStream = FileBasedQueryMaker.class.getClassLoader().getResourceAsStream(fileName);
         if (asStream != null) {
-          reader = new InputStreamReader(asStream);
+          reader = IOUtils.getDecodingReader(asStream, IOUtils.CHARSET_UTF_8);
         }
       }
       if (reader != null) {
@@ -35,7 +35,7 @@ public class LongToEnglishContentSource extends ContentSource{
   }

   // TODO: we could take param to specify locale...
-  private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ENGLISH,
+  private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ROOT,
       RuleBasedNumberFormat.SPELLOUT);
   @Override
   public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException {
@@ -37,7 +37,7 @@ public class LongToEnglishQueryMaker implements QueryMaker {
   protected QueryParser parser;

   // TODO: we could take param to specify locale...
-  private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ENGLISH,
+  private final RuleBasedNumberFormat rnbf = new RuleBasedNumberFormat(Locale.ROOT,
       RuleBasedNumberFormat.SPELLOUT);

   public Query makeQuery(int size) throws Exception {
@@ -19,8 +19,9 @@ package org.apache.lucene.benchmark.byTask.feeds;

 import java.io.BufferedReader;
 import java.io.File;
-import java.io.FileReader;
+import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.text.DateFormat;
 import java.text.ParsePosition;
 import java.text.SimpleDateFormat;
@@ -29,6 +30,7 @@ import java.util.Date;
 import java.util.Locale;

 import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.util.IOUtils;

 /**
  * A {@link ContentSource} reading from the Reuters collection.
@@ -74,7 +76,7 @@ public class ReutersContentSource extends ContentSource {
     if (dfi == null) {
       dfi = new DateFormatInfo();
       // date format: 30-MAR-1987 14:22:36.87
-      dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
+      dfi.df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.ROOT);
       dfi.df.setLenient(true);
       dfi.pos = new ParsePosition(0);
       dateFormat.set(dfi);
@@ -112,7 +114,7 @@ public class ReutersContentSource extends ContentSource {
       name = f.getCanonicalPath() + "_" + iteration;
     }

-    BufferedReader reader = new BufferedReader(new FileReader(f));
+    BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f), IOUtils.CHARSET_UTF_8));
     try {
       // First line is the date, 3rd is the title, rest is body
       String dateStr = reader.readLine();
@ -108,7 +108,7 @@ public class TrecContentSource extends ContentSource {
|
||||
dfi = new DateFormatInfo();
|
||||
dfi.dfs = new SimpleDateFormat[DATE_FORMATS.length];
|
||||
for (int i = 0; i < dfi.dfs.length; i++) {
|
||||
dfi.dfs[i] = new SimpleDateFormat(DATE_FORMATS[i], Locale.US);
|
||||
dfi.dfs[i] = new SimpleDateFormat(DATE_FORMATS[i], Locale.ROOT);
|
||||
dfi.dfs[i].setLenient(true);
|
||||
}
|
||||
dfi.pos = new ParsePosition(0);
|
||||
|
@@ -47,7 +47,7 @@ public abstract class TrecDocParser {
   static final Map<String,ParsePathType> pathName2Type = new HashMap<String,ParsePathType>();
   static {
     for (ParsePathType ppt : ParsePathType.values()) {
-      pathName2Type.put(ppt.name().toUpperCase(Locale.ENGLISH),ppt);
+      pathName2Type.put(ppt.name().toUpperCase(Locale.ROOT),ppt);
     }
   }

@@ -60,7 +60,7 @@ public abstract class TrecDocParser {
   public static ParsePathType pathType(File f) {
     int pathLength = 0;
     while (f != null && ++pathLength < MAX_PATH_LENGTH) {
-      ParsePathType ppt = pathName2Type.get(f.getName().toUpperCase(Locale.ENGLISH));
+      ParsePathType ppt = pathName2Type.get(f.getName().toUpperCase(Locale.ROOT));
       if (ppt!=null) {
         return ppt;
       }
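
Throughout the patch, Locale.ENGLISH and Locale.US on programmatic strings become Locale.ROOT. The distinction matters because case mapping is locale-sensitive; the classic failure mode (shown below as an illustration, not patch code) is the Turkish dotted/dotless i:

    import java.util.Locale;

    public class RootCaseMapping {
      public static void main(String[] args) {
        String name = "title";
        // Under a Turkish locale, 'i' upper-cases to dotted 'İ' (U+0130),
        // so enum/constant lookups keyed on ASCII names silently miss.
        System.out.println(name.toUpperCase(new Locale("tr", "TR"))); // TİTLE
        // Locale.ROOT guarantees locale-neutral mapping for identifiers.
        System.out.println(name.toUpperCase(Locale.ROOT));            // TITLE
      }
    }
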
@@ -0,0 +1,112 @@
+/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */
+/* JavaCCOptions:STATIC=false */
+package org.apache.lucene.benchmark.byTask.feeds.demohtml;
+
+/**
+ * This interface describes a character stream that maintains line and
+ * column number positions of the characters.  It also has the capability
+ * to backup the stream to some extent.  An implementation of this
+ * interface is used in the TokenManager implementation generated by
+ * JavaCCParser.
+ *
+ * All the methods except backup can be implemented in any fashion. backup
+ * needs to be implemented correctly for the correct operation of the lexer.
+ * Rest of the methods are all used to get information like line number,
+ * column number and the String that constitutes a token and are not used
+ * by the lexer. Hence their implementation won't affect the generated lexer's
+ * operation.
+ */
+
+public interface CharStream {
+
+  /**
+   * Returns the next character from the selected input.  The method
+   * of selecting the input is the responsibility of the class
+   * implementing this interface.  Can throw any java.io.IOException.
+   */
+  char readChar() throws java.io.IOException;
+
+  /**
+   * Returns the column position of the character last read.
+   * @deprecated
+   * @see #getEndColumn
+   */
+  int getColumn();
+
+  /**
+   * Returns the line number of the character last read.
+   * @deprecated
+   * @see #getEndLine
+   */
+  int getLine();
+
+  /**
+   * Returns the column number of the last character for current token (being
+   * matched after the last call to BeginTOken).
+   */
+  int getEndColumn();
+
+  /**
+   * Returns the line number of the last character for current token (being
+   * matched after the last call to BeginTOken).
+   */
+  int getEndLine();
+
+  /**
+   * Returns the column number of the first character for current token (being
+   * matched after the last call to BeginTOken).
+   */
+  int getBeginColumn();
+
+  /**
+   * Returns the line number of the first character for current token (being
+   * matched after the last call to BeginTOken).
+   */
+  int getBeginLine();
+
+  /**
+   * Backs up the input stream by amount steps. Lexer calls this method if it
+   * had already read some characters, but could not use them to match a
+   * (longer) token. So, they will be used again as the prefix of the next
+   * token and it is the implemetation's responsibility to do this right.
+   */
+  void backup(int amount);
+
+  /**
+   * Returns the next character that marks the beginning of the next token.
+   * All characters must remain in the buffer between two successive calls
+   * to this method to implement backup correctly.
+   */
+  char BeginToken() throws java.io.IOException;
+
+  /**
+   * Returns a string made up of characters from the marked token beginning
+   * to the current buffer position. Implementations have the choice of returning
+   * anything that they want to. For example, for efficiency, one might decide
+   * to just return null, which is a valid implementation.
+   */
+  String GetImage();
+
+  /**
+   * Returns an array of characters that make up the suffix of length 'len' for
+   * the currently matched token. This is used to build up the matched string
+   * for use in actions in the case of MORE. A simple and inefficient
+   * implementation of this is as follows :
+   *
+   *   {
+   *      String t = GetImage();
+   *      return t.substring(t.length() - len, t.length()).toCharArray();
+   *   }
+   */
+  char[] GetSuffix(int len);
+
+  /**
+   * The lexer calls this function to indicate that it is done with the stream
+   * and hence implementations can free any resources held by this class.
+   * Again, the body of this function can be just empty and it will not
+   * affect the lexer's operation.
+   */
+  void Done();
+
+}
+/* JavaCC - OriginalChecksum=e26d9399cd34335f985e19c1fa86c11b (do not edit this line) */
@@ -0,0 +1,123 @@
+// FastCharStream.java
+package org.apache.lucene.benchmark.byTask.feeds.demohtml;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+import java.io.*;
+
+/** An efficient implementation of JavaCC's CharStream interface.  <p>Note that
+ * this does not do line-number counting, but instead keeps track of the
+ * character position of the token in the input, as required by Lucene's {@link
+ * org.apache.lucene.analysis.Token} API.
+ * */
+public final class FastCharStream implements CharStream {
+  char[] buffer = null;
+
+  int bufferLength = 0;          // end of valid chars
+  int bufferPosition = 0;        // next char to read
+
+  int tokenStart = 0;            // offset in buffer
+  int bufferStart = 0;           // position in file of buffer
+
+  Reader input;                  // source of chars
+
+  /** Constructs from a Reader. */
+  public FastCharStream(Reader r) {
+    input = r;
+  }
+
+  public final char readChar() throws IOException {
+    if (bufferPosition >= bufferLength)
+      refill();
+    return buffer[bufferPosition++];
+  }
+
+  private final void refill() throws IOException {
+    int newPosition = bufferLength - tokenStart;
+
+    if (tokenStart == 0) {        // token won't fit in buffer
+      if (buffer == null) {       // first time: alloc buffer
+        buffer = new char[2048];
+      } else if (bufferLength == buffer.length) { // grow buffer
+        char[] newBuffer = new char[buffer.length*2];
+        System.arraycopy(buffer, 0, newBuffer, 0, bufferLength);
+        buffer = newBuffer;
+      }
+    } else {                      // shift token to front
+      System.arraycopy(buffer, tokenStart, buffer, 0, newPosition);
+    }
+
+    bufferLength = newPosition;   // update state
+    bufferPosition = newPosition;
+    bufferStart += tokenStart;
+    tokenStart = 0;
+
+    int charsRead =               // fill space in buffer
+      input.read(buffer, newPosition, buffer.length-newPosition);
+    if (charsRead == -1)
+      throw new IOException("read past eof");
+    else
+      bufferLength += charsRead;
+  }
+
+  public final char BeginToken() throws IOException {
+    tokenStart = bufferPosition;
+    return readChar();
+  }
+
+  public final void backup(int amount) {
+    bufferPosition -= amount;
+  }
+
+  public final String GetImage() {
+    return new String(buffer, tokenStart, bufferPosition - tokenStart);
+  }
+
+  public final char[] GetSuffix(int len) {
+    char[] value = new char[len];
+    System.arraycopy(buffer, bufferPosition - len, value, 0, len);
+    return value;
+  }
+
+  public final void Done() {
+    try {
+      input.close();
+    } catch (IOException e) {
+    }
+  }
+
+  public final int getColumn() {
+    return bufferStart + bufferPosition;
+  }
+  public final int getLine() {
+    return 1;
+  }
+  public final int getEndColumn() {
+    return bufferStart + bufferPosition;
+  }
+  public final int getEndLine() {
+    return 1;
+  }
+  public final int getBeginColumn() {
+    return bufferStart + tokenStart;
+  }
+  public final int getBeginLine() {
+    return 1;
+  }
+}
@@ -29,6 +29,10 @@ public class HTMLParser implements HTMLParserConstants {
   private MyPipedInputStream pipeInStream = null;
   private PipedOutputStream pipeOutStream = null;

+  public HTMLParser(Reader reader) {
+    this(new FastCharStream(reader));
+  }
+
   private class MyPipedInputStream extends PipedInputStream{

     public MyPipedInputStream(){
@@ -227,7 +231,7 @@ InterruptedException {
   Token t1, t2;
   boolean inImg = false;
     t1 = jj_consume_token(TagName);
-   String tagName = t1.image.toLowerCase(Locale.ENGLISH);
+   String tagName = t1.image.toLowerCase(Locale.ROOT);
    if(Tags.WS_ELEMS.contains(tagName) ) {
      addSpace();
    }
@@ -264,7 +268,7 @@ InterruptedException {
 )
          && t2 != null)
          {
-          currentMetaTag=t2.image.toLowerCase(Locale.ENGLISH);
+          currentMetaTag=t2.image.toLowerCase(Locale.ROOT);
           if(currentMetaTag != null && currentMetaContent != null) {
             addMetaTag();
           }
@@ -272,7 +276,7 @@ InterruptedException {
         if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 !=
 null)
         {
-          currentMetaContent=t2.image.toLowerCase(Locale.ENGLISH);
+          currentMetaContent=t2.image.toLowerCase(Locale.ROOT);
           if(currentMetaTag != null && currentMetaContent != null) {
             addMetaTag();
           }
@@ -464,7 +468,6 @@ null)

   /** Generated Token Manager. */
   public HTMLParserTokenManager token_source;
-  SimpleCharStream jj_input_stream;
   /** Current token. */
   public Token token;
   /** Next token. */
@@ -485,14 +488,9 @@ null)
   private boolean jj_rescan = false;
   private int jj_gc = 0;

-  /** Constructor with InputStream. */
-  public HTMLParser(java.io.InputStream stream) {
-     this(stream, null);
-  }
-  /** Constructor with InputStream and supplied encoding */
-  public HTMLParser(java.io.InputStream stream, String encoding) {
-    try { jj_input_stream = new SimpleCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
-    token_source = new HTMLParserTokenManager(jj_input_stream);
+  /** Constructor with user supplied CharStream. */
+  public HTMLParser(CharStream stream) {
+    token_source = new HTMLParserTokenManager(stream);
     token = new Token();
     jj_ntk = -1;
     jj_gen = 0;
@@ -501,35 +499,8 @@ null)
   }

   /** Reinitialise. */
-  public void ReInit(java.io.InputStream stream) {
-     ReInit(stream, null);
-  }
-  /** Reinitialise. */
-  public void ReInit(java.io.InputStream stream, String encoding) {
-    try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
-    token_source.ReInit(jj_input_stream);
-    token = new Token();
-    jj_ntk = -1;
-    jj_gen = 0;
-    for (int i = 0; i < 14; i++) jj_la1[i] = -1;
-    for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
-  }
-
-  /** Constructor. */
-  public HTMLParser(java.io.Reader stream) {
-    jj_input_stream = new SimpleCharStream(stream, 1, 1);
-    token_source = new HTMLParserTokenManager(jj_input_stream);
-    token = new Token();
-    jj_ntk = -1;
-    jj_gen = 0;
-    for (int i = 0; i < 14; i++) jj_la1[i] = -1;
-    for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
-  }
-
-  /** Reinitialise. */
-  public void ReInit(java.io.Reader stream) {
-    jj_input_stream.ReInit(stream, 1, 1);
-    token_source.ReInit(jj_input_stream);
+  public void ReInit(CharStream stream) {
+    token_source.ReInit(stream);
     token = new Token();
     jj_ntk = -1;
     jj_gen = 0;
@@ -631,7 +602,7 @@ null)
     return (jj_ntk = jj_nt.kind);
   }

-  private java.util.List<int[]> jj_expentries = new java.util.ArrayList<int[]>();
+  private java.util.List jj_expentries = new java.util.ArrayList();
   private int[] jj_expentry;
   private int jj_kind = -1;
   private int[] jj_lasttokens = new int[100];
@@ -691,7 +662,7 @@ null)
     jj_add_error_token(0, 0);
     int[][] exptokseq = new int[jj_expentries.size()][];
     for (int i = 0; i < jj_expentries.size(); i++) {
-      exptokseq[i] = jj_expentries.get(i);
+      exptokseq[i] = (int[])jj_expentries.get(i);
     }
     return new ParseException(token, exptokseq, tokenImage);
   }
@ -22,6 +22,7 @@ options {
|
||||
//DEBUG_LOOKAHEAD = true;
|
||||
//DEBUG_TOKEN_MANAGER = true;
|
||||
UNICODE_INPUT = true;
|
||||
USER_CHAR_STREAM=true;
|
||||
}
|
||||
|
||||
PARSER_BEGIN(HTMLParser)
|
||||
@ -56,6 +57,10 @@ public class HTMLParser {
|
||||
private MyPipedInputStream pipeInStream = null;
|
||||
private PipedOutputStream pipeOutStream = null;
|
||||
|
||||
public HTMLParser(Reader reader) {
|
||||
this(new FastCharStream(reader));
|
||||
}
|
||||
|
||||
private class MyPipedInputStream extends PipedInputStream{
|
||||
|
||||
public MyPipedInputStream(){
|
||||
@ -227,7 +232,7 @@ void Tag() throws IOException :
|
||||
}
|
||||
{
|
||||
t1=<TagName> {
|
||||
String tagName = t1.image.toLowerCase(Locale.ENGLISH);
|
||||
String tagName = t1.image.toLowerCase(Locale.ROOT);
|
||||
if(Tags.WS_ELEMS.contains(tagName) ) {
|
||||
addSpace();
|
||||
}
|
||||
@ -249,7 +254,7 @@ void Tag() throws IOException :
|
||||
)
|
||||
&& t2 != null)
|
||||
{
|
||||
currentMetaTag=t2.image.toLowerCase(Locale.ENGLISH);
|
||||
currentMetaTag=t2.image.toLowerCase(Locale.ROOT);
|
||||
if(currentMetaTag != null && currentMetaContent != null) {
|
||||
addMetaTag();
|
||||
}
|
||||
@ -257,7 +262,7 @@ void Tag() throws IOException :
|
||||
if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 !=
|
||||
null)
|
||||
{
|
||||
currentMetaContent=t2.image.toLowerCase(Locale.ENGLISH);
|
||||
currentMetaContent=t2.image.toLowerCase(Locale.ROOT);
|
||||
if(currentMetaTag != null && currentMetaContent != null) {
|
||||
addMetaTag();
|
||||
}
|
||||
|
@ -464,7 +464,7 @@ private int jjMoveNfa_0(int startState, int curPos)
|
||||
}
|
||||
else
|
||||
{
|
||||
int hiByte = (curChar >> 8);
|
||||
int hiByte = (int)(curChar >> 8);
|
||||
int i1 = hiByte >> 6;
|
||||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
@ -569,7 +569,7 @@ private int jjMoveNfa_5(int startState, int curPos)
|
||||
}
|
||||
else
|
||||
{
|
||||
int hiByte = (curChar >> 8);
|
||||
int hiByte = (int)(curChar >> 8);
|
||||
int i1 = hiByte >> 6;
|
||||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
@ -670,7 +670,7 @@ private int jjMoveNfa_7(int startState, int curPos)
|
||||
}
|
||||
else
|
||||
{
|
||||
int hiByte = (curChar >> 8);
|
||||
int hiByte = (int)(curChar >> 8);
|
||||
int i1 = hiByte >> 6;
|
||||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
@ -766,7 +766,7 @@ private int jjMoveNfa_4(int startState, int curPos)
|
||||
}
|
||||
else
|
||||
{
|
||||
int hiByte = (curChar >> 8);
|
||||
int hiByte = (int)(curChar >> 8);
|
||||
int i1 = hiByte >> 6;
|
||||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
@ -892,7 +892,7 @@ private int jjMoveNfa_3(int startState, int curPos)
|
||||
}
|
||||
else
|
||||
{
|
||||
int hiByte = (curChar >> 8);
|
||||
int hiByte = (int)(curChar >> 8);
|
||||
int i1 = hiByte >> 6;
|
||||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
@ -1061,7 +1061,7 @@ private int jjMoveNfa_6(int startState, int curPos)
|
||||
}
|
||||
else
|
||||
{
|
||||
int hiByte = (curChar >> 8);
|
||||
int hiByte = (int)(curChar >> 8);
|
||||
int i1 = hiByte >> 6;
|
||||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
@ -1205,7 +1205,7 @@ private int jjMoveNfa_1(int startState, int curPos)
|
||||
}
|
||||
else
|
||||
{
|
||||
int hiByte = (curChar >> 8);
|
||||
int hiByte = (int)(curChar >> 8);
|
||||
int i1 = hiByte >> 6;
|
||||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
@ -1361,7 +1361,7 @@ private int jjMoveNfa_2(int startState, int curPos)
|
||||
}
|
||||
else
|
||||
{
|
||||
int hiByte = (curChar >> 8);
|
||||
int hiByte = (int)(curChar >> 8);
|
||||
int i1 = hiByte >> 6;
|
||||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
@ -1441,25 +1441,23 @@ static final long[] jjtoToken = {
|
||||
static final long[] jjtoSkip = {
|
||||
0x400000L,
|
||||
};
|
||||
protected SimpleCharStream input_stream;
|
||||
protected CharStream input_stream;
|
||||
private final int[] jjrounds = new int[28];
|
||||
private final int[] jjstateSet = new int[56];
|
||||
protected char curChar;
|
||||
/** Constructor. */
|
||||
public HTMLParserTokenManager(SimpleCharStream stream){
|
||||
if (SimpleCharStream.staticFlag)
|
||||
throw new Error("ERROR: Cannot use a static CharStream class with a non-static lexical analyzer.");
|
||||
public HTMLParserTokenManager(CharStream stream){
|
||||
input_stream = stream;
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public HTMLParserTokenManager(SimpleCharStream stream, int lexState){
|
||||
public HTMLParserTokenManager(CharStream stream, int lexState){
|
||||
this(stream);
|
||||
SwitchTo(lexState);
|
||||
}
|
||||
|
||||
/** Reinitialise parser. */
|
||||
public void ReInit(SimpleCharStream stream)
|
||||
public void ReInit(CharStream stream)
|
||||
{
|
||||
jjmatchedPos = jjnewStateCnt = 0;
|
||||
curLexState = defaultLexState;
|
||||
@ -1475,7 +1473,7 @@ private void ReInitRounds()
|
||||
}
|
||||
|
||||
/** Reinitialise parser. */
|
||||
public void ReInit(SimpleCharStream stream, int lexState)
|
||||
public void ReInit(CharStream stream, int lexState)
|
||||
{
|
||||
ReInit(stream);
|
||||
SwitchTo(lexState);
|
||||
|
@ -195,4 +195,4 @@ public class ParseException extends Exception {
|
||||
}
|
||||
|
||||
}
|
||||
/* JavaCC - OriginalChecksum=e5376178619291bc9d2c0c6647dc3cef (do not edit this line) */
|
||||
/* JavaCC - OriginalChecksum=e449d0e43f3d85deb1260a88b7e90fcd (do not edit this line) */
|
||||
|
@ -1,472 +0,0 @@
|
||||
/* Generated By:JavaCC: Do not edit this line. SimpleCharStream.java Version 4.1 */
|
||||
/* JavaCCOptions:STATIC=false */
|
||||
package org.apache.lucene.benchmark.byTask.feeds.demohtml;
|
||||
|
||||
/**
|
||||
* An implementation of interface CharStream, where the stream is assumed to
|
||||
* contain only ASCII characters (without unicode processing).
|
||||
*/
|
||||
|
||||
public class SimpleCharStream
|
||||
{
|
||||
/** Whether parser is static. */
|
||||
public static final boolean staticFlag = false;
|
||||
int bufsize;
|
||||
int available;
|
||||
int tokenBegin;
|
||||
/** Position in buffer. */
|
||||
public int bufpos = -1;
|
||||
protected int bufline[];
|
||||
protected int bufcolumn[];
|
||||
|
||||
protected int column = 0;
|
||||
protected int line = 1;
|
||||
|
||||
protected boolean prevCharIsCR = false;
|
||||
protected boolean prevCharIsLF = false;
|
||||
|
||||
protected java.io.Reader inputStream;
|
||||
|
||||
protected char[] buffer;
|
||||
protected int maxNextCharInd = 0;
|
||||
protected int inBuf = 0;
|
||||
protected int tabSize = 8;
|
||||
|
||||
protected void setTabSize(int i) { tabSize = i; }
|
||||
protected int getTabSize(int i) { return tabSize; }
|
||||
|
||||
|
||||
protected void ExpandBuff(boolean wrapAround)
|
||||
{
|
||||
char[] newbuffer = new char[bufsize + 2048];
|
||||
int newbufline[] = new int[bufsize + 2048];
|
||||
int newbufcolumn[] = new int[bufsize + 2048];
|
||||
|
||||
try
|
||||
{
|
||||
if (wrapAround)
|
||||
{
|
||||
System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
|
||||
System.arraycopy(buffer, 0, newbuffer,
|
||||
bufsize - tokenBegin, bufpos);
|
||||
buffer = newbuffer;
|
||||
|
||||
System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
|
||||
System.arraycopy(bufline, 0, newbufline, bufsize - tokenBegin, bufpos);
|
||||
bufline = newbufline;
|
||||
|
||||
System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
|
||||
System.arraycopy(bufcolumn, 0, newbufcolumn, bufsize - tokenBegin, bufpos);
|
||||
bufcolumn = newbufcolumn;
|
||||
|
||||
maxNextCharInd = (bufpos += (bufsize - tokenBegin));
|
||||
}
|
||||
else
|
||||
{
|
||||
System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
|
||||
buffer = newbuffer;
|
||||
|
||||
System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
|
||||
bufline = newbufline;
|
||||
|
||||
System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
|
||||
bufcolumn = newbufcolumn;
|
||||
|
||||
maxNextCharInd = (bufpos -= tokenBegin);
|
||||
}
|
||||
}
|
||||
catch (Throwable t)
|
||||
{
|
||||
throw new Error(t.getMessage());
|
||||
}
|
||||
|
||||
|
||||
bufsize += 2048;
|
||||
available = bufsize;
|
||||
tokenBegin = 0;
|
||||
}
|
||||
|
||||
protected void FillBuff() throws java.io.IOException
|
||||
{
|
||||
if (maxNextCharInd == available)
|
||||
{
|
||||
if (available == bufsize)
|
||||
{
|
||||
if (tokenBegin > 2048)
|
||||
{
|
||||
bufpos = maxNextCharInd = 0;
|
||||
available = tokenBegin;
|
||||
}
|
||||
else if (tokenBegin < 0)
|
||||
bufpos = maxNextCharInd = 0;
|
||||
else
|
||||
ExpandBuff(false);
|
||||
}
|
||||
else if (available > tokenBegin)
|
||||
available = bufsize;
|
||||
else if ((tokenBegin - available) < 2048)
|
||||
ExpandBuff(true);
|
||||
else
|
||||
available = tokenBegin;
|
||||
}
|
||||
|
||||
int i;
|
||||
try {
|
||||
if ((i = inputStream.read(buffer, maxNextCharInd,
|
||||
available - maxNextCharInd)) == -1)
|
||||
{
|
||||
inputStream.close();
|
||||
throw new java.io.IOException();
|
||||
}
|
||||
else
|
||||
maxNextCharInd += i;
|
||||
return;
|
||||
}
|
||||
catch(java.io.IOException e) {
|
||||
--bufpos;
|
||||
backup(0);
|
||||
if (tokenBegin == -1)
|
||||
tokenBegin = bufpos;
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
/** Start. */
|
||||
public char BeginToken() throws java.io.IOException
|
||||
{
|
||||
tokenBegin = -1;
|
||||
char c = readChar();
|
||||
tokenBegin = bufpos;
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
protected void UpdateLineColumn(char c)
|
||||
{
|
||||
column++;
|
||||
|
||||
if (prevCharIsLF)
|
||||
{
|
||||
prevCharIsLF = false;
|
||||
line += (column = 1);
|
||||
}
|
||||
else if (prevCharIsCR)
|
||||
{
|
||||
prevCharIsCR = false;
|
||||
if (c == '\n')
|
||||
{
|
||||
prevCharIsLF = true;
|
||||
}
|
||||
else
|
||||
line += (column = 1);
|
||||
}
|
||||
|
||||
switch (c)
|
||||
{
|
||||
case '\r' :
|
||||
prevCharIsCR = true;
|
||||
break;
|
||||
case '\n' :
|
||||
prevCharIsLF = true;
|
||||
break;
|
||||
case '\t' :
|
||||
column--;
|
||||
column += (tabSize - (column % tabSize));
|
||||
break;
|
||||
default :
|
||||
break;
|
||||
}
|
||||
|
||||
bufline[bufpos] = line;
|
||||
bufcolumn[bufpos] = column;
|
||||
}
|
||||
|
||||
/** Read a character. */
|
||||
public char readChar() throws java.io.IOException
|
||||
{
|
||||
if (inBuf > 0)
|
||||
{
|
||||
--inBuf;
|
||||
|
||||
if (++bufpos == bufsize)
|
||||
bufpos = 0;
|
||||
|
||||
return buffer[bufpos];
|
||||
}
|
||||
|
||||
if (++bufpos >= maxNextCharInd)
|
||||
FillBuff();
|
||||
|
||||
char c = buffer[bufpos];
|
||||
|
||||
UpdateLineColumn(c);
|
||||
return c;
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated
|
||||
* @see #getEndColumn
|
||||
*/
|
||||
|
||||
public int getColumn() {
|
||||
return bufcolumn[bufpos];
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated
|
||||
* @see #getEndLine
|
||||
*/
|
||||
|
||||
public int getLine() {
|
||||
return bufline[bufpos];
|
||||
}
|
||||
|
||||
/** Get token end column number. */
|
||||
public int getEndColumn() {
|
||||
return bufcolumn[bufpos];
|
||||
}
|
||||
|
||||
/** Get token end line number. */
|
||||
public int getEndLine() {
|
||||
return bufline[bufpos];
|
||||
}
|
||||
|
||||
/** Get token beginning column number. */
|
||||
public int getBeginColumn() {
|
||||
return bufcolumn[tokenBegin];
|
||||
}
|
||||
|
||||
/** Get token beginning line number. */
|
||||
public int getBeginLine() {
|
||||
return bufline[tokenBegin];
|
||||
}
|
||||
|
||||
/** Backup a number of characters. */
|
||||
public void backup(int amount) {
|
||||
|
||||
inBuf += amount;
|
||||
if ((bufpos -= amount) < 0)
|
||||
bufpos += bufsize;
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public SimpleCharStream(java.io.Reader dstream, int startline,
|
||||
int startcolumn, int buffersize)
|
||||
{
|
||||
inputStream = dstream;
|
||||
line = startline;
|
||||
column = startcolumn - 1;
|
||||
|
||||
available = bufsize = buffersize;
|
||||
buffer = new char[buffersize];
|
||||
bufline = new int[buffersize];
|
||||
bufcolumn = new int[buffersize];
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public SimpleCharStream(java.io.Reader dstream, int startline,
|
||||
int startcolumn)
|
||||
{
|
||||
this(dstream, startline, startcolumn, 4096);
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public SimpleCharStream(java.io.Reader dstream)
|
||||
{
|
||||
this(dstream, 1, 1, 4096);
|
||||
}
|
||||
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.Reader dstream, int startline,
|
||||
int startcolumn, int buffersize)
|
||||
{
|
||||
inputStream = dstream;
|
||||
line = startline;
|
||||
column = startcolumn - 1;
|
||||
|
||||
if (buffer == null || buffersize != buffer.length)
|
||||
{
|
||||
available = bufsize = buffersize;
|
||||
buffer = new char[buffersize];
|
||||
bufline = new int[buffersize];
|
||||
bufcolumn = new int[buffersize];
|
||||
}
|
||||
prevCharIsLF = prevCharIsCR = false;
|
||||
tokenBegin = inBuf = maxNextCharInd = 0;
|
||||
bufpos = -1;
|
||||
}
|
||||
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.Reader dstream, int startline,
|
||||
int startcolumn)
|
||||
{
|
||||
ReInit(dstream, startline, startcolumn, 4096);
|
||||
}
|
||||
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.Reader dstream)
|
||||
{
|
||||
ReInit(dstream, 1, 1, 4096);
|
||||
}
|
||||
/** Constructor. */
|
||||
public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
|
||||
int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
|
||||
{
|
||||
this(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public SimpleCharStream(java.io.InputStream dstream, int startline,
|
||||
int startcolumn, int buffersize)
|
||||
{
|
||||
this(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
|
||||
int startcolumn) throws java.io.UnsupportedEncodingException
|
||||
{
|
||||
this(dstream, encoding, startline, startcolumn, 4096);
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public SimpleCharStream(java.io.InputStream dstream, int startline,
|
||||
int startcolumn)
|
||||
{
|
||||
this(dstream, startline, startcolumn, 4096);
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public SimpleCharStream(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
|
||||
{
|
||||
this(dstream, encoding, 1, 1, 4096);
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public SimpleCharStream(java.io.InputStream dstream)
|
||||
{
|
||||
this(dstream, 1, 1, 4096);
|
||||
}
|
||||
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.InputStream dstream, String encoding, int startline,
|
||||
int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
|
||||
{
|
||||
ReInit(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
|
||||
}
|
||||
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.InputStream dstream, int startline,
|
||||
int startcolumn, int buffersize)
|
||||
{
|
||||
ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
|
||||
}
|
||||
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
|
||||
{
|
||||
ReInit(dstream, encoding, 1, 1, 4096);
|
||||
}
|
||||
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.InputStream dstream)
|
||||
{
|
||||
ReInit(dstream, 1, 1, 4096);
|
||||
}
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.InputStream dstream, String encoding, int startline,
|
||||
int startcolumn) throws java.io.UnsupportedEncodingException
|
||||
{
|
||||
ReInit(dstream, encoding, startline, startcolumn, 4096);
|
||||
}
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.InputStream dstream, int startline,
|
||||
int startcolumn)
|
||||
{
|
||||
ReInit(dstream, startline, startcolumn, 4096);
|
||||
}
|
||||
/** Get token literal value. */
|
||||
public String GetImage()
|
||||
{
|
||||
if (bufpos >= tokenBegin)
|
||||
return new String(buffer, tokenBegin, bufpos - tokenBegin + 1);
|
||||
else
|
||||
return new String(buffer, tokenBegin, bufsize - tokenBegin) +
|
||||
new String(buffer, 0, bufpos + 1);
|
||||
}
|
||||
|
||||
/** Get the suffix. */
|
||||
public char[] GetSuffix(int len)
|
||||
{
|
||||
char[] ret = new char[len];
|
||||
|
||||
if ((bufpos + 1) >= len)
|
||||
System.arraycopy(buffer, bufpos - len + 1, ret, 0, len);
|
||||
else
|
||||
{
|
||||
System.arraycopy(buffer, bufsize - (len - bufpos - 1), ret, 0,
|
||||
len - bufpos - 1);
|
||||
System.arraycopy(buffer, 0, ret, len - bufpos - 1, bufpos + 1);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** Reset buffer when finished. */
|
||||
public void Done()
|
||||
{
|
||||
buffer = null;
|
||||
bufline = null;
|
||||
bufcolumn = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Method to adjust line and column numbers for the start of a token.
|
||||
*/
|
||||
public void adjustBeginLineColumn(int newLine, int newCol)
|
||||
{
|
||||
int start = tokenBegin;
|
||||
int len;
|
||||
|
||||
if (bufpos >= tokenBegin)
|
||||
{
|
||||
len = bufpos - tokenBegin + inBuf + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
len = bufsize - tokenBegin + bufpos + 1 + inBuf;
|
||||
}
|
||||
|
||||
int i = 0, j = 0, k = 0;
|
||||
int nextColDiff = 0, columnDiff = 0;
|
||||
|
||||
while (i < len &&
|
||||
bufline[j = start % bufsize] == bufline[k = ++start % bufsize])
|
||||
{
|
||||
bufline[j] = newLine;
|
||||
nextColDiff = columnDiff + bufcolumn[k] - bufcolumn[j];
|
||||
bufcolumn[j] = newCol + columnDiff;
|
||||
columnDiff = nextColDiff;
|
||||
i++;
|
||||
}
|
||||
|
||||
if (i < len)
|
||||
{
|
||||
bufline[j] = newLine++;
|
||||
bufcolumn[j] = newCol + columnDiff;
|
||||
|
||||
while (i++ < len)
|
||||
{
|
||||
if (bufline[j = start % bufsize] != bufline[++start % bufsize])
|
||||
bufline[j] = newLine++;
|
||||
else
|
||||
bufline[j] = newLine;
|
||||
}
|
||||
}
|
||||
|
||||
line = bufline[j];
|
||||
column = bufcolumn[j];
|
||||
}
|
||||
|
||||
}
|
||||
/* JavaCC - OriginalChecksum=7c2e625567f11c3058995b779d0149ad (do not edit this line) */
|
@ -121,4 +121,4 @@ public class Token {
|
||||
}
|
||||
|
||||
}
|
||||
/* JavaCC - OriginalChecksum=e49c2a0c10d50ff2ebd0639552330ce7 (do not edit this line) */
|
||||
/* JavaCC - OriginalChecksum=24643dc85fd6daeec42ceba20b46ee61 (do not edit this line) */
|
||||
|
@ -138,4 +138,4 @@ public class TokenMgrError extends Error
|
||||
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
|
||||
}
|
||||
}
|
||||
/* JavaCC - OriginalChecksum=3aee554f696e5d7a18b1ad330c1de53f (do not edit this line) */
|
||||
/* JavaCC - OriginalChecksum=538f0da130356fcc0bc7db621ab0389d (do not edit this line) */
|
||||
|
@ -18,6 +18,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
|
||||
*/
|
||||
|
||||
import java.text.NumberFormat;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
|
||||
@ -61,7 +62,7 @@ public class AddDocTask extends PerfTask {
|
||||
|
||||
@Override
|
||||
protected String getLogMessage(int recsCount) {
|
||||
return String.format("added %9d docs",recsCount);
|
||||
return String.format(Locale.ROOT, "added %9d docs",recsCount);
|
||||
}
|
||||
|
||||
@Override
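This and the surrounding task hunks pin every String.format call to Locale.ROOT so benchmark log lines render identically regardless of the JVM's default locale. A small self-contained illustration (the values are made up):

import java.util.Locale;

public class FormatLocaleDemo {
  public static void main(String[] args) {
    double seconds = 1234.5;
    // A German default locale renders this as "1234,50" (comma separator).
    System.out.println(String.format(Locale.GERMANY, "%.2f", seconds));
    // Locale.ROOT always renders "1234.50", so logs stay machine-parseable.
    System.out.println(String.format(Locale.ROOT, "%.2f", seconds));
  }
}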
@@ -40,6 +40,7 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.charset.Charset;

/**
* Create an index. <br>
@@ -182,7 +183,7 @@ public class CreateIndexTask extends PerfTask {
iwc.setInfoStream(System.err);
} else {
File f = new File(infoStreamVal).getAbsoluteFile();
iwc.setInfoStream(new PrintStream(new BufferedOutputStream(new FileOutputStream(f))));
iwc.setInfoStream(new PrintStream(new BufferedOutputStream(new FileOutputStream(f)), false, Charset.defaultCharset().name()));
}
}
IndexWriter writer = new IndexWriter(runData.getDirectory(), iwc);
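Here the platform default charset is still the desired behavior (the info stream is console-style output), so instead of banning it the hunk spells it out with Charset.defaultCharset().name(). The bytes written are unchanged, but the dependency becomes explicit and the call no longer trips the forbidden-API scan. A hedged sketch of the idiom:

import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;

public class DeliberateDefaultCharset {
  public static void main(String[] args) throws UnsupportedEncodingException {
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    // Implicit (flagged): new PrintStream(sink) silently uses file.encoding.
    // Explicit (passes): same behavior, but the choice is visible and audited.
    PrintStream out = new PrintStream(sink, false, Charset.defaultCharset().name());
    out.println("same output as before, now intentional");
    out.flush();
  }
}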
@@ -17,6 +17,8 @@ package org.apache.lucene.benchmark.byTask.tasks;
* limitations under the License.
*/

import java.util.Locale;

import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.stats.Points;
import org.apache.lucene.benchmark.byTask.stats.TaskStats;
@@ -266,7 +268,7 @@ public abstract class PerfTask implements Cloneable {
public void tearDown() throws Exception {
if (++logStepCount % logStep == 0) {
double time = (System.currentTimeMillis() - runData.getStartTimeMillis()) / 1000.0;
System.out.println(String.format("%7.2f",time) + " sec --> "
System.out.println(String.format(Locale.ROOT, "%7.2f",time) + " sec --> "
+ Thread.currentThread().getName() + " " + getLogMessage(logStepCount));
}
}

@@ -77,7 +77,7 @@ public class SearchWithSortTask extends ReadTask {
} else {
throw new RuntimeException("You must specify the sort type ie page:int,subject:string");
}
sortField0 = new SortField(fieldName, SortField.Type.valueOf(typeString.toUpperCase(Locale.ENGLISH)));
sortField0 = new SortField(fieldName, SortField.Type.valueOf(typeString.toUpperCase(Locale.ROOT)));
}
sortFields[upto++] = sortField0;
}
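The Locale.ROOT uppercasing above is not cosmetic: Enum.valueOf is an exact-match lookup, and under a Turkish default locale "int".toUpperCase() yields "İNT" (dotted capital İ), so the SortField.Type lookup would throw. A standalone sketch of the pitfall; the enum here is a stand-in, not Lucene's:

import java.util.Locale;

public class UppercaseValueOfDemo {
  enum SortType { INT, STRING }

  public static void main(String[] args) {
    String spec = "int";
    // Prints "İNT" -- SortType.valueOf("İNT") would throw IllegalArgumentException.
    System.out.println(spec.toUpperCase(new Locale("tr", "TR")));
    // Locale.ROOT always yields "INT", so the enum lookup is stable:
    System.out.println(SortType.valueOf(spec.toUpperCase(Locale.ROOT)));
  }
}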
@@ -19,6 +19,7 @@ package org.apache.lucene.benchmark.byTask.tasks;

import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.text.NumberFormat;

import org.apache.lucene.benchmark.byTask.PerfRunData;
@@ -428,7 +429,7 @@ public class TaskSequence extends PerfTask {
sb.append(padd);
sb.append(!letChildReport ? ">" : (parallel ? "]" : "}"));
if (fixedTime) {
sb.append(" " + NumberFormat.getNumberInstance().format(runTimeSec) + "s");
sb.append(" " + NumberFormat.getNumberInstance(Locale.ROOT).format(runTimeSec) + "s");
} else if (repetitions>1) {
sb.append(" * " + repetitions);
} else if (repetitions==REPEAT_EXHAUST) {
@@ -487,7 +488,7 @@ public class TaskSequence extends PerfTask {
if (rate>0) {
seqName += "_" + rate + (perMin?"/min":"/sec");
}
if (parallel && seqName.toLowerCase().indexOf("par")<0) {
if (parallel && seqName.toLowerCase(Locale.ROOT).indexOf("par")<0) {
seqName += "_Par";
}
}

@@ -22,6 +22,7 @@ import java.io.StringReader;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Locale;

import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
@@ -159,7 +160,7 @@ public class Algorithm {
} else {
stok.nextToken();
if (stok.ttype!=StreamTokenizer.TT_WORD) throw new Exception("expected rate unit: 'min' or 'sec' - "+stok.toString());
String unit = stok.sval.toLowerCase();
String unit = stok.sval.toLowerCase(Locale.ROOT);
if ("min".equals(unit)) {
((TaskSequence)prevTask).setRate((int)stok.nval,true); // set rate per min
} else if ("sec".equals(unit)) {

@@ -18,6 +18,7 @@ package org.apache.lucene.benchmark.byTask.utils;
*/

import java.text.NumberFormat;
import java.util.Locale;

/**
* Formatting utilities (for reports).
@@ -25,9 +26,9 @@ import java.text.NumberFormat;
public class Format {

private static NumberFormat numFormat [] = {
NumberFormat.getInstance(),
NumberFormat.getInstance(),
NumberFormat.getInstance(),
NumberFormat.getInstance(Locale.ROOT),
NumberFormat.getInstance(Locale.ROOT),
NumberFormat.getInstance(Locale.ROOT),
};
private static final String padd = " ";
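NumberFormat.getInstance() with no argument is the same class of default-locale dependency; grouping and decimal separators would otherwise vary per machine. A small illustration:

import java.text.NumberFormat;
import java.util.Locale;

public class ReportNumberFormat {
  public static void main(String[] args) {
    double v = 1234567.891;
    // Locale-dependent: "1.234.567,891" under Locale.GERMANY.
    System.out.println(NumberFormat.getInstance(Locale.GERMANY).format(v));
    // Pinned: "1,234,567.891" on every machine.
    System.out.println(NumberFormat.getInstance(Locale.ROOT).format(v));
  }
}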
@@ -99,7 +99,7 @@ public class StreamUtils {
String fileName = file.getName();
int idx = fileName.lastIndexOf('.');
if (idx != -1) {
type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ENGLISH));
type = extensionToType.get(fileName.substring(idx).toLowerCase(Locale.ROOT));
}
return type==null ? Type.PLAIN : type;
}

@@ -19,6 +19,7 @@ package org.apache.lucene.benchmark.quality;
import java.io.PrintWriter;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Locale;

/**
* Results of quality benchmark run for a single query or for a set of queries.
@@ -141,7 +142,7 @@ public class QualityStats {
logger.println(title);
}
prefix = prefix==null ? "" : prefix;
NumberFormat nf = NumberFormat.getInstance();
NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
nf.setMaximumFractionDigits(3);
nf.setMinimumFractionDigits(3);
nf.setGroupingUsed(true);

@@ -24,11 +24,13 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.IOUtils;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.util.HashSet;
import java.util.Set;

@@ -51,7 +53,7 @@ public class QueryDriver {

File topicsFile = new File(args[0]);
File qrelsFile = new File(args[1]);
SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2]), "lucene");
SubmissionReport submitLog = new SubmissionReport(new PrintWriter(args[2], "UTF-8"), "lucene");
FSDirectory dir = FSDirectory.open(new File(args[3]));
String fieldSpec = args.length == 5 ? args[4] : "T"; // default to Title-only if not specified.
IndexReader reader = DirectoryReader.open(dir);
@@ -60,14 +62,14 @@ public class QueryDriver {
int maxResults = 1000;
String docNameField = "docname";

PrintWriter logger = new PrintWriter(System.out, true);
PrintWriter logger = new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()), true);

// use trec utilities to read trec topics into quality queries
TrecTopicsReader qReader = new TrecTopicsReader();
QualityQuery qqs[] = qReader.readQueries(new BufferedReader(new FileReader(topicsFile)));
QualityQuery qqs[] = qReader.readQueries(new BufferedReader(IOUtils.getDecodingReader(topicsFile, IOUtils.CHARSET_UTF_8)));

// prepare judge, with trec utilities that read from a QRels file
Judge judge = new TrecJudge(new BufferedReader(new FileReader(qrelsFile)));
Judge judge = new TrecJudge(new BufferedReader(IOUtils.getDecodingReader(qrelsFile, IOUtils.CHARSET_UTF_8)));

// validate topics & judgments match each other
judge.validateData(qqs, logger);
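FileReader is replaced because it can only ever decode with the platform charset. IOUtils.getDecodingReader, the Lucene helper visible in the hunk, is essentially an InputStreamReader with the charset wired through. A sketch of the underlying pattern, using a hypothetical input file:

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

public class ExplicitDecodingReader {
  public static void main(String[] args) throws IOException {
    // new FileReader("topics.txt") would decode with the platform default;
    // naming the charset makes the decode deterministic.
    BufferedReader reader = new BufferedReader(
        new InputStreamReader(new FileInputStream("topics.txt"), "UTF-8"));
    try {
      System.out.println(reader.readLine());
    } finally {
      reader.close();
    }
  }
}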
@@ -19,6 +19,7 @@ package org.apache.lucene.benchmark.quality.utils;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.NumberFormat;
import java.util.Locale;

import org.apache.lucene.benchmark.quality.QualityQuery;
import org.apache.lucene.search.ScoreDoc;
@@ -45,7 +46,7 @@ public class SubmissionReport {
public SubmissionReport (PrintWriter logger, String name) {
this.logger = logger;
this.name = name;
nf = NumberFormat.getInstance();
nf = NumberFormat.getInstance(Locale.ROOT);
nf.setMaximumFractionDigits(4);
nf.setMinimumFractionDigits(4);
}

@@ -19,12 +19,18 @@ package org.apache.lucene.benchmark.utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.lucene.util.IOUtils;

/**
* Split the Reuters SGML documents into Simple Text files containing: Title, Date, Dateline, Body
@@ -73,7 +79,7 @@ public class ExtractReuters {
*/
protected void extractFile(File sgmFile) {
try {
BufferedReader reader = new BufferedReader(new FileReader(sgmFile));
BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(sgmFile), IOUtils.CHARSET_UTF_8));

StringBuilder buffer = new StringBuilder(1024);
StringBuilder outBuffer = new StringBuilder(1024);
@@ -107,7 +113,7 @@ public class ExtractReuters {
File outFile = new File(outputDir, sgmFile.getName() + "-"
+ (docNumber++) + ".txt");
// System.out.println("Writing " + outFile);
FileWriter writer = new FileWriter(outFile);
OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(outFile), IOUtils.CHARSET_UTF_8);
writer.write(out);
writer.close();
outBuffer.setLength(0);
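The write side gets the mirror-image fix: FileWriter offers no charset parameter at all, so it is replaced by OutputStreamWriter over a FileOutputStream. A standalone sketch (the output file name is hypothetical):

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;

public class ExplicitCharsetWriter {
  public static void main(String[] args) throws IOException {
    // new FileWriter("out.txt") would pick the platform charset silently.
    Writer writer = new OutputStreamWriter(new FileOutputStream("out.txt"), "UTF-8");
    try {
      writer.write("résumé -- identical bytes on every machine\n");
    } finally {
      writer.close();
    }
  }
}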
@@ -18,8 +18,10 @@ package org.apache.lucene.benchmark.utils;
*/

import java.io.File;
import java.io.FileWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Properties;

import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
@@ -28,6 +30,7 @@ import org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource;
import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.document.Document;
import org.apache.lucene.util.IOUtils;

/**
* Extract the downloaded Wikipedia dump into separate files for indexing.
@@ -83,7 +86,7 @@ public class ExtractWikipedia {
contents.append("\n");

try {
FileWriter writer = new FileWriter(f);
Writer writer = new OutputStreamWriter(new FileOutputStream(f), IOUtils.CHARSET_UTF_8);
writer.write(contents.toString());
writer.close();
} catch (IOException ioe) {

@@ -166,7 +166,7 @@ public class DocMakerTest extends BenchmarkTestCase {
// DocMaker did not close its ContentSource if resetInputs was called twice,
// leading to a file handle leak.
File f = new File(getWorkDir(), "docMakerLeak.txt");
PrintStream ps = new PrintStream(f);
PrintStream ps = new PrintStream(f, "UTF-8");
ps.println("one title\t" + System.currentTimeMillis() + "\tsome content");
ps.close();

@@ -20,6 +20,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.PrintStream;
import java.nio.charset.Charset;
import java.util.Properties;

import org.apache.lucene.benchmark.BenchmarkTestCase;
@@ -50,7 +51,7 @@ public class CreateIndexTaskTest extends BenchmarkTestCase {

PrintStream curOut = System.out;
ByteArrayOutputStream baos = new ByteArrayOutputStream();
System.setOut(new PrintStream(baos));
System.setOut(new PrintStream(baos, false, Charset.defaultCharset().name()));
try {
PerfRunData runData = createPerfRunData("SystemOut");
CreateIndexTask cit = new CreateIndexTask(runData);
@@ -63,7 +64,7 @@ public class CreateIndexTaskTest extends BenchmarkTestCase {

PrintStream curErr = System.err;
baos.reset();
System.setErr(new PrintStream(baos));
System.setErr(new PrintStream(baos, false, Charset.defaultCharset().name()));
try {
PerfRunData runData = createPerfRunData("SystemErr");
CreateIndexTask cit = new CreateIndexTask(runData);

@@ -31,6 +31,7 @@ import java.io.OutputStreamWriter;
import org.apache.commons.compress.compressors.CompressorStreamFactory;
import org.apache.lucene.benchmark.BenchmarkTestCase;
import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util._TestUtil;
import org.junit.After;
import org.junit.Before;
@@ -88,7 +89,7 @@ public class StreamUtilsTest extends BenchmarkTestCase {

private File rawTextFile(String ext) throws Exception {
File f = new File(testDir,"testfile." + ext);
BufferedWriter w = new BufferedWriter(new FileWriter(f));
BufferedWriter w = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f), IOUtils.CHARSET_UTF_8));
w.write(TEXT);
w.newLine();
w.close();
@@ -117,7 +118,7 @@ public class StreamUtilsTest extends BenchmarkTestCase {
}

private void writeText(OutputStream os) throws IOException {
BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os, IOUtils.CHARSET_UTF_8));
w.write(TEXT);
w.newLine();
w.close();
@@ -125,7 +126,7 @@ public class StreamUtilsTest extends BenchmarkTestCase {

private void assertReadText(File f) throws Exception {
InputStream ir = StreamUtils.inputStream(f);
InputStreamReader in = new InputStreamReader(ir);
InputStreamReader in = new InputStreamReader(ir, IOUtils.CHARSET_UTF_8);
BufferedReader r = new BufferedReader(in);
String line = r.readLine();
assertEquals("Wrong text found in "+f.getName(), TEXT, line);

@@ -31,7 +31,9 @@ import java.io.BufferedReader;
import java.io.File;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;

/**
* Test that quality run does its job.
@@ -55,7 +57,7 @@ public class TestQualityRun extends BenchmarkTestCase {
int maxResults = 1000;
String docNameField = "doctitle"; // orig docID is in the linedoc format title

PrintWriter logger = VERBOSE ? new PrintWriter(System.out,true) : null;
PrintWriter logger = VERBOSE ? new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()),true) : null;

// prepare topics
InputStream topics = getClass().getResourceAsStream("trecTopics.txt");

@@ -169,11 +169,19 @@
</clover-report>
</target>

<!-- Validate once from top-level. -->
<target name="validate" depends="compile-tools,resolve" description="Validate legal stuff.">
<!-- Validation (license/notice/api checks). -->
<target name="validate" depends="check-licenses,check-forbidden-apis" description="Validate stuff." />

<target name="check-licenses" depends="compile-tools,resolve,load-custom-tasks" description="Validate license stuff.">
<license-check-macro dir="${basedir}" />
</target>

<target name="check-forbidden-apis" depends="compile-tools,compile-test,load-custom-tasks" description="Check forbidden API calls in compiled class files.">
<forbidden-apis apiFile="${custom-tasks.dir}/forbiddenApis/jdk.txt">
<fileset dir="${basedir}/build" includes="**/*.class" />
</forbidden-apis>
</target>
<target name="resolve">
<sequential>
<ant dir="test-framework" target="resolve" inheritall="false">

@@ -20,8 +20,10 @@ package org.apache.lucene.codecs;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.Comparator;
import java.util.Iterator;
import java.util.Locale;
import java.util.TreeMap;

import org.apache.lucene.index.DocsAndPositionsEnum;
@@ -345,7 +347,12 @@ public class BlockTreeTermsReader extends FieldsProducer {
@Override
public String toString() {
final ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
final PrintStream out = new PrintStream(bos);
PrintStream out;
try {
out = new PrintStream(bos, false, "UTF-8");
} catch (UnsupportedEncodingException bogus) {
throw new RuntimeException(bogus);
}

out.println(" index FST:");
out.println(" " + indexNodeCount + " nodes");
@@ -353,7 +360,7 @@ public class BlockTreeTermsReader extends FieldsProducer {
out.println(" " + indexNumBytes + " bytes");
out.println(" terms:");
out.println(" " + totalTermCount + " terms");
out.println(" " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format("%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : ""));
out.println(" " + totalTermBytes + " bytes" + (totalTermCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalTermBytes)/totalTermCount) + " bytes/term)" : ""));
out.println(" blocks:");
out.println(" " + totalBlockCount + " blocks");
out.println(" " + termsOnlyBlockCount + " terms-only blocks");
@@ -362,9 +369,9 @@ public class BlockTreeTermsReader extends FieldsProducer {
out.println(" " + floorBlockCount + " floor blocks");
out.println(" " + (totalBlockCount-floorSubBlockCount) + " non-floor blocks");
out.println(" " + floorSubBlockCount + " floor sub-blocks");
out.println(" " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format("%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : ""));
out.println(" " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? " (" + String.format("%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : ""));
out.println(" " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format("%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : ""));
out.println(" " + totalBlockSuffixBytes + " term suffix bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockSuffixBytes)/totalBlockCount) + " suffix-bytes/block)" : ""));
out.println(" " + totalBlockStatsBytes + " term stats bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockStatsBytes)/totalBlockCount) + " stats-bytes/block)" : ""));
out.println(" " + totalBlockOtherBytes + " other bytes" + (totalBlockCount != 0 ? " (" + String.format(Locale.ROOT, "%.1f", ((double) totalBlockOtherBytes)/totalBlockCount) + " other-bytes/block)" : ""));
if (totalBlockCount != 0) {
out.println(" by prefix length:");
int total = 0;
@@ -372,13 +379,17 @@ public class BlockTreeTermsReader extends FieldsProducer {
final int blockCount = blockCountByPrefixLen[prefix];
total += blockCount;
if (blockCount != 0) {
out.println(" " + String.format("%2d", prefix) + ": " + blockCount);
out.println(" " + String.format(Locale.ROOT, "%2d", prefix) + ": " + blockCount);
}
}
assert totalBlockCount == total;
}

return bos.toString();
try {
return bos.toString("UTF-8");
} catch (UnsupportedEncodingException bogus) {
throw new RuntimeException(bogus);
}
}
}

@@ -53,7 +53,7 @@ public class DateTools {
private static final ThreadLocal<Calendar> TL_CAL = new ThreadLocal<Calendar>() {
@Override
protected Calendar initialValue() {
return Calendar.getInstance(GMT, Locale.US);
return Calendar.getInstance(GMT, Locale.ROOT);
}
};

@@ -194,7 +194,7 @@ public class DateTools {
this.formatLen = formatLen;
// formatLen 10's place: 11111111
// formatLen 1's place: 12345678901234567
this.format = new SimpleDateFormat("yyyyMMddHHmmssSSS".substring(0,formatLen),Locale.US);
this.format = new SimpleDateFormat("yyyyMMddHHmmssSSS".substring(0,formatLen),Locale.ROOT);
this.format.setTimeZone(GMT);
}

@@ -202,7 +202,7 @@ public class DateTools {
* in lowercase (for backwards compatibility) */
@Override
public String toString() {
return super.toString().toLowerCase(Locale.ENGLISH);
return super.toString().toLowerCase(Locale.ROOT);
}

}

@@ -25,6 +25,7 @@ import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.apache.lucene.codecs.BlockTreeTermsReader;
@@ -340,7 +341,7 @@ public class CheckIndex {
* you only call this when the index is not opened by any
* writer. */
public Status checkIndex(List<String> onlySegments) throws IOException {
NumberFormat nf = NumberFormat.getInstance();
NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
SegmentInfos sis = new SegmentInfos();
Status result = new Status();
result.dir = dir;

@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.text.NumberFormat;
import java.util.HashSet;
import java.util.Locale;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.codecs.Codec;
@@ -181,7 +182,7 @@ class DocumentsWriterPerThread {
private int flushedDocCount;
DocumentsWriterDeleteQueue deleteQueue;
DeleteSlice deleteSlice;
private final NumberFormat nf = NumberFormat.getInstance();
private final NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
final Allocator byteBlockAllocator;

@@ -27,6 +27,7 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
@@ -3610,7 +3611,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
// lost...

if (infoStream.isEnabled("IW")) {
infoStream.message("IW", String.format("merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.));
infoStream.message("IW", String.format(Locale.ROOT, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.));
}

final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer();

@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Map;

@@ -535,7 +536,7 @@ public abstract class LogMergePolicy extends MergePolicy {
if (size >= maxMergeSize) {
extra += " [skip: too large]";
}
message("seg=" + writer.get().segString(info) + " level=" + infoLevel.level + " size=" + String.format("%.3f MB", segBytes/1024/1024.) + extra);
message("seg=" + writer.get().segString(info) + " level=" + infoLevel.level + " size=" + String.format(Locale.ROOT, "%.3f MB", segBytes/1024/1024.) + extra);
}
}

@@ -18,6 +18,7 @@ package org.apache.lucene.index;
*/

import java.io.IOException;
import java.util.Locale;
import java.util.Map;
import java.util.Collection;
import java.util.Collections;
@@ -289,7 +290,7 @@ public class TieredMergePolicy extends MergePolicy {
} else if (segBytes < floorSegmentBytes) {
extra += " [floored]";
}
message(" seg=" + writer.get().segString(info) + " size=" + String.format("%.3f", segBytes/1024/1024.) + " MB" + extra);
message(" seg=" + writer.get().segString(info) + " size=" + String.format(Locale.ROOT, "%.3f", segBytes/1024/1024.) + " MB" + extra);
}

minSegmentBytes = Math.min(segBytes, minSegmentBytes);
@@ -388,7 +389,7 @@ public class TieredMergePolicy extends MergePolicy {

final MergeScore score = score(candidate, hitTooLarge, mergingBytes);
if (verbose()) {
message(" maybe=" + writer.get().segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format("%.3f MB", totAfterMergeBytes/1024./1024.));
message(" maybe=" + writer.get().segString(candidate) + " score=" + score.getScore() + " " + score.getExplanation() + " tooLarge=" + hitTooLarge + " size=" + String.format(Locale.ROOT, "%.3f MB", totAfterMergeBytes/1024./1024.));
}

// If we are already running a max sized merge
@@ -413,7 +414,7 @@ public class TieredMergePolicy extends MergePolicy {
}

if (verbose()) {
message(" add merge=" + writer.get().segString(merge.segments) + " size=" + String.format("%.3f MB", bestMergeBytes/1024./1024.) + " score=" + String.format("%.3f", bestScore.getScore()) + " " + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : ""));
message(" add merge=" + writer.get().segString(merge.segments) + " size=" + String.format(Locale.ROOT, "%.3f MB", bestMergeBytes/1024./1024.) + " score=" + String.format(Locale.ROOT, "%.3f", bestScore.getScore()) + " " + bestScore.getExplanation() + (bestTooLarge ? " [max merge]" : ""));
}
} else {
return spec;
@@ -475,7 +476,7 @@ public class TieredMergePolicy extends MergePolicy {

@Override
public String getExplanation() {
return "skew=" + String.format("%.3f", skew) + " nonDelRatio=" + String.format("%.3f", nonDelRatio);
return "skew=" + String.format(Locale.ROOT, "%.3f", skew) + " nonDelRatio=" + String.format(Locale.ROOT, "%.3f", nonDelRatio);
}
};
}

@@ -17,6 +17,8 @@ package org.apache.lucene.search.similarities;
* limitations under the License.
*/

import java.util.Locale;

import org.apache.lucene.search.Explanation;

/**
@@ -92,6 +94,6 @@ public class LMDirichletSimilarity extends LMSimilarity {

@Override
public String getName() {
return String.format("Dirichlet(%f)", getMu());
return String.format(Locale.ROOT, "Dirichlet(%f)", getMu());
}
}

@@ -17,6 +17,8 @@ package org.apache.lucene.search.similarities;
* limitations under the License.
*/

import java.util.Locale;

import org.apache.lucene.search.Explanation;

/**
@@ -72,6 +74,6 @@ public class LMJelinekMercerSimilarity extends LMSimilarity {

@Override
public String getName() {
return String.format("Jelinek-Mercer(%f)", getLambda());
return String.format(Locale.ROOT, "Jelinek-Mercer(%f)", getLambda());
}
}

@@ -17,6 +17,8 @@ package org.apache.lucene.search.similarities;
* limitations under the License.
*/

import java.util.Locale;

import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.TermStatistics;
@@ -91,9 +93,9 @@ public abstract class LMSimilarity extends SimilarityBase {
public String toString() {
String coll = collectionModel.getName();
if (coll != null) {
return String.format("LM %s - %s", getName(), coll);
return String.format(Locale.ROOT, "LM %s - %s", getName(), coll);
} else {
return String.format("LM %s", getName());
return String.format(Locale.ROOT, "LM %s", getName());
}
}

@@ -17,6 +17,7 @@ package org.apache.lucene.util;
* limitations under the License.
*/

import java.util.Locale;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicInteger;
@@ -43,7 +44,7 @@ public class NamedThreadFactory implements ThreadFactory {
final SecurityManager s = System.getSecurityManager();
group = (s != null) ? s.getThreadGroup() : Thread.currentThread()
.getThreadGroup();
this.threadNamePrefix = String.format(NAME_PATTERN,
this.threadNamePrefix = String.format(Locale.ROOT, NAME_PATTERN,
checkPrefix(threadNamePrefix), threadPoolNumber.getAndIncrement());
}

@@ -57,7 +58,7 @@ public class NamedThreadFactory implements ThreadFactory {
* @see java.util.concurrent.ThreadFactory#newThread(java.lang.Runnable)
*/
public Thread newThread(Runnable r) {
final Thread t = new Thread(group, r, String.format("%s-%d",
final Thread t = new Thread(group, r, String.format(Locale.ROOT, "%s-%d",
this.threadNamePrefix, threadNumber.getAndIncrement()), 0);
t.setDaemon(false);
t.setPriority(Thread.NORM_PRIORITY);

@@ -559,7 +559,7 @@ public final class RamUsageEstimator {
*/
public static String humanReadableUnits(long bytes) {
return humanReadableUnits(bytes,
new DecimalFormat("0.#", DecimalFormatSymbols.getInstance(Locale.ENGLISH)));
new DecimalFormat("0.#", DecimalFormatSymbols.getInstance(Locale.ROOT)));
}

/**

@@ -73,7 +73,7 @@ public enum Version {
}

public static Version parseLeniently(String version) {
String parsedMatchVersion = version.toUpperCase(Locale.ENGLISH);
String parsedMatchVersion = version.toUpperCase(Locale.ROOT);
return Version.valueOf(parsedMatchVersion.replaceFirst("^(\\d)\\.(\\d)$", "LUCENE_$1$2"));
}
}

@@ -159,7 +159,7 @@ public class TestCharTermAttributeImpl extends LuceneTestCase {

public void testAppendableInterface() {
CharTermAttributeImpl t = new CharTermAttributeImpl();
Formatter formatter = new Formatter(t, Locale.US);
Formatter formatter = new Formatter(t, Locale.ROOT);
formatter.format("%d", 1234);
assertEquals("1234", t.toString());
formatter.format("%d", 5678);

@@ -71,7 +71,7 @@ public class Test10KPulsings extends LuceneTestCase {
Field field = newField("field", "", ft);
document.add(field);

NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ENGLISH));
NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ROOT));

for (int i = 0; i < 10050; i++) {
field.setStringValue(df.format(i));
@@ -122,7 +122,7 @@ public class Test10KPulsings extends LuceneTestCase {
Field field = newField("field", "", ft);
document.add(field);

NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ENGLISH));
NumberFormat df = new DecimalFormat("00000", new DecimalFormatSymbols(Locale.ROOT));

final int freq = freqCutoff + 1;

@@ -37,7 +37,7 @@ public class TestBinaryDocument extends LuceneTestCase {
{
FieldType ft = new FieldType();
ft.setStored(true);
IndexableField binaryFldStored = new StoredField("binaryStored", binaryValStored.getBytes());
IndexableField binaryFldStored = new StoredField("binaryStored", binaryValStored.getBytes("UTF-8"));
IndexableField stringFldStored = new Field("stringStored", binaryValStored, ft);

Document doc = new Document();
@@ -62,7 +62,7 @@ public class TestBinaryDocument extends LuceneTestCase {
/** fetch the binary stored field and compare it's content with the original one */
BytesRef bytes = docFromReader.getBinaryValue("binaryStored");
assertNotNull(bytes);
String binaryFldStoredTest = new String(bytes.bytes, bytes.offset, bytes.length);
String binaryFldStoredTest = new String(bytes.bytes, bytes.offset, bytes.length, "UTF-8");
assertTrue(binaryFldStoredTest.equals(binaryValStored));

/** fetch the string field and compare it's content with the original one */
@@ -75,7 +75,7 @@ public class TestBinaryDocument extends LuceneTestCase {
}

public void testCompressionTools() throws Exception {
IndexableField binaryFldCompressed = new StoredField("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes()));
IndexableField binaryFldCompressed = new StoredField("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes("UTF-8")));
IndexableField stringFldCompressed = new StoredField("stringCompressed", CompressionTools.compressString(binaryValCompressed));

Document doc = new Document();
@@ -94,7 +94,7 @@ public class TestBinaryDocument extends LuceneTestCase {
assertTrue(docFromReader != null);

/** fetch the binary compressed field and compare it's content with the original one */
String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed")));
String binaryFldCompressedTest = new String(CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed")), "UTF-8");
assertTrue(binaryFldCompressedTest.equals(binaryValCompressed));
assertTrue(CompressionTools.decompressString(docFromReader.getBinaryValue("stringCompressed")).equals(binaryValCompressed));
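These hunks close the byte round trip: String.getBytes() and new String(byte[]) both consult the default charset, so the old test could pass on one machine and fail on another. A minimal sketch of the pinned, symmetric round trip:

import java.io.UnsupportedEncodingException;

public class BytesRoundTrip {
  public static void main(String[] args) throws UnsupportedEncodingException {
    String value = "Grüße";
    byte[] utf8 = value.getBytes("UTF-8");   // same 7 bytes everywhere
    String back = new String(utf8, "UTF-8"); // decode with the same charset
    System.out.println(back.equals(value));  // true on every platform
  }
}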
@@ -61,12 +61,12 @@ public class TestDateTools extends LuceneTestCase {

public void testStringtoTime() throws ParseException {
long time = DateTools.stringToTime("197001010000");
Calendar cal = new GregorianCalendar();
// we use default locale since LuceneTestCase randomizes it
Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"), Locale.getDefault());
cal.clear();
cal.set(1970, 0, 1, // year=1970, month=january, day=1
0, 0, 0); // hour, minute, second
cal.set(Calendar.MILLISECOND, 0);
cal.setTimeZone(TimeZone.getTimeZone("GMT"));
assertEquals(cal.getTime().getTime(), time);
cal.set(1980, 1, 2, // year=1980, month=february, day=2
11, 5, 0); // hour, minute, second
@@ -76,9 +76,9 @@ public class TestDateTools extends LuceneTestCase {
}

public void testDateAndTimetoString() throws ParseException {
Calendar cal = new GregorianCalendar();
// we use default locale since LuceneTestCase randomizes it
Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"), Locale.getDefault());
cal.clear();
cal.setTimeZone(TimeZone.getTimeZone("GMT"));
cal.set(2004, 1, 3, // year=2004, month=february(!), day=3
22, 8, 56); // hour, minute, second
cal.set(Calendar.MILLISECOND, 333);
@@ -141,9 +141,9 @@ public class TestDateTools extends LuceneTestCase {
}

public void testRound() {
Calendar cal = new GregorianCalendar();
// we use default locale since LuceneTestCase randomizes it
Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"), Locale.getDefault());
cal.clear();
cal.setTimeZone(TimeZone.getTimeZone("GMT"));
cal.set(2004, 1, 3, // year=2004, month=february(!), day=3
22, 8, 56); // hour, minute, second
cal.set(Calendar.MILLISECOND, 333);
@@ -180,7 +180,7 @@ public class TestDateTools extends LuceneTestCase {
}

private String isoFormat(Date date) {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS", Locale.US);
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss:SSS", Locale.ROOT);
sdf.setTimeZone(TimeZone.getTimeZone("GMT"));
return sdf.format(date);
}
|
||||
|
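Note: the TestDateTools hunks replace the default-timezone GregorianCalendar constructor with the two-argument form, which also lets the later cal.setTimeZone(...) call be dropped. A minimal sketch of the idiom (class name illustrative, not from the patch):

    import java.util.Calendar;
    import java.util.GregorianCalendar;
    import java.util.Locale;
    import java.util.TimeZone;

    public class CalendarDemo {
      public static void main(String[] args) {
        // Passing the zone (and a locale) at construction time replaces the
        // two-step "new GregorianCalendar(); cal.setTimeZone(...)" dance and
        // never consults the machine's default zone.
        Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("GMT"), Locale.getDefault());
        cal.clear();
        cal.set(1970, Calendar.JANUARY, 1, 0, 0, 0);
        cal.set(Calendar.MILLISECOND, 0);
        System.out.println(cal.getTimeInMillis()); // 0 == epoch, on any machine
      }
    }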
@@ -219,10 +219,10 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
 
       ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
       CheckIndex checker = new CheckIndex(dir);
-      checker.setInfoStream(new PrintStream(bos));
+      checker.setInfoStream(new PrintStream(bos, false, "UTF-8"));
       CheckIndex.Status indexStatus = checker.checkIndex();
       assertFalse(indexStatus.clean);
-      assertTrue(bos.toString().contains(IndexFormatTooOldException.class.getName()));
+      assertTrue(bos.toString("UTF-8").contains(IndexFormatTooOldException.class.getName()));
 
       dir.close();
       _TestUtil.rmDir(oldIndxeDir);
@@ -52,12 +52,12 @@ public class TestCheckIndex extends LuceneTestCase {
 
     ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
     CheckIndex checker = new CheckIndex(dir);
-    checker.setInfoStream(new PrintStream(bos));
+    checker.setInfoStream(new PrintStream(bos, false, "UTF-8"));
     if (VERBOSE) checker.setInfoStream(System.out);
     CheckIndex.Status indexStatus = checker.checkIndex();
     if (indexStatus.clean == false) {
       System.out.println("CheckIndex failed");
-      System.out.println(bos.toString());
+      System.out.println(bos.toString("UTF-8"));
       fail();
     }
 
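Note: the two CheckIndex hunks above pair PrintStream's three-argument constructor with the charset-taking toString, so the bytes written into the buffer and the string read back out of it always agree. A minimal sketch of this pairing (names and message illustrative, not from the patch):

    import java.io.ByteArrayOutputStream;
    import java.io.PrintStream;
    import java.io.UnsupportedEncodingException;

    public class InfoStreamDemo {
      public static void main(String[] args) throws UnsupportedEncodingException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
        // PrintStream(OutputStream, boolean autoFlush, String encoding) pins
        // the encoding; the matching toString(String) decodes with the same
        // charset instead of the platform default.
        PrintStream out = new PrintStream(bos, false, "UTF-8");
        out.println("WARNING: something worth asserting on");
        out.flush();
        System.out.println(bos.toString("UTF-8").contains("WARNING")); // true
      }
    }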
@@ -17,11 +17,14 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
 import java.io.StringWriter;
+import java.io.Writer;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.LinkedList;
@@ -78,14 +81,14 @@ public class TestDoc extends LuceneTestCase {
   }
 
   private File createOutput(String name, String text) throws IOException {
-    FileWriter fw = null;
+    Writer fw = null;
     PrintWriter pw = null;
 
     try {
       File f = new File(workDir, name);
       if (f.exists()) f.delete();
 
-      fw = new FileWriter(f);
+      fw = new OutputStreamWriter(new FileOutputStream(f), "UTF-8");
       pw = new PrintWriter(fw);
       pw.println(text);
       return f;
@@ -182,9 +185,11 @@ public class TestDoc extends LuceneTestCase {
   {
     File file = new File(workDir, fileName);
     Document doc = new Document();
-    doc.add(new TextField("contents", new FileReader(file), Field.Store.NO));
+    InputStreamReader is = new InputStreamReader(new FileInputStream(file), "UTF-8");
+    doc.add(new TextField("contents", is, Field.Store.NO));
     writer.addDocument(doc);
     writer.commit();
+    is.close();
    return writer.newestSegment();
   }
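Note: FileWriter and FileReader always use the platform default encoding and offer no override, which is exactly what the forbidden-apis check flags; the TestDoc hunks wrap the raw streams instead. A minimal sketch of the replacement idiom (file name and text illustrative, not from the patch):

    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.io.OutputStreamWriter;
    import java.io.PrintWriter;
    import java.io.Writer;

    public class ExplicitCharsetFileIO {
      public static void main(String[] args) throws IOException {
        File f = File.createTempFile("demo", ".txt");
        // OutputStreamWriter over a FileOutputStream accepts a charset name,
        // unlike FileWriter.
        Writer w = new OutputStreamWriter(new FileOutputStream(f), "UTF-8");
        PrintWriter pw = new PrintWriter(w);
        pw.println("some text");
        pw.close();

        // Same on the read side: InputStreamReader instead of FileReader.
        BufferedReader r = new BufferedReader(
            new InputStreamReader(new FileInputStream(f), "UTF-8"));
        System.out.println(r.readLine()); // some text
        r.close();
        f.delete();
      }
    }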
@@ -43,9 +43,8 @@ public class TestPayloads extends LuceneTestCase {
 
   // Simple tests to test the Payload class
   public void testPayload() throws Exception {
-    byte[] testData = "This is a test!".getBytes();
-    BytesRef payload = new BytesRef(testData);
-    assertEquals("Wrong payload length.", testData.length, payload.length);
+    BytesRef payload = new BytesRef("This is a test!");
+    assertEquals("Wrong payload length.", "This is a test!".length(), payload.length);
 
     BytesRef clone = payload.clone();
     assertEquals(payload.length, clone.length);
@@ -73,7 +72,7 @@ public class TestPayloads extends LuceneTestCase {
     // enabled in only some documents
     d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO));
     // only add payload data for field f2
-    analyzer.setPayloadData("f2", "somedata".getBytes(), 0, 1);
+    analyzer.setPayloadData("f2", "somedata".getBytes("UTF-8"), 0, 1);
     writer.addDocument(d);
     // flush
     writer.close();
@@ -96,8 +95,8 @@ public class TestPayloads extends LuceneTestCase {
     d.add(newTextField("f2", "This field has payloads in all docs", Field.Store.NO));
     d.add(newTextField("f3", "This field has payloads in some docs", Field.Store.NO));
     // add payload data for field f2 and f3
-    analyzer.setPayloadData("f2", "somedata".getBytes(), 0, 1);
-    analyzer.setPayloadData("f3", "somedata".getBytes(), 0, 3);
+    analyzer.setPayloadData("f2", "somedata".getBytes("UTF-8"), 0, 1);
+    analyzer.setPayloadData("f3", "somedata".getBytes("UTF-8"), 0, 3);
     writer.addDocument(d);
 
     // force merge
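Note: the first TestPayloads hunk shows a second way to avoid getBytes(): constructing the BytesRef directly from the string, which (as the diff's own length assertion relies on) encodes the characters as UTF-8 internally. A minimal sketch, assuming Lucene's BytesRef on the classpath:

    import org.apache.lucene.util.BytesRef;

    public class BytesRefDemo {
      public static void main(String[] args) {
        // BytesRef(CharSequence) always encodes as UTF-8, so it is both
        // shorter and safer than wrapping the result of String.getBytes().
        BytesRef payload = new BytesRef("This is a test!");
        // Byte length equals char length here only because the text is ASCII.
        System.out.println(payload.length == "This is a test!".length()); // true
      }
    }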
@@ -19,8 +19,10 @@ package org.apache.lucene.search;
 import java.io.IOException;
 import java.util.Calendar;
 import java.util.GregorianCalendar;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Random;
+import java.util.TimeZone;
 import java.util.TreeMap;
 
 import org.apache.lucene.document.DateTools;
@@ -230,10 +232,12 @@ public class TestCustomSearcherSort extends LuceneTestCase {
   private class RandomGen {
     RandomGen(Random random) {
       this.random = random;
+      base.set(1980, 1, 1);
     }
 
     private Random random;
-    private Calendar base = new GregorianCalendar(1980, 1, 1);
+    // we use the default Locale/TZ since LuceneTestCase randomizes it
+    private Calendar base = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
 
     // Just to generate some different Lucene Date strings
     private String getLuceneDate() {
@@ -23,6 +23,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.LinkedHashSet;
 import java.util.List;
+import java.util.Locale;
 import java.util.concurrent.CyclicBarrier;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -117,10 +118,10 @@ public class TestFieldCache extends LuceneTestCase {
     try {
       FieldCache cache = FieldCache.DEFAULT;
       ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
-      cache.setInfoStream(new PrintStream(bos));
+      cache.setInfoStream(new PrintStream(bos, false, "UTF-8"));
       cache.getDoubles(reader, "theDouble", false);
       cache.getFloats(reader, "theDouble", false);
-      assertTrue(bos.toString().indexOf("WARNING") != -1);
+      assertTrue(bos.toString("UTF-8").indexOf("WARNING") != -1);
     } finally {
       FieldCache.DEFAULT.purgeAllCaches();
     }
@@ -261,7 +262,7 @@ public class TestFieldCache extends LuceneTestCase {
       if (chunk == 0) {
         for (int ord = 0; ord < values.size(); ord++) {
           BytesRef term = values.get(ord);
-          assertNull(String.format("Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term);
+          assertNull(String.format(Locale.ROOT, "Document[%d] misses field must be null. Has value %s for ord %d", i, term, ord), term);
         }
         break;
       }
@@ -275,7 +276,7 @@ public class TestFieldCache extends LuceneTestCase {
         reuse = termOrds.lookup(i, reuse);
         reuse.read(buffer);
       }
-      assertTrue(String.format("Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual));
+      assertTrue(String.format(Locale.ROOT, "Expected value %s for doc %d and ord %d, but was %s", expected, i, idx, actual), expected.equals(actual));
     }
 
     if (chunk <= buffer.length) {
@@ -44,7 +44,7 @@ public class TestMultiValuedNumericRangeQuery extends LuceneTestCase {
         newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
         .setMaxBufferedDocs(_TestUtil.nextInt(random(), 50, 1000)));
 
-    DecimalFormat format = new DecimalFormat("00000000000", new DecimalFormatSymbols(Locale.US));
+    DecimalFormat format = new DecimalFormat("00000000000", new DecimalFormatSymbols(Locale.ROOT));
 
     int num = atLeast(500);
     for (int l = 0; l < num; l++) {
@@ -58,7 +58,7 @@ public class TestRegexpRandom extends LuceneTestCase {
     Field field = newField("field", "", customType);
     doc.add(field);
 
-    NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ENGLISH));
+    NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
     for (int i = 0; i < 1000; i++) {
       field.setStringValue(df.format(i));
       writer.addDocument(doc);
@@ -54,7 +54,7 @@ public class TestWildcardRandom extends LuceneTestCase {
     Field field = newStringField("field", "", Field.Store.NO);
     doc.add(field);
 
-    NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ENGLISH));
+    NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ROOT));
     for (int i = 0; i < 1000; i++) {
       field.setStringValue(df.format(i));
       writer.addDocument(doc);
@@ -81,7 +81,7 @@ public class TestBasics extends LuceneTestCase {
     @Override
     public boolean incrementToken() throws IOException {
       if (input.incrementToken()) {
-        payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes()));
+        payloadAttr.setPayload(new BytesRef(("pos: " + pos).getBytes("UTF-8")));
         pos++;
         return true;
       } else {
@@ -411,7 +411,7 @@ public class TestBasics extends LuceneTestCase {
   @Test
   public void testSpanPayloadCheck() throws Exception {
     SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five"));
-    BytesRef pay = new BytesRef(("pos: " + 5).getBytes());
+    BytesRef pay = new BytesRef(("pos: " + 5).getBytes("UTF-8"));
     SpanQuery query = new SpanPayloadCheckQuery(term1, Collections.singletonList(pay.bytes));
     checkHits(query, new int[]
       {1125, 1135, 1145, 1155, 1165, 1175, 1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345, 1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495, 1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665, 1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835, 1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985, 1995});
@@ -426,8 +426,8 @@ public class TestBasics extends LuceneTestCase {
     clauses[0] = term1;
     clauses[1] = term2;
     snq = new SpanNearQuery(clauses, 0, true);
-    pay = new BytesRef(("pos: " + 0).getBytes());
-    pay2 = new BytesRef(("pos: " + 1).getBytes());
+    pay = new BytesRef(("pos: " + 0).getBytes("UTF-8"));
+    pay2 = new BytesRef(("pos: " + 1).getBytes("UTF-8"));
     list = new ArrayList<byte[]>();
     list.add(pay.bytes);
     list.add(pay2.bytes);
@@ -439,9 +439,9 @@ public class TestBasics extends LuceneTestCase {
     clauses[1] = term2;
     clauses[2] = new SpanTermQuery(new Term("field", "five"));
     snq = new SpanNearQuery(clauses, 0, true);
-    pay = new BytesRef(("pos: " + 0).getBytes());
-    pay2 = new BytesRef(("pos: " + 1).getBytes());
-    BytesRef pay3 = new BytesRef(("pos: " + 2).getBytes());
+    pay = new BytesRef(("pos: " + 0).getBytes("UTF-8"));
+    pay2 = new BytesRef(("pos: " + 1).getBytes("UTF-8"));
+    BytesRef pay3 = new BytesRef(("pos: " + 2).getBytes("UTF-8"));
     list = new ArrayList<byte[]>();
     list.add(pay.bytes);
     list.add(pay2.bytes);
@@ -470,10 +470,10 @@ public class TestBasics extends LuceneTestCase {
     checkHits(query, new int[]{1103, 1203,1303,1403,1503,1603,1703,1803,1903});
 
     Collection<byte[]> payloads = new ArrayList<byte[]>();
-    BytesRef pay = new BytesRef(("pos: " + 0).getBytes());
-    BytesRef pay2 = new BytesRef(("pos: " + 1).getBytes());
-    BytesRef pay3 = new BytesRef(("pos: " + 3).getBytes());
-    BytesRef pay4 = new BytesRef(("pos: " + 4).getBytes());
+    BytesRef pay = new BytesRef(("pos: " + 0).getBytes("UTF-8"));
+    BytesRef pay2 = new BytesRef(("pos: " + 1).getBytes("UTF-8"));
+    BytesRef pay3 = new BytesRef(("pos: " + 3).getBytes("UTF-8"));
+    BytesRef pay4 = new BytesRef(("pos: " + 4).getBytes("UTF-8"));
     payloads.add(pay.bytes);
     payloads.add(pay2.bytes);
     payloads.add(pay3.bytes);
@@ -276,7 +276,7 @@ public class TestPayloadSpans extends LuceneTestCase {
         Collection<byte[]> payloads = spans.getPayload();
 
         for (final byte [] payload : payloads) {
-          payloadSet.add(new String(payload));
+          payloadSet.add(new String(payload, "UTF-8"));
         }
       }
     }
@@ -311,7 +311,7 @@ public class TestPayloadSpans extends LuceneTestCase {
       while (spans.next()) {
         Collection<byte[]> payloads = spans.getPayload();
         for (final byte[] payload : payloads) {
-          payloadSet.add(new String(payload));
+          payloadSet.add(new String(payload, "UTF-8"));
        }
       }
     }
@@ -347,7 +347,7 @@ public class TestPayloadSpans extends LuceneTestCase {
         Collection<byte[]> payloads = spans.getPayload();
 
         for (final byte [] payload : payloads) {
-          payloadSet.add(new String(payload));
+          payloadSet.add(new String(payload, "UTF-8"));
         }
       }
     }
@@ -383,7 +383,7 @@ public class TestPayloadSpans extends LuceneTestCase {
       System.out.println("Num payloads:" + payloads.size());
       for (final byte [] bytes : payloads) {
         if(VERBOSE)
-          System.out.println(new String(bytes));
+          System.out.println(new String(bytes, "UTF-8"));
       }
     reader.close();
     directory.close();
@@ -456,7 +456,7 @@ public class TestPayloadSpans extends LuceneTestCase {
       for (final byte [] bytes : payload) {
         if(VERBOSE)
           System.out.println("doc:" + spans.doc() + " s:" + spans.start() + " e:" + spans.end() + " "
-              + new String(bytes));
+              + new String(bytes, "UTF-8"));
       }
 
       assertEquals(numPayloads[cnt],payload.size());
@@ -505,9 +505,9 @@ public class TestPayloadSpans extends LuceneTestCase {
 
       if (!nopayload.contains(token)) {
         if (entities.contains(token)) {
-          payloadAtt.setPayload(new BytesRef((token + ":Entity:"+ pos ).getBytes()));
+          payloadAtt.setPayload(new BytesRef(token + ":Entity:"+ pos ));
        } else {
-          payloadAtt.setPayload(new BytesRef((token + ":Noise:" + pos ).getBytes()));
+          payloadAtt.setPayload(new BytesRef(token + ":Noise:" + pos ));
         }
       }
       pos += posIncrAtt.getPositionIncrement();
@@ -59,7 +59,7 @@ public class StressRamUsageEstimator extends LuceneTestCase {
       // Check the current memory consumption and provide the estimate.
       long jvmUsed = memoryMXBean.getHeapMemoryUsage().getUsed();
       long estimated = RamUsageEstimator.sizeOf(first);
-      System.out.println(String.format(Locale.ENGLISH, "%10d, %10d",
+      System.out.println(String.format(Locale.ROOT, "%10d, %10d",
           jvmUsed, estimated));
 
       // Make a batch of objects.
@@ -125,7 +125,7 @@ public class StressRamUsageEstimator extends LuceneTestCase {
         break;
       }
 
-      System.out.println(String.format(Locale.ENGLISH, "%10s\t%10s\t%10s",
+      System.out.println(String.format(Locale.ROOT, "%10s\t%10s\t%10s",
          RamUsageEstimator.humanReadableUnits(mu.getUsed()),
          RamUsageEstimator.humanReadableUnits(mu.getMax()),
          RamUsageEstimator.humanReadableUnits(estimated)));
@@ -1298,7 +1298,7 @@ public class TestFSTs extends LuceneTestCase {
         ord++;
         if (ord % 500000 == 0) {
           System.out.println(
-              String.format(Locale.ENGLISH,
+              String.format(Locale.ROOT,
                  "%6.2fs: %9d...", ((System.currentTimeMillis() - tStart) / 1000.0), ord));
         }
         if (ord >= limit) {
@@ -1637,7 +1637,7 @@ public class TestFSTs extends LuceneTestCase {
       String idString;
       if (cycle == 0) {
         // PKs are assigned sequentially
-        idString = String.format("%07d", id);
+        idString = String.format(Locale.ROOT, "%07d", id);
       } else {
         while(true) {
           final String s = Long.toString(random().nextLong());
@@ -1668,7 +1668,7 @@ public class TestFSTs extends LuceneTestCase {
     for(int idx=0;idx<NUM_IDS/10;idx++) {
       String idString;
       if (cycle == 0) {
-        idString = String.format("%07d", (NUM_IDS + idx));
+        idString = String.format(Locale.ROOT, "%07d", (NUM_IDS + idx));
       } else {
         while(true) {
           idString = Long.toString(random().nextLong());
@@ -1710,8 +1710,8 @@ public class TestFSTs extends LuceneTestCase {
       exists = false;
       final int idv = random().nextInt(NUM_IDS-1);
      if (cycle == 0) {
-        id = String.format("%07da", idv);
-        nextID = String.format("%07d", idv+1);
+        id = String.format(Locale.ROOT, "%07da", idv);
+        nextID = String.format(Locale.ROOT, "%07d", idv+1);
       } else {
         id = sortedAllIDsList.get(idv) + "a";
         nextID = sortedAllIDsList.get(idv+1);
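Note: the TestFSTs hunks show the third recurring pattern: String.format without a locale argument formats with the JVM default, so "%07d" can even come out in non-ASCII digits under some default locales. A minimal sketch of the fix (class name and value illustrative, not from the patch):

    import java.util.Locale;

    public class FormatDemo {
      public static void main(String[] args) {
        int id = 7;
        // Locale.ROOT pins %07d to ASCII digits on every machine, so the
        // generated primary keys sort the same way in every environment.
        String idString = String.format(Locale.ROOT, "%07d", id);
        System.out.println(idString); // 0000007
      }
    }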
@@ -20,6 +20,7 @@ package org.apache.lucene.util.packed;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Locale;
 import java.util.Random;
 
 import org.apache.lucene.codecs.CodecUtil;
@@ -267,7 +268,7 @@ public class TestPackedInts extends LuceneTestCase {
         fill(packedInt, PackedInts.maxValue(bitsPerValue), randomSeed);
       } catch (Exception e) {
         e.printStackTrace(System.err);
-        fail(String.format(
+        fail(String.format(Locale.ROOT,
             "Exception while filling %s: valueCount=%d, bitsPerValue=%s",
             packedInt.getClass().getSimpleName(),
             valueCount, bitsPerValue));
@@ -311,7 +312,7 @@ public class TestPackedInts extends LuceneTestCase {
     for (int i = 0 ; i < packedInt.size() ; i++) {
       long value = _TestUtil.nextLong(rnd2, 0, maxValue);
       packedInt.set(i, value);
-      assertEquals(String.format(
+      assertEquals(String.format(Locale.ROOT,
          "The set/get of the value at index %d should match for %s",
          i, packedInt.getClass().getSimpleName()),
          value, packedInt.get(i));
@@ -336,7 +337,7 @@ public class TestPackedInts extends LuceneTestCase {
     }
     for (int i = 0 ; i < valueCount ; i++) {
       for (int j = 1 ; j < packedInts.size() ; j++) {
-        assertEquals(String.format(
+        assertEquals(String.format(Locale.ROOT,
            "%s. The value at index %d should be the same for %s and %s",
            message, i, base.getClass().getSimpleName(),
            packedInts.get(j).getClass().getSimpleName()),
@ -48,6 +48,7 @@ import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
/**
|
||||
@ -131,7 +132,7 @@ public class FormBasedXmlQueryDemo extends HttpServlet {
|
||||
IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
|
||||
IndexWriter writer = new IndexWriter(rd, iwConfig);
|
||||
InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv");
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(dataIn));
|
||||
BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, IOUtils.CHARSET_UTF_8));
|
||||
String line = br.readLine();
|
||||
final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED);
|
||||
textNoNorms.setOmitNorms(true);
|
||||
|
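Note: the demo servlet hunk passes IOUtils.CHARSET_UTF_8, a shared Charset constant from Lucene's util package (per the import added above), rather than a charset name string. A Charset-typed argument avoids the checked UnsupportedEncodingException of the String overload and cannot be mistyped. A minimal sketch using plain JDK calls (the resource path here is illustrative):

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.nio.charset.Charset;

    public class CharsetConstantDemo {
      public static void main(String[] args) throws IOException {
        // Assumes a /data.tsv resource exists on the classpath (illustrative).
        InputStream dataIn = CharsetConstantDemo.class.getResourceAsStream("/data.tsv");
        // Charset.forName never throws a checked exception for "UTF-8",
        // which every JVM is required to support.
        Charset utf8 = Charset.forName("UTF-8");
        BufferedReader br = new BufferedReader(new InputStreamReader(dataIn, utf8));
        System.out.println(br.readLine());
      }
    }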
@ -20,6 +20,7 @@ package org.apache.lucene.demo;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
@ -30,11 +31,11 @@ public class TestDemo extends LuceneTestCase {
|
||||
PrintStream outSave = System.out;
|
||||
try {
|
||||
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
|
||||
PrintStream fakeSystemOut = new PrintStream(bytes);
|
||||
PrintStream fakeSystemOut = new PrintStream(bytes, false, Charset.defaultCharset().name());
|
||||
System.setOut(fakeSystemOut);
|
||||
SearchFiles.main(new String[] {"-query", query, "-index", indexPath.getPath()});
|
||||
fakeSystemOut.flush();
|
||||
String output = bytes.toString(); // intentionally use default encoding
|
||||
String output = bytes.toString(Charset.defaultCharset().name()); // intentionally use default encoding
|
||||
assertTrue("output=" + output, output.contains(expectedHitCount + " total matching documents"));
|
||||
} finally {
|
||||
System.setOut(outSave);
|
||||
|
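Note: the TestDemo hunk is the one place where the default charset is the point, not the bug: the captured stream stands in for System.out, which itself uses the platform encoding. Spelling out Charset.defaultCharset() documents that intent and keeps the encode and decode sides guaranteed to match, while also satisfying the new forbidden-apis scan. A minimal sketch of the capture pattern (class name and message illustrative, not from the patch):

    import java.io.ByteArrayOutputStream;
    import java.io.PrintStream;
    import java.nio.charset.Charset;

    public class CaptureStdout {
      public static void main(String[] args) throws Exception {
        PrintStream outSave = System.out;
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        // Default charset is deliberate here: it mirrors what System.out uses.
        PrintStream fake = new PrintStream(bytes, false, Charset.defaultCharset().name());
        try {
          System.setOut(fake);
          System.out.println("42 total matching documents");
          fake.flush();
        } finally {
          System.setOut(outSave); // always restore stdout
        }
        String output = bytes.toString(Charset.defaultCharset().name());
        System.out.println(output.contains("total matching documents")); // true
      }
    }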
Some files were not shown because too many files have changed in this diff.