fix test lowercase bug

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1669533 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2015-03-27 09:17:12 +00:00
parent 6a6c729920
commit bce10efeb4
2 changed files with 31 additions and 9 deletions

View File

@ -181,9 +181,9 @@ public final class StemmerOverrideFilter extends TokenFilter {
charsSpare.grow(length);
final char[] buffer = charsSpare.chars();
for (int i = 0; i < length; ) {
i += Character.toChars(
Character.toLowerCase(
Character.codePointAt(input, i)), buffer, i);
i += Character.toChars(
Character.toLowerCase(
Character.codePointAt(input, i)), buffer, i);
}
spare.copyChars(buffer, 0, length);
} else {

View File

@ -19,9 +19,10 @@ import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -31,6 +32,7 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.TestUtil;
/**
@ -79,7 +81,12 @@ public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
public void testRandomRealisticWhiteSpace() throws IOException {
Map<String,String> map = new HashMap<>();
Set<String> seen = new HashSet<>();
int numTerms = atLeast(50);
boolean ignoreCase = random().nextBoolean();
CharacterUtils charUtils = CharacterUtils.getInstance();
for (int i = 0; i < numTerms; i++) {
String randomRealisticUnicodeString = TestUtil
.randomRealisticUnicodeString(random());
@ -93,16 +100,31 @@ public class TestStemmerOverrideFilter extends BaseTokenStreamTestCase {
j += Character.charCount(cp);
}
if (builder.length() > 0) {
String value = TestUtil.randomSimpleString(random());
map.put(builder.toString(),
value.isEmpty() ? "a" : value);
String inputValue = builder.toString();
// Make sure we don't try to add two inputs that vary only by case:
// seenInputValue is the key used for duplicate detection. It is the input
// itself when matching is case-sensitive, or its lower-cased form when
// ignoreCase is set.
String seenInputValue;
if (ignoreCase) {
  // TODO: can we simply use inputValue.toLowerCase(Locale.ROOT)???
  char[] buffer = inputValue.toCharArray();
  // Presumably lower-cases the buffer in place (the result is read back from
  // the same array below) -- confirm against CharacterUtils.
  charUtils.toLowerCase(buffer, 0, buffer.length);
  // char[].toString() is Object.toString(): it yields an identity string such
  // as "[C@1b6d3586", not the characters, so every key would look unique.
  // Build the String from the array contents instead.
  seenInputValue = new String(buffer);
} else {
  seenInputValue = inputValue;
}
if (seen.contains(seenInputValue) == false) {
seen.add(seenInputValue);
String value = TestUtil.randomSimpleString(random());
map.put(inputValue,
value.isEmpty() ? "a" : value);
}
}
}
if (map.isEmpty()) {
map.put("booked", "books");
}
StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random().nextBoolean());
StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(ignoreCase);
Set<Entry<String,String>> entrySet = map.entrySet();
StringBuilder input = new StringBuilder();
List<String> output = new ArrayList<>();