LUCENE-7599: Simplify TestRandomChains using Java's built-in Predicate and Function interfaces.

This commit is contained in:
Adrien Grand 2016-12-21 19:25:54 +01:00
parent 7e03427fa1
commit 5020ea28bc
2 changed files with 45 additions and 135 deletions

View File

@ -190,6 +190,9 @@ Other
* LUCENE-7559: UnifiedHighlighter: Make Passage more exposed to allow passage creation to * LUCENE-7559: UnifiedHighlighter: Make Passage more exposed to allow passage creation to
be customized. (David Smiley) be customized. (David Smiley)
* LUCENE-7599: Simplify TestRandomChains using Java's built-in Predicate and
Function interfaces. (Ahmet Arslan via Adrien Grand)
Build Build
* LUCENE-7387: fix defaultCodec in build.xml to account for the line ending (hossman) * LUCENE-7387: fix defaultCodec in build.xml to account for the line ending (hossman)

View File

@ -45,6 +45,8 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Random; import java.util.Random;
import java.util.Set; import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
@ -106,15 +108,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
static List<Constructor<? extends TokenFilter>> tokenfilters; static List<Constructor<? extends TokenFilter>> tokenfilters;
static List<Constructor<? extends CharFilter>> charfilters; static List<Constructor<? extends CharFilter>> charfilters;
private static interface Predicate<T> { private static final Predicate<Object[]> ALWAYS = (objects -> true);
boolean apply(T o);
}
private static final Predicate<Object[]> ALWAYS = new Predicate<Object[]>() {
public boolean apply(Object[] args) {
return true;
};
};
private static final Map<Constructor<?>,Predicate<Object[]>> brokenConstructors = new HashMap<>(); private static final Map<Constructor<?>,Predicate<Object[]>> brokenConstructors = new HashMap<>();
static { static {
@ -124,36 +118,27 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
ALWAYS); ALWAYS);
brokenConstructors.put( brokenConstructors.put(
LimitTokenCountFilter.class.getConstructor(TokenStream.class, int.class, boolean.class), LimitTokenCountFilter.class.getConstructor(TokenStream.class, int.class, boolean.class),
new Predicate<Object[]>() { args -> {
@Override
public boolean apply(Object[] args) {
assert args.length == 3; assert args.length == 3;
return !((Boolean) args[2]); // args are broken if consumeAllTokens is false return !((Boolean) args[2]); // args are broken if consumeAllTokens is false
}
}); });
brokenConstructors.put( brokenConstructors.put(
LimitTokenOffsetFilter.class.getConstructor(TokenStream.class, int.class), LimitTokenOffsetFilter.class.getConstructor(TokenStream.class, int.class),
ALWAYS); ALWAYS);
brokenConstructors.put( brokenConstructors.put(
LimitTokenOffsetFilter.class.getConstructor(TokenStream.class, int.class, boolean.class), LimitTokenOffsetFilter.class.getConstructor(TokenStream.class, int.class, boolean.class),
new Predicate<Object[]>() { args -> {
@Override
public boolean apply(Object[] args) {
assert args.length == 3; assert args.length == 3;
return !((Boolean) args[2]); // args are broken if consumeAllTokens is false return !((Boolean) args[2]); // args are broken if consumeAllTokens is false
}
}); });
brokenConstructors.put( brokenConstructors.put(
LimitTokenPositionFilter.class.getConstructor(TokenStream.class, int.class), LimitTokenPositionFilter.class.getConstructor(TokenStream.class, int.class),
ALWAYS); ALWAYS);
brokenConstructors.put( brokenConstructors.put(
LimitTokenPositionFilter.class.getConstructor(TokenStream.class, int.class, boolean.class), LimitTokenPositionFilter.class.getConstructor(TokenStream.class, int.class, boolean.class),
new Predicate<Object[]>() { args -> {
@Override
public boolean apply(Object[] args) {
assert args.length == 3; assert args.length == 3;
return !((Boolean) args[2]); // args are broken if consumeAllTokens is false return !((Boolean) args[2]); // args are broken if consumeAllTokens is false
}
}); });
for (Class<?> c : Arrays.<Class<?>>asList( for (Class<?> c : Arrays.<Class<?>>asList(
// TODO: can we promote some of these to be only // TODO: can we promote some of these to be only
@ -247,12 +232,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
} }
} }
final Comparator<Constructor<?>> ctorComp = new Comparator<Constructor<?>>() { final Comparator<Constructor<?>> ctorComp = (arg0, arg1) -> arg0.toGenericString().compareTo(arg1.toGenericString());
@Override
public int compare(Constructor<?> arg0, Constructor<?> arg1) {
return arg0.toGenericString().compareTo(arg1.toGenericString());
}
};
Collections.sort(tokenizers, ctorComp); Collections.sort(tokenizers, ctorComp);
Collections.sort(tokenfilters, ctorComp); Collections.sort(tokenfilters, ctorComp);
Collections.sort(charfilters, ctorComp); Collections.sort(charfilters, ctorComp);
@ -318,21 +298,14 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
} }
} }
private static interface ArgProducer { private static final Map<Class<?>,Function<Random,Object>> argProducers = new IdentityHashMap<Class<?>,Function<Random,Object>>() {{
Object create(Random random); put(int.class, random -> {
}
private static final Map<Class<?>,ArgProducer> argProducers = new IdentityHashMap<Class<?>,ArgProducer>() {{
put(int.class, new ArgProducer() {
@Override public Object create(Random random) {
// TODO: could cause huge ram usage to use full int range for some filters // TODO: could cause huge ram usage to use full int range for some filters
// (e.g. allocate enormous arrays) // (e.g. allocate enormous arrays)
// return Integer.valueOf(random.nextInt()); // return Integer.valueOf(random.nextInt());
return Integer.valueOf(TestUtil.nextInt(random, -50, 50)); return Integer.valueOf(TestUtil.nextInt(random, -50, 50));
}
}); });
put(char.class, new ArgProducer() { put(char.class, random -> {
@Override public Object create(Random random) {
// TODO: fix any filters that care to throw IAE instead. // TODO: fix any filters that care to throw IAE instead.
// also add a unicode validating filter to validate termAtt? // also add a unicode validating filter to validate termAtt?
// return Character.valueOf((char)random.nextInt(65536)); // return Character.valueOf((char)random.nextInt(65536));
@ -342,49 +315,19 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
return Character.valueOf(c); return Character.valueOf(c);
} }
} }
}
}); });
put(float.class, new ArgProducer() { put(float.class, Random::nextFloat);
@Override public Object create(Random random) { put(boolean.class, Random::nextBoolean);
return Float.valueOf(random.nextFloat()); put(byte.class, random -> (byte) random.nextInt(256));
} put(byte[].class, random -> {
});
put(boolean.class, new ArgProducer() {
@Override public Object create(Random random) {
return Boolean.valueOf(random.nextBoolean());
}
});
put(byte.class, new ArgProducer() {
@Override public Object create(Random random) {
// this wraps to negative when casting to byte
return Byte.valueOf((byte) random.nextInt(256));
}
});
put(byte[].class, new ArgProducer() {
@Override public Object create(Random random) {
byte bytes[] = new byte[random.nextInt(256)]; byte bytes[] = new byte[random.nextInt(256)];
random.nextBytes(bytes); random.nextBytes(bytes);
return bytes; return bytes;
}
}); });
put(Random.class, new ArgProducer() { put(Random.class, random -> new Random(random.nextLong()));
@Override public Object create(Random random) { put(Version.class, random -> Version.LATEST);
return new Random(random.nextLong()); put(AttributeFactory.class, BaseTokenStreamTestCase::newAttributeFactory);
} put(Set.class,random -> {
});
put(Version.class, new ArgProducer() {
@Override public Object create(Random random) {
// we expect bugs in emulating old versions
return Version.LATEST;
}
});
put(AttributeFactory.class, new ArgProducer() {
@Override public Object create(Random random) {
return newAttributeFactory(random);
}
});
put(Set.class, new ArgProducer() {
@Override public Object create(Random random) {
// TypeTokenFilter // TypeTokenFilter
Set<String> set = new HashSet<>(); Set<String> set = new HashSet<>();
int num = random.nextInt(5); int num = random.nextInt(5);
@ -392,10 +335,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
set.add(StandardTokenizer.TOKEN_TYPES[random.nextInt(StandardTokenizer.TOKEN_TYPES.length)]); set.add(StandardTokenizer.TOKEN_TYPES[random.nextInt(StandardTokenizer.TOKEN_TYPES.length)]);
} }
return set; return set;
}
}); });
put(Collection.class, new ArgProducer() { put(Collection.class, random -> {
@Override public Object create(Random random) {
// CapitalizationFilter // CapitalizationFilter
Collection<char[]> col = new ArrayList<>(); Collection<char[]> col = new ArrayList<>();
int num = random.nextInt(5); int num = random.nextInt(5);
@ -403,10 +344,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
col.add(TestUtil.randomSimpleString(random).toCharArray()); col.add(TestUtil.randomSimpleString(random).toCharArray());
} }
return col; return col;
}
}); });
put(CharArraySet.class, new ArgProducer() { put(CharArraySet.class, random -> {
@Override public Object create(Random random) {
int num = random.nextInt(10); int num = random.nextInt(10);
CharArraySet set = new CharArraySet(num, random.nextBoolean()); CharArraySet set = new CharArraySet(num, random.nextBoolean());
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
@ -414,28 +353,13 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
set.add(TestUtil.randomSimpleString(random)); set.add(TestUtil.randomSimpleString(random));
} }
return set; return set;
}
}); });
put(Pattern.class, new ArgProducer() { // TODO: don't want to make the exponentially slow ones Dawid documents
@Override public Object create(Random random) { // in TestPatternReplaceFilter, so don't use truly random patterns (for now)
// TODO: don't want to make the exponentially slow ones Dawid documents put(Pattern.class, random -> Pattern.compile("a"));
// in TestPatternReplaceFilter, so don't use truly random patterns (for now) put(Pattern[].class, random -> new Pattern[] {Pattern.compile("([a-z]+)"), Pattern.compile("([0-9]+)")});
return Pattern.compile("a"); put(PayloadEncoder.class, random -> new IdentityEncoder()); // the other encoders will throw exceptions if tokens aren't numbers?
} put(Dictionary.class, random -> {
});
put(Pattern[].class, new ArgProducer() {
@Override public Object create(Random random) {
return new Pattern[] {Pattern.compile("([a-z]+)"), Pattern.compile("([0-9]+)")};
}
});
put(PayloadEncoder.class, new ArgProducer() {
@Override public Object create(Random random) {
return new IdentityEncoder(); // the other encoders will throw exceptions if tokens aren't numbers?
}
});
put(Dictionary.class, new ArgProducer() {
@Override public Object create(Random random) {
// TODO: make nastier // TODO: make nastier
InputStream affixStream = TestHunspellStemFilter.class.getResourceAsStream("simple.aff"); InputStream affixStream = TestHunspellStemFilter.class.getResourceAsStream("simple.aff");
InputStream dictStream = TestHunspellStemFilter.class.getResourceAsStream("simple.dic"); InputStream dictStream = TestHunspellStemFilter.class.getResourceAsStream("simple.dic");
@ -445,10 +369,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
Rethrow.rethrow(ex); Rethrow.rethrow(ex);
return null; // unreachable code return null; // unreachable code
} }
}
}); });
put(HyphenationTree.class, new ArgProducer() { put(HyphenationTree.class, random -> {
@Override public Object create(Random random) {
// TODO: make nastier // TODO: make nastier
try { try {
InputSource is = new InputSource(TestCompoundWordTokenFilter.class.getResource("da_UTF8.xml").toExternalForm()); InputSource is = new InputSource(TestCompoundWordTokenFilter.class.getResource("da_UTF8.xml").toExternalForm());
@ -458,10 +380,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
Rethrow.rethrow(ex); Rethrow.rethrow(ex);
return null; // unreachable code return null; // unreachable code
} }
}
}); });
put(SnowballProgram.class, new ArgProducer() { put(SnowballProgram.class, random -> {
@Override public Object create(Random random) {
try { try {
String lang = TestSnowball.SNOWBALL_LANGS[random.nextInt(TestSnowball.SNOWBALL_LANGS.length)]; String lang = TestSnowball.SNOWBALL_LANGS[random.nextInt(TestSnowball.SNOWBALL_LANGS.length)];
Class<? extends SnowballProgram> clazz = Class.forName("org.tartarus.snowball.ext." + lang + "Stemmer").asSubclass(SnowballProgram.class); Class<? extends SnowballProgram> clazz = Class.forName("org.tartarus.snowball.ext." + lang + "Stemmer").asSubclass(SnowballProgram.class);
@ -470,10 +390,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
Rethrow.rethrow(ex); Rethrow.rethrow(ex);
return null; // unreachable code return null; // unreachable code
} }
}
}); });
put(String.class, new ArgProducer() { put(String.class, random -> {
@Override public Object create(Random random) {
// TODO: make nastier // TODO: make nastier
if (random.nextBoolean()) { if (random.nextBoolean()) {
// a token type // a token type
@ -481,10 +399,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
} else { } else {
return TestUtil.randomSimpleString(random); return TestUtil.randomSimpleString(random);
} }
}
}); });
put(NormalizeCharMap.class, new ArgProducer() { put(NormalizeCharMap.class, random -> {
@Override public Object create(Random random) {
NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder(); NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
// we can't add duplicate keys, or NormalizeCharMap gets angry // we can't add duplicate keys, or NormalizeCharMap gets angry
Set<String> keys = new HashSet<>(); Set<String> keys = new HashSet<>();
@ -500,10 +416,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
} }
} }
return builder.build(); return builder.build();
}
}); });
put(CharacterRunAutomaton.class, new ArgProducer() { put(CharacterRunAutomaton.class, random -> {
@Override public Object create(Random random) {
// TODO: could probably use a purely random automaton // TODO: could probably use a purely random automaton
switch(random.nextInt(5)) { switch(random.nextInt(5)) {
case 0: return MockTokenizer.KEYWORD; case 0: return MockTokenizer.KEYWORD;
@ -512,10 +426,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
case 3: return MockTokenFilter.EMPTY_STOPSET; case 3: return MockTokenFilter.EMPTY_STOPSET;
default: return MockTokenFilter.ENGLISH_STOPSET; default: return MockTokenFilter.ENGLISH_STOPSET;
} }
}
}); });
put(CharArrayMap.class, new ArgProducer() { put(CharArrayMap.class, random -> {
@Override public Object create(Random random) {
int num = random.nextInt(10); int num = random.nextInt(10);
CharArrayMap<String> map = new CharArrayMap<>(num, random.nextBoolean()); CharArrayMap<String> map = new CharArrayMap<>(num, random.nextBoolean());
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
@ -523,10 +435,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
map.put(TestUtil.randomSimpleString(random), TestUtil.randomSimpleString(random)); map.put(TestUtil.randomSimpleString(random), TestUtil.randomSimpleString(random));
} }
return map; return map;
}
}); });
put(StemmerOverrideMap.class, new ArgProducer() { put(StemmerOverrideMap.class, random -> {
@Override public Object create(Random random) {
int num = random.nextInt(10); int num = random.nextInt(10);
StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random.nextBoolean()); StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(random.nextBoolean());
for (int i = 0; i < num; i++) { for (int i = 0; i < num; i++) {
@ -545,11 +455,10 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
} catch (Exception ex) { } catch (Exception ex) {
Rethrow.rethrow(ex); Rethrow.rethrow(ex);
return null; // unreachable code return null; // unreachable code
}
} }
}); });
put(SynonymMap.class, new ArgProducer() { put(SynonymMap.class, new Function<Random, Object>() {
@Override public Object create(Random random) { @Override public Object apply(Random random) {
SynonymMap.Builder b = new SynonymMap.Builder(random.nextBoolean()); SynonymMap.Builder b = new SynonymMap.Builder(random.nextBoolean());
final int numEntries = atLeast(10); final int numEntries = atLeast(10);
for (int j = 0; j < numEntries; j++) { for (int j = 0; j < numEntries; j++) {
@ -578,12 +487,9 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
} }
} }
}); });
put(DateFormat.class, new ArgProducer() { put(DateFormat.class, random -> {
@Override
public Object create(Random random) {
if (random.nextBoolean()) return null; if (random.nextBoolean()) return null;
return DateFormat.getDateInstance(DateFormat.DEFAULT, randomLocale(random)); return DateFormat.getDateInstance(DateFormat.DEFAULT, randomLocale(random));
}
}); });
}}; }};
@ -608,9 +514,9 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
static <T> T newRandomArg(Random random, Class<T> paramType) { static <T> T newRandomArg(Random random, Class<T> paramType) {
final ArgProducer producer = argProducers.get(paramType); final Function<Random,Object> producer = argProducers.get(paramType);
assertNotNull("No producer for arguments of type " + paramType.getName() + " found", producer); assertNotNull("No producer for arguments of type " + paramType.getName() + " found", producer);
return (T) producer.create(random); return (T) producer.apply(random);
} }
static Object[] newTokenizerArgs(Random random, Class<?>[] paramTypes) { static Object[] newTokenizerArgs(Random random, Class<?>[] paramTypes) {
@ -707,7 +613,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
sb.append("filters="); sb.append("filters=");
sb.append(tokenFilterSpec.toString); sb.append(tokenFilterSpec.toString);
sb.append("\n"); sb.append("\n");
sb.append("offsetsAreCorrect=" + tokenFilterSpec.offsetsAreCorrect); sb.append("offsetsAreCorrect=");
sb.append(tokenFilterSpec.offsetsAreCorrect);
return sb.toString(); return sb.toString();
} }
@ -745,12 +652,12 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
private boolean broken(Constructor<?> ctor, Object[] args) { private boolean broken(Constructor<?> ctor, Object[] args) {
final Predicate<Object[]> pred = brokenConstructors.get(ctor); final Predicate<Object[]> pred = brokenConstructors.get(ctor);
return pred != null && pred.apply(args); return pred != null && pred.test(args);
} }
private boolean brokenOffsets(Constructor<?> ctor, Object[] args) { private boolean brokenOffsets(Constructor<?> ctor, Object[] args) {
final Predicate<Object[]> pred = brokenOffsetsConstructors.get(ctor); final Predicate<Object[]> pred = brokenOffsetsConstructors.get(ctor);
return pred != null && pred.apply(args); return pred != null && pred.test(args);
} }
// create a new random tokenizer from classpath // create a new random tokenizer from classpath