LUCENE-8273: Don't wrap MinHashFilter in a condition

MinHashFilter needs to consume the entire tokenstream, so wrapping it in a
randomized condition makes no sense, and breaks offsets.
This commit is contained in:
Alan Woodward 2018-05-22 09:08:23 +01:00
parent 0bf1eae92c
commit 24c186eff9
1 changed file with 3 additions and 0 deletions

View File

@@ -71,6 +71,7 @@ import org.apache.lucene.analysis.compound.TestCompoundWordTokenFilter;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.TestHunspellStemFilter;
import org.apache.lucene.analysis.minhash.MinHashFilter;
import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter;
import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilter;
import org.apache.lucene.analysis.miscellaneous.FingerprintFilter;
@@ -120,6 +121,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
static {
// Filters registered in avoidConditionals must not be wrapped in a (randomized) condition.
// Fingerprint filter needs to consume the whole tokenstream, so conditionals don't make sense here
avoidConditionals.add(FingerprintFilter.class);
// MinHashFilter likewise needs to consume the entire tokenstream; wrapping it in a
// randomized condition makes no sense and breaks offsets (LUCENE-8273)
avoidConditionals.add(MinHashFilter.class);
}
private static final Map<Constructor<?>,Predicate<Object[]>> brokenConstructors = new HashMap<>();