mirror of https://github.com/apache/lucene.git
LUCENE-8273: Don't wrap MinHashFilter in a condition
MinHashFilter needs to consume the entire token stream, so wrapping it in a randomized condition makes no sense and breaks offsets.
This commit is contained in:
parent
0bf1eae92c
commit
24c186eff9
|
@ -71,6 +71,7 @@ import org.apache.lucene.analysis.compound.TestCompoundWordTokenFilter;
|
||||||
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
|
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
|
||||||
import org.apache.lucene.analysis.hunspell.Dictionary;
|
import org.apache.lucene.analysis.hunspell.Dictionary;
|
||||||
import org.apache.lucene.analysis.hunspell.TestHunspellStemFilter;
|
import org.apache.lucene.analysis.hunspell.TestHunspellStemFilter;
|
||||||
|
import org.apache.lucene.analysis.minhash.MinHashFilter;
|
||||||
import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter;
|
import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter;
|
||||||
import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilter;
|
import org.apache.lucene.analysis.miscellaneous.DelimitedTermFrequencyTokenFilter;
|
||||||
import org.apache.lucene.analysis.miscellaneous.FingerprintFilter;
|
import org.apache.lucene.analysis.miscellaneous.FingerprintFilter;
|
||||||
|
@ -120,6 +121,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
|
||||||
static {
|
static {
|
||||||
// Fingerprint filter needs to consume the whole tokenstream, so conditionals don't make sense here
|
// Fingerprint filter needs to consume the whole tokenstream, so conditionals don't make sense here
|
||||||
avoidConditionals.add(FingerprintFilter.class);
|
avoidConditionals.add(FingerprintFilter.class);
|
||||||
|
// Ditto MinHashFilter
|
||||||
|
avoidConditionals.add(MinHashFilter.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final Map<Constructor<?>,Predicate<Object[]>> brokenConstructors = new HashMap<>();
|
private static final Map<Constructor<?>,Predicate<Object[]>> brokenConstructors = new HashMap<>();
|
||||||
|
|
Loading…
Reference in New Issue