LUCENE-3969: demote the n-grams again (with explanation)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3969@1311915 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2012-04-10 18:36:34 +00:00
parent ad994d8281
commit c58dfd5516
1 changed file with 16 additions and 9 deletions

@@ -113,7 +113,22 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
LimitTokenCountFilter.class,
// Not broken: we forcefully add this, so we shouldn't
// also randomly pick it:
ValidatingTokenFilter.class
ValidatingTokenFilter.class,
// NOTE: these by themselves won't cause any 'basic assertions' to fail,
// but see https://issues.apache.org/jira/browse/LUCENE-3920: if any
// tokenfilter that combines words (e.g. shingles) comes after them,
// this will create bogus offsets because their 'offsets go backwards',
// causing shingle or whatever to make a single token with a
// startOffset that's > its endOffset
// (see LUCENE-3738 for a list of other offenders here)
// broken!
NGramTokenizer.class,
// broken!
NGramTokenFilter.class,
// broken!
EdgeNGramTokenizer.class,
// broken!
EdgeNGramTokenFilter.class
);
}
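The comment added above describes the interaction from LUCENE-3920: the n-gram tokenizers/filters emit tokens whose offsets go backwards, and a combining filter such as ShingleFilter running after them can then produce a token whose startOffset is greater than its endOffset. The sketch below is not part of this commit and only illustrates that failure mode; the class name BackwardsOffsetsSketch is made up, and the constructor signatures assume a Lucene 4.0-era analysis-common API (they differ in other releases, and later releases changed NGramTokenFilter so it no longer rewrites offsets).

// Hypothetical sketch (not from the commit): an n-gram filter followed by a
// shingle filter can yield a token whose startOffset exceeds its endOffset.
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.Version;

public class BackwardsOffsetsSketch {
  public static void main(String[] args) throws Exception {
    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_40, new StringReader("please divide this"));
    ts = new NGramTokenFilter(ts, 1, 2); // sub-term offsets 'go backwards' within each word
    ts = new ShingleFilter(ts, 2);       // combines adjacent tokens into shingles

    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      // the 'basic assertion' being violated: startOffset must never exceed endOffset
      if (offset.startOffset() > offset.endOffset()) {
        System.out.println("bogus token '" + term + "' offsets ["
            + offset.startOffset() + "," + offset.endOffset() + "]");
      }
    }
    ts.end();
    ts.close();
  }
}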
@@ -130,14 +145,6 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
DictionaryCompoundWordTokenFilter.class,
// nocommit: corrupts graphs (offset consistency check):
PositionFilter.class,
// broken!
NGramTokenizer.class,
// broken!
NGramTokenFilter.class,
// broken!
EdgeNGramTokenizer.class,
// broken!
EdgeNGramTokenFilter.class,
// nocommit it seems to mess up offsets!?
WikipediaTokenizer.class
);
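As the "we shouldn't also randomly pick it" comment in the first hunk suggests, these two hunks edit class-exclusion sets that TestRandomChains consults before randomly instantiating an analysis component: the n-gram classes are added to the set in the first hunk and removed from the set in the second. Below is a rough, self-contained sketch of that exclusion-set pattern; the names ExclusionListSketch, brokenComponents, and allowed are illustrative, not copied from the test.

// Illustrative sketch of the exclusion-set pattern (hypothetical names):
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.Set;

import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.analysis.ngram.NGramTokenizer;

public class ExclusionListSketch {
  // components a random chain builder must never pick
  private static final Set<Class<?>> brokenComponents =
      Collections.newSetFromMap(new IdentityHashMap<Class<?>, Boolean>());
  static {
    Collections.addAll(brokenComponents,
        NGramTokenizer.class,
        NGramTokenFilter.class,
        EdgeNGramTokenizer.class,
        EdgeNGramTokenFilter.class);
  }

  // a random chain builder would check this before instantiating a candidate class
  static boolean allowed(Class<?> candidate) {
    return !brokenComponents.contains(candidate);
  }

  public static void main(String[] args) {
    System.out.println(allowed(NGramTokenFilter.class)); // false: excluded above
  }
}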