mirror of https://github.com/apache/lucene.git
LUCENE-3969: clean up nocommits
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3969@1324834 13f79535-47bb-0310-9956-ffa450edef68
parent 5475644b59
commit c845af5497
@@ -27,13 +27,13 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
 import org.apache.lucene.util.Attribute;
 
-// nocommit rename to OffsetsXXXTF? ie we only validate
+// TODO: rename to OffsetsXXXTF? ie we only validate
 // offsets (now anyway...)
 
 // TODO: also make a DebuggingTokenFilter, that just prints
 // all att values that come through it...
 
-// nocommit BTSTC should just append this to the chain
+// TODO: BTSTC should just append this to the chain
 // instead of checking itself:
 
 /** A TokenFilter that checks consistency of the tokens (eg
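
The TODO above about a DebuggingTokenFilter is only an idea, not something this commit adds; a minimal sketch of what such a filter could look like, assuming AttributeSource.reflectAsString is used to dump the attribute values, is:

    import java.io.IOException;

    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;

    // Hypothetical class, not part of this commit: prints every attribute value
    // (term, offsets, position increment, ...) for each token passing through.
    public final class DebuggingTokenFilter extends TokenFilter {

      public DebuggingTokenFilter(TokenStream in) {
        super(in);
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) {
          return false;
        }
        // reflectAsString(true) renders all attributes of the current token as text.
        System.out.println(reflectAsString(true));
        return true;
      }
    }

Dropping such a filter anywhere into a chain leaves the tokens untouched while logging them, which is what the comment seems to be after.
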
@@ -155,7 +155,7 @@ public final class ValidatingTokenFilter extends TokenFilter {
 
     // TODO: what else to validate
 
-    // nocommit check that endOffset is >= max(endOffset)
+    // TODO: check that endOffset is >= max(endOffset)
     // we've seen
   }
 
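
The endOffset check that the TODO asks for is not implemented in this commit; one way to express it, as a rough sketch with invented names (maxEndOffset is not a field of the real ValidatingTokenFilter), is a filter that remembers the largest endOffset seen so far and fails when a later token ends before it:

    import java.io.IOException;

    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

    // Illustrative sketch only: enforce that endOffset never moves backwards.
    public final class MaxEndOffsetCheckFilter extends TokenFilter {
      private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
      private int maxEndOffset;

      public MaxEndOffsetCheckFilter(TokenStream in) {
        super(in);
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) {
          return false;
        }
        final int endOffset = offsetAtt.endOffset();
        if (endOffset < maxEndOffset) {
          throw new IllegalStateException("endOffset=" + endOffset
              + " went backwards from previously seen max=" + maxEndOffset);
        }
        maxEndOffset = endOffset;
        return true;
      }

      @Override
      public void reset() throws IOException {
        super.reset();
        maxEndOffset = 0;
      }
    }
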
@@ -191,7 +191,7 @@ public class HyphenationCompoundWordTokenFilter extends
       // we only put subwords to the token stream
       // that are longer than minPartSize
       if (partLength < this.minSubwordSize) {
-        // nocommit/BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
+        // BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
         // calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
         continue;
       }
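
The guard above leans on minSubwordSize being >= 0 so that the bogus negative part lengths fall into the same continue branch as genuinely too-short subwords. The effect is easy to see in isolation (the lengths below are invented, not the filter's real computation):

    // Stand-alone illustration of the minSubwordSize guard; all values are made up.
    public class MinSubwordSizeGuardExample {
      public static void main(String[] args) {
        int minSubwordSize = 2;                // assumed >= 0, as the comment above relies on
        int[] partLengths = {4, -1, 3, -7, 2}; // -1 and -7 stand in for the "negative parts"
        for (int partLength : partLengths) {
          if (partLength < minSubwordSize) {
            continue;                          // negative and too-short parts are skipped here
          }
          System.out.println("emit subword of length " + partLength);
        }
      }
    }
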
@@ -81,7 +81,6 @@ public class ReversePathHierarchyTokenizer extends Tokenizer {
       throw new IllegalArgumentException("bufferSize cannot be negative");
     }
     if (skip < 0) {
-      // nocommit: not quite right right here: see line 84... if skip > numTokensFound we always get a NegativeArrayException? needs fixing!
       throw new IllegalArgumentException("skip cannot be negative");
     }
     termAtt.resizeBuffer(bufferSize);
@@ -195,8 +195,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
     checkRandomData(random, analyzer, numRounds);
   }
 
-  // nocommit: wrong final offset, fix this!
-  @Ignore
+  @Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
   public void testFinalOffsetSpecialCase() throws Exception {
     final NormalizeCharMap map = new NormalizeCharMap();
     map.add("t", "");
@@ -220,8 +219,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
     checkAnalysisConsistency(random, analyzer, false, text);
   }
 
-  // nocommit: this is intended to fail until we fix bugs
-  @Ignore
+  @Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
   public void testRandomMaps() throws Exception {
     for (int i = 0; i < 100; i++) {
       final NormalizeCharMap map = randomMap();
@@ -81,6 +81,7 @@ import org.apache.lucene.analysis.position.PositionFilter;
 import org.apache.lucene.analysis.snowball.TestSnowball;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.apache.lucene.analysis.th.ThaiWordFilter;
 import org.apache.lucene.analysis.util.CharArrayMap;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.util.AttributeSource.AttributeFactory;
@@ -105,7 +106,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
   // TODO: fix those and remove
   private static final Set<Class<?>> brokenComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
   static {
-    // nocommit can we promote some of these to be only
+    // TODO: can we promote some of these to be only
     // offsets offenders?
     Collections.<Class<?>>addAll(brokenComponents,
       // TODO: fix basetokenstreamtestcase not to trip because this one has no CharTermAtt
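
brokenComponents above is an identity-based Set<Class<?>>: membership of Class objects can safely be decided with ==, so Collections.newSetFromMap over an IdentityHashMap gives a cheap exclusion list. A self-contained sketch of the same pattern (the placeholder classes and the isBroken helper are illustrative, not the test's code):

    import java.util.Collections;
    import java.util.IdentityHashMap;
    import java.util.Set;

    // Same construction as brokenComponents, shown with placeholder classes.
    public class BrokenComponentsPattern {
      private static final Set<Class<?>> broken =
          Collections.newSetFromMap(new IdentityHashMap<Class<?>, Boolean>());

      static {
        Collections.<Class<?>>addAll(broken, String.class, StringBuilder.class);
      }

      static boolean isBroken(Class<?> componentClass) {
        // A random-chains style test would skip instantiating anything in this set.
        return broken.contains(componentClass);
      }

      public static void main(String[] args) {
        System.out.println(isBroken(String.class));  // true
        System.out.println(isBroken(Integer.class)); // false
      }
    }
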
@@ -132,7 +133,11 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
       EdgeNGramTokenizer.class,
       // broken!
       EdgeNGramTokenFilter.class,
-      // nocommit: remove this class after we fix its finalOffset bug
+      // broken!
+      WordDelimiterFilter.class,
+      // broken!
+      TrimFilter.class,
+      // TODO: remove this class after we fix its finalOffset bug
       MappingCharFilter.class
     );
   }
@@ -142,16 +147,16 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
   private static final Set<Class<?>> brokenOffsetsComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
   static {
     Collections.<Class<?>>addAll(brokenOffsetsComponents,
       WordDelimiterFilter.class,
       TrimFilter.class,
       ReversePathHierarchyTokenizer.class,
       PathHierarchyTokenizer.class,
       HyphenationCompoundWordTokenFilter.class,
       DictionaryCompoundWordTokenFilter.class,
-      // nocommit: corrumpts graphs (offset consistency check):
+      // TODO: corrumpts graphs (offset consistency check):
       PositionFilter.class,
-      // nocommit it seems to mess up offsets!?
-      WikipediaTokenizer.class
+      // TODO: it seems to mess up offsets!?
+      WikipediaTokenizer.class,
+      // TODO: doesn't handle graph inputs
+      ThaiWordFilter.class
     );
   }
 
@@ -271,7 +276,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
     });
     put(char.class, new ArgProducer() {
       @Override public Object create(Random random) {
-        // nocommit: fix any filters that care to throw IAE instead.
+        // TODO: fix any filters that care to throw IAE instead.
+        // also add a unicode validating filter to validate termAtt?
         // return Character.valueOf((char)random.nextInt(65536));
         while(true) {
           char c = (char)random.nextInt(65536);
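
The hunk cuts off before the loop's exit condition, so the reason for the while(true) is not visible here; one plausible reading, consistent with the "unicode validating" remark, is that it keeps drawing until the char is not a lone surrogate. A sketch under that assumption (not the test's actual code):

    import java.util.Random;

    // Assumption: redraw until the char is not a surrogate code unit.
    public class RandomCharSketch {
      static char randomChar(Random random) {
        while (true) {
          char c = (char) random.nextInt(65536);
          // Unpaired surrogates (U+D800..U+DFFF) are not valid text on their own,
          // so skip them and draw again.
          if (!Character.isHighSurrogate(c) && !Character.isLowSurrogate(c)) {
            return c;
          }
        }
      }

      public static void main(String[] args) {
        Random random = new Random(0);
        System.out.println(Integer.toHexString(randomChar(random)));
      }
    }
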
@@ -534,7 +540,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
         // TODO: maybe the collator one...???
         args[i] = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
       } else if (paramType == AttributeSource.class) {
-        // nocommit: args[i] = new AttributeSource();
+        // TODO: args[i] = new AttributeSource();
         // this is currently too scary to deal with!
         args[i] = null; // force IAE
       } else {
@@ -583,7 +589,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
   }
 
   public boolean offsetsAreCorrect() {
-    // nocommit: can we not do the full chain here!?
+    // TODO: can we not do the full chain here!?
     Random random = new Random(seed);
     TokenizerSpec tokenizerSpec = newTokenizer(random, new StringReader(""));
     TokenFilterSpec filterSpec = newFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);
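
offsetsAreCorrect() rebuilds the whole tokenizer and filter chain from the stored seed just to answer one question, which is exactly what the TODO is questioning. The property it relies on is simply that java.util.Random is deterministic for a given seed, so the same chain comes back every time; for example:

    import java.util.Random;

    // Two generators seeded identically replay identical sequences, which is why
    // a randomly built analyzer chain can be reconstructed from its seed alone.
    public class SeedReplay {
      public static void main(String[] args) {
        long seed = 42L;
        Random a = new Random(seed);
        Random b = new Random(seed);
        for (int i = 0; i < 3; i++) {
          int x = a.nextInt(100);
          int y = b.nextInt(100);
          System.out.println(x + " == " + y + " : " + (x == y));
        }
      }
    }
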
@@ -717,7 +723,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
       while (true) {
         final Constructor<? extends TokenFilter> ctor = tokenfilters.get(random.nextInt(tokenfilters.size()));
 
-        // nocommit/hack: MockGraph/MockLookahead has assertions that will trip if they follow
+        // hack: MockGraph/MockLookahead has assertions that will trip if they follow
         // an offsets violator. so we cant use them after e.g. wikipediatokenizer
         if (!spec.offsetsAreCorrect &&
           (ctor.getDeclaringClass().equals(MockGraphTokenFilter.class)