LUCENE-3969: clean up nocommits

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3969@1324834 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2012-04-11 16:01:07 +00:00
parent 5475644b59
commit c845af5497
5 changed files with 23 additions and 20 deletions


@@ -27,13 +27,13 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
 import org.apache.lucene.util.Attribute;
-// nocommit rename to OffsetsXXXTF? ie we only validate
+// TODO: rename to OffsetsXXXTF? ie we only validate
 // offsets (now anyway...)
 // TODO: also make a DebuggingTokenFilter, that just prints
 // all att values that come through it...
-// nocommit BTSTC should just append this to the chain
+// TODO: BTSTC should just append this to the chain
 // instead of checking itself:
 /** A TokenFilter that checks consistency of the tokens (eg
@@ -155,7 +155,7 @@ public final class ValidatingTokenFilter extends TokenFilter {
     // TODO: what else to validate
-    // nocommit check that endOffset is >= max(endOffset)
+    // TODO: check that endOffset is >= max(endOffset)
     // we've seen
   }


@@ -191,7 +191,7 @@ public class HyphenationCompoundWordTokenFilter extends
         // we only put subwords to the token stream
         // that are longer than minPartSize
         if (partLength < this.minSubwordSize) {
-          // nocommit/BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
+          // BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
           // calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
           continue;
         }


@@ -81,7 +81,6 @@ public class ReversePathHierarchyTokenizer extends Tokenizer {
       throw new IllegalArgumentException("bufferSize cannot be negative");
     }
     if (skip < 0) {
-      // nocommit: not quite right right here: see line 84... if skip > numTokensFound we always get a NegativeArrayException? needs fixing!
       throw new IllegalArgumentException("skip cannot be negative");
     }
     termAtt.resizeBuffer(bufferSize);


@@ -195,8 +195,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
     checkRandomData(random, analyzer, numRounds);
   }
-  // nocommit: wrong final offset, fix this!
-  @Ignore
+  @Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
   public void testFinalOffsetSpecialCase() throws Exception {
     final NormalizeCharMap map = new NormalizeCharMap();
     map.add("t", "");
@@ -220,8 +219,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
     checkAnalysisConsistency(random, analyzer, false, text);
   }
-  // nocommit: this is intended to fail until we fix bugs
-  @Ignore
+  @Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
   public void testRandomMaps() throws Exception {
     for (int i = 0; i < 100; i++) {
       final NormalizeCharMap map = randomMap();


@@ -81,6 +81,7 @@ import org.apache.lucene.analysis.position.PositionFilter;
 import org.apache.lucene.analysis.snowball.TestSnowball;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.apache.lucene.analysis.th.ThaiWordFilter;
 import org.apache.lucene.analysis.util.CharArrayMap;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.util.AttributeSource.AttributeFactory;
@@ -105,7 +106,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
   // TODO: fix those and remove
   private static final Set<Class<?>> brokenComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
   static {
-    // nocommit can we promote some of these to be only
+    // TODO: can we promote some of these to be only
     // offsets offenders?
     Collections.<Class<?>>addAll(brokenComponents,
       // TODO: fix basetokenstreamtestcase not to trip because this one has no CharTermAtt
@@ -132,7 +133,11 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
       EdgeNGramTokenizer.class,
       // broken!
       EdgeNGramTokenFilter.class,
-      // nocommit: remove this class after we fix its finalOffset bug
+      // broken!
+      WordDelimiterFilter.class,
+      // broken!
+      TrimFilter.class,
+      // TODO: remove this class after we fix its finalOffset bug
       MappingCharFilter.class
     );
   }
@@ -142,16 +147,16 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
   private static final Set<Class<?>> brokenOffsetsComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
   static {
     Collections.<Class<?>>addAll(brokenOffsetsComponents,
-      WordDelimiterFilter.class,
-      TrimFilter.class,
       ReversePathHierarchyTokenizer.class,
       PathHierarchyTokenizer.class,
       HyphenationCompoundWordTokenFilter.class,
       DictionaryCompoundWordTokenFilter.class,
-      // nocommit: corrumpts graphs (offset consistency check):
+      // TODO: corrumpts graphs (offset consistency check):
       PositionFilter.class,
-      // nocommit it seems to mess up offsets!?
-      WikipediaTokenizer.class
+      // TODO: it seems to mess up offsets!?
+      WikipediaTokenizer.class,
+      // TODO: doesn't handle graph inputs
+      ThaiWordFilter.class
     );
   }
@@ -271,7 +276,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
     });
     put(char.class, new ArgProducer() {
       @Override public Object create(Random random) {
-        // nocommit: fix any filters that care to throw IAE instead.
+        // TODO: fix any filters that care to throw IAE instead.
+        // also add a unicode validating filter to validate termAtt?
        // return Character.valueOf((char)random.nextInt(65536));
        while(true) {
          char c = (char)random.nextInt(65536);
@@ -534,7 +540,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
         // TODO: maybe the collator one...???
         args[i] = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
       } else if (paramType == AttributeSource.class) {
-        // nocommit: args[i] = new AttributeSource();
+        // TODO: args[i] = new AttributeSource();
         // this is currently too scary to deal with!
         args[i] = null; // force IAE
       } else {
@@ -583,7 +589,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
   }
   public boolean offsetsAreCorrect() {
-    // nocommit: can we not do the full chain here!?
+    // TODO: can we not do the full chain here!?
     Random random = new Random(seed);
     TokenizerSpec tokenizerSpec = newTokenizer(random, new StringReader(""));
     TokenFilterSpec filterSpec = newFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);
@@ -717,7 +723,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
     while (true) {
       final Constructor<? extends TokenFilter> ctor = tokenfilters.get(random.nextInt(tokenfilters.size()));
-      // nocommit/hack: MockGraph/MockLookahead has assertions that will trip if they follow
+      // hack: MockGraph/MockLookahead has assertions that will trip if they follow
       // an offsets violator. so we cant use them after e.g. wikipediatokenizer
       if (!spec.offsetsAreCorrect &&
           (ctor.getDeclaringClass().equals(MockGraphTokenFilter.class)