LUCENE-3969: clean up nocommits

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3969@1324834 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2012-04-11 16:01:07 +00:00
parent 5475644b59
commit c845af5497
5 changed files with 23 additions and 20 deletions


@@ -27,13 +27,13 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
 import org.apache.lucene.util.Attribute;
-// nocommit rename to OffsetsXXXTF? ie we only validate
+// TODO: rename to OffsetsXXXTF? ie we only validate
 // offsets (now anyway...)
 // TODO: also make a DebuggingTokenFilter, that just prints
 // all att values that come through it...
-// nocommit BTSTC should just append this to the chain
+// TODO: BTSTC should just append this to the chain
 // instead of checking itself:
 /** A TokenFilter that checks consistency of the tokens (eg
@@ -155,7 +155,7 @@ public final class ValidatingTokenFilter extends TokenFilter {
     // TODO: what else to validate
-    // nocommit check that endOffset is >= max(endOffset)
+    // TODO: check that endOffset is >= max(endOffset)
     // we've seen
   }


@@ -191,7 +191,7 @@ public class HyphenationCompoundWordTokenFilter extends
         // we only put subwords to the token stream
         // that are longer than minPartSize
         if (partLength < this.minSubwordSize) {
-          // nocommit/BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
+          // BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
           // calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
           continue;
         }


@@ -81,7 +81,6 @@ public class ReversePathHierarchyTokenizer extends Tokenizer {
       throw new IllegalArgumentException("bufferSize cannot be negative");
     }
     if (skip < 0) {
-      // nocommit: not quite right right here: see line 84... if skip > numTokensFound we always get a NegativeArrayException? needs fixing!
       throw new IllegalArgumentException("skip cannot be negative");
     }
     termAtt.resizeBuffer(bufferSize);


@@ -195,8 +195,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
     checkRandomData(random, analyzer, numRounds);
   }
-  // nocommit: wrong final offset, fix this!
-  @Ignore
+  @Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
   public void testFinalOffsetSpecialCase() throws Exception {
     final NormalizeCharMap map = new NormalizeCharMap();
     map.add("t", "");
@@ -220,8 +219,7 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
     checkAnalysisConsistency(random, analyzer, false, text);
   }
-  // nocommit: this is intended to fail until we fix bugs
-  @Ignore
+  @Ignore("wrong finalOffset: https://issues.apache.org/jira/browse/LUCENE-3971")
   public void testRandomMaps() throws Exception {
     for (int i = 0; i < 100; i++) {
       final NormalizeCharMap map = randomMap();


@@ -81,6 +81,7 @@ import org.apache.lucene.analysis.position.PositionFilter;
 import org.apache.lucene.analysis.snowball.TestSnowball;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.synonym.SynonymMap;
+import org.apache.lucene.analysis.th.ThaiWordFilter;
 import org.apache.lucene.analysis.util.CharArrayMap;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.util.AttributeSource.AttributeFactory;
@@ -105,7 +106,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
   // TODO: fix those and remove
   private static final Set<Class<?>> brokenComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
   static {
-    // nocommit can we promote some of these to be only
+    // TODO: can we promote some of these to be only
     // offsets offenders?
     Collections.<Class<?>>addAll(brokenComponents,
       // TODO: fix basetokenstreamtestcase not to trip because this one has no CharTermAtt
@@ -132,7 +133,11 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
       EdgeNGramTokenizer.class,
       // broken!
       EdgeNGramTokenFilter.class,
-      // nocommit: remove this class after we fix its finalOffset bug
+      // broken!
+      WordDelimiterFilter.class,
+      // broken!
+      TrimFilter.class,
+      // TODO: remove this class after we fix its finalOffset bug
       MappingCharFilter.class
     );
   }
@@ -142,16 +147,16 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
   private static final Set<Class<?>> brokenOffsetsComponents = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
   static {
     Collections.<Class<?>>addAll(brokenOffsetsComponents,
-      WordDelimiterFilter.class,
-      TrimFilter.class,
       ReversePathHierarchyTokenizer.class,
       PathHierarchyTokenizer.class,
       HyphenationCompoundWordTokenFilter.class,
       DictionaryCompoundWordTokenFilter.class,
-      // nocommit: corrumpts graphs (offset consistency check):
+      // TODO: corrumpts graphs (offset consistency check):
       PositionFilter.class,
-      // nocommit it seems to mess up offsets!?
-      WikipediaTokenizer.class
+      // TODO: it seems to mess up offsets!?
+      WikipediaTokenizer.class,
+      // TODO: doesn't handle graph inputs
+      ThaiWordFilter.class
     );
   }
@@ -271,7 +276,8 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
     });
     put(char.class, new ArgProducer() {
       @Override public Object create(Random random) {
-        // nocommit: fix any filters that care to throw IAE instead.
+        // TODO: fix any filters that care to throw IAE instead.
+        // also add a unicode validating filter to validate termAtt?
        // return Character.valueOf((char)random.nextInt(65536));
        while(true) {
          char c = (char)random.nextInt(65536);
@@ -534,7 +540,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
         // TODO: maybe the collator one...???
         args[i] = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
       } else if (paramType == AttributeSource.class) {
-        // nocommit: args[i] = new AttributeSource();
+        // TODO: args[i] = new AttributeSource();
         // this is currently too scary to deal with!
         args[i] = null; // force IAE
       } else {
@@ -583,7 +589,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
   }
   public boolean offsetsAreCorrect() {
-    // nocommit: can we not do the full chain here!?
+    // TODO: can we not do the full chain here!?
     Random random = new Random(seed);
     TokenizerSpec tokenizerSpec = newTokenizer(random, new StringReader(""));
     TokenFilterSpec filterSpec = newFilterChain(random, tokenizerSpec.tokenizer, tokenizerSpec.offsetsAreCorrect);
@@ -717,7 +723,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
     while (true) {
       final Constructor<? extends TokenFilter> ctor = tokenfilters.get(random.nextInt(tokenfilters.size()));
-      // nocommit/hack: MockGraph/MockLookahead has assertions that will trip if they follow
+      // hack: MockGraph/MockLookahead has assertions that will trip if they follow
       // an offsets violator. so we cant use them after e.g. wikipediatokenizer
       if (!spec.offsetsAreCorrect &&
           (ctor.getDeclaringClass().equals(MockGraphTokenFilter.class)