LUCENE-2901: fix consistency of KeywordMarkerFilter, it should only set, not unset the attribute

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1065621 13f79535-47bb-0310-9956-ffa450edef68
2011-01-31 14:06:45 +00:00 · 2011-01-31 14:06:45 +00:00 · 5ccf063a5d
parent cd8929f9f5
commit 5ccf063a5d
3 changed files with 21 additions and 5 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -747,8 +747,8 @@ New features
  stopwords, and implement many analyzers in contrib with it.  
  (Simon Willnauer via Robert Muir)
  
-* LUCENE-2198: Support protected words in stemming TokenFilters using a
-  new KeywordAttribute.  (Simon Willnauer via Uwe Schindler)
+* LUCENE-2198, LUCENE-2901: Support protected words in stemming TokenFilters using a
+  new KeywordAttribute.  (Simon Willnauer, Drew Farris via Uwe Schindler)
  
 * LUCENE-2183, LUCENE-2240, LUCENE-2241: Added Unicode 4 support
  to CharTokenizer and its subclasses. CharTokenizer now has new
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java
@ -74,10 +74,12 @@ public final class KeywordMarkerFilter extends TokenFilter {
  @Override
  public final boolean incrementToken() throws IOException {
    if (input.incrementToken()) {
-      keywordAttr.setKeyword(keywordSet.contains(termAtt.buffer(), 0,
-          termAtt.length()));
+      if (keywordSet.contains(termAtt.buffer(), 0, termAtt.length())) { 
+        keywordAttr.setKeyword(true);
+      }
      return true;
-    } else
+    } else {
      return false;
+    }
  }
 }
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
@ -2,6 +2,7 @@ package org.apache.lucene.analysis.miscellaneous;

 import java.io.IOException;
 import java.io.StringReader;
+import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Locale;
 import java.util.Set;
@ -57,6 +58,19 @@ public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
            "The quIck browN LuceneFox Jumps")), set2)), output);
  }

+  // LUCENE-2901
+  public void testComposition() throws Exception {   
+    TokenStream ts = new LowerCaseFilterMock(
+                     new KeywordMarkerFilter(
+                     new KeywordMarkerFilter(
+                     new WhitespaceTokenizer(TEST_VERSION_CURRENT,
+                     new StringReader("Dogs Trees Birds Houses")),
+                     new HashSet<String>(Arrays.asList(new String[] { "Birds", "Houses" }))), 
+                     new HashSet<String>(Arrays.asList(new String[] { "Dogs", "Trees" }))));
+    
+    assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" });
+  }
+  
  public static final class LowerCaseFilterMock extends TokenFilter {

    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);