mirror of https://github.com/apache/lucene.git
LUCENE-2901: fix consistency of KeywordMarkerFilter, it should only set, not unset the attribute
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1065621 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
cd8929f9f5
commit
5ccf063a5d
|
@@ -747,8 +747,8 @@ New features
|
|||
stopwords, and implement many analyzers in contrib with it.
|
||||
(Simon Willnauer via Robert Muir)
|
||||
|
||||
* LUCENE-2198: Support protected words in stemming TokenFilters using a
|
||||
new KeywordAttribute. (Simon Willnauer via Uwe Schindler)
|
||||
* LUCENE-2198, LUCENE-2901: Support protected words in stemming TokenFilters using a
|
||||
new KeywordAttribute. (Simon Willnauer, Drew Farris via Uwe Schindler)
|
||||
|
||||
* LUCENE-2183, LUCENE-2240, LUCENE-2241: Added Unicode 4 support
|
||||
to CharTokenizer and its subclasses. CharTokenizer now has new
|
||||
|
|
|
@@ -74,10 +74,12 @@ public final class KeywordMarkerFilter extends TokenFilter {
|
|||
@Override
|
||||
public final boolean incrementToken() throws IOException {
|
||||
if (input.incrementToken()) {
|
||||
keywordAttr.setKeyword(keywordSet.contains(termAtt.buffer(), 0,
|
||||
termAtt.length()));
|
||||
if (keywordSet.contains(termAtt.buffer(), 0, termAtt.length())) {
|
||||
keywordAttr.setKeyword(true);
|
||||
}
|
||||
return true;
|
||||
} else
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -2,6 +2,7 @@ package org.apache.lucene.analysis.miscellaneous;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Locale;
|
||||
import java.util.Set;
|
||||
|
@@ -57,6 +58,19 @@ public class TestKeywordMarkerFilter extends BaseTokenStreamTestCase {
|
|||
"The quIck browN LuceneFox Jumps")), set2)), output);
|
||||
}
|
||||
|
||||
// LUCENE-2901
|
||||
public void testComposition() throws Exception {
|
||||
TokenStream ts = new LowerCaseFilterMock(
|
||||
new KeywordMarkerFilter(
|
||||
new KeywordMarkerFilter(
|
||||
new WhitespaceTokenizer(TEST_VERSION_CURRENT,
|
||||
new StringReader("Dogs Trees Birds Houses")),
|
||||
new HashSet<String>(Arrays.asList(new String[] { "Birds", "Houses" }))),
|
||||
new HashSet<String>(Arrays.asList(new String[] { "Dogs", "Trees" }))));
|
||||
|
||||
assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" });
|
||||
}
|
||||
|
||||
public static final class LowerCaseFilterMock extends TokenFilter {
|
||||
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
|
|
Loading…
Reference in New Issue