docs for KeywordRepeatFilter

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1454384 13f79535-47bb-0310-9956-ffa450edef68
2025-02-09 11:35:14 +00:00 · 2013-03-08 13:41:51 +00:00 · 2013-03-08 13:41:51 +00:00 · c6ac3031c1
commit c6ac3031c1
parent 5a3ec2d457
5 changed files with 43 additions and 7 deletions
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemFilter.java
@ -32,6 +32,18 @@ import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 * Conference on Research and Development in Information Retrieval, 191-203, 1993).
 * <p/>
 * All terms must already be lowercased for this filter to work correctly.
+ *
+ * <p>
+ * Note: This filter is aware of the {@link KeywordAttribute}. To prevent
+ * certain terms from being passed to the stemmer
+ * {@link KeywordAttribute#isKeyword()} should be set to <code>true</code>
+ * in a previous {@link TokenStream}.
+ *
+ * Note: For including the original term as well as the stemmed version, see
+ * {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory}
+ * </p>
+ *
+ *
 */

 public final class KStemFilter extends TokenFilter {
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/PorterStemFilter.java
@ -48,6 +48,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    certain terms from being passed to the stemmer
    {@link KeywordAttribute#isKeyword()} should be set to <code>true</code>
    in a previous {@link TokenStream}.
+
+    Note: For including the original term as well as the stemmed version, see
+   {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory}
    </p>
 */
 public final class PorterStemFilter extends TokenFilter {
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/HunspellStemFilter.java
@ -30,6 +30,18 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 /**
 * TokenFilter that uses hunspell affix rules and words to stem tokens.  Since hunspell supports a word having multiple
 * stems, this filter can emit multiple tokens for each consumed token
+ *
+ * <p>
+ * Note: This filter is aware of the {@link KeywordAttribute}. To prevent
+ * certain terms from being passed to the stemmer
+ * {@link KeywordAttribute#isKeyword()} should be set to <code>true</code>
+ * in a previous {@link TokenStream}.
+ *
+ * Note: For including the original term as well as the stemmed version, see
+ * {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory}
+ * </p>
+ *
+ *
 */
 public final class HunspellStemFilter extends TokenFilter {
  
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordRepeatFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordRepeatFilterFactory.java
@ -22,13 +22,10 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;

 /**
 * Factory for {@link KeywordRepeatFilter}.
- * <pre class="prettyprint" >
- * &lt;fieldType name="text_keyword" class="solr.TextField" positionIncrementGap="100"&gt;
- *   &lt;analyzer&gt;
- *     &lt;tokenizer class="solr.WhitespaceTokenizerFactory"/&gt;
- *     &lt;filter class="solr.KeywordRepeatFilter"/&gt;
- *   &lt;/analyzer&gt;
- * &lt;/fieldType&gt;</pre>
+ *
+ * Since {@link KeywordRepeatFilter} emits two tokens for every input token, and any tokens that aren't transformed
+ * later in the analysis chain will be in the document twice. Therefore, consider adding
+ * {@link RemoveDuplicatesTokenFilterFactory} later in the analysis chain.
 */
 public final class KeywordRepeatFilterFactory extends TokenFilterFactory {
  @Override
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
@ -37,6 +37,18 @@ import org.tartarus.snowball.SnowballProgram;
 *  <li>For other languages, see {@link LowerCaseFilter}.
 * </ul>
 * </p>
+ *
+ * <p>
+ * Note: This filter is aware of the {@link KeywordAttribute}. To prevent
+ * certain terms from being passed to the stemmer
+ * {@link KeywordAttribute#isKeyword()} should be set to <code>true</code>
+ * in a previous {@link TokenStream}.
+ *
+ * Note: For including the original term as well as the stemmed version, see
+ * {@link org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory}
+ * </p>
+ *
+ *
 */
 public final class SnowballFilter extends TokenFilter {