LUCENE-2238: deprecated ChineseAnalyzer / ChineseTokenizer in favor of StandardAnalyzer / StandardTokenizer, which do the same thing

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@904521 13f79535-47bb-0310-9956-ffa450edef68
Simon Willnauer 2010-01-29 15:44:54 +00:00
parent 49b3a12971
commit 537bb742cd
6 changed files with 18 additions and 11 deletions
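
As a quick sketch of the migration this commit recommends, assuming Lucene 3.x-era APIs (Version.LUCENE_30, TermAttribute, and the class name below are illustrative, not part of this commit), StandardAnalyzer already produces the unigram tokens ChineseAnalyzer was used for:

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

public class ChineseAnalyzerMigration {
  public static void main(String[] args) throws Exception {
    // Before this commit: Analyzer analyzer = new ChineseAnalyzer();
    // After: StandardAnalyzer emits the same unigram tokens for Chinese text.
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
    TokenStream ts = analyzer.tokenStream("field", new StringReader("我是中国人"));
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term()); // prints 我, 是, 中, 国, 人 (one character per token)
    }
    ts.close();
  }
}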

org/apache/lucene/analysis/cn/ChineseAnalyzer.java

@@ -21,15 +21,17 @@ import java.io.Reader;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents; // javadoc @link
import org.apache.lucene.analysis.standard.StandardAnalyzer; // javadoc @link
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
/**
* An {@link Analyzer} that tokenizes text with {@link ChineseTokenizer} and
* filters with {@link ChineseFilter}
*
* @deprecated Use {@link StandardAnalyzer} instead, which has the same functionality.
* This analyzer will be removed in Lucene 4.0
*/
@Deprecated
public final class ChineseAnalyzer extends ReusableAnalyzerBase {
/**

org/apache/lucene/analysis/cn/ChineseFilter.java

@@ -23,6 +23,7 @@ import java.util.Arrays;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
@@ -41,9 +42,10 @@ import org.apache.lucene.util.Version;
* </ol>
*
* @version 1.0
*
* @deprecated Use {@link StopFilter} instead, which has the same functionality.
* This filter will be removed in Lucene 4.0
*/
@Deprecated
public final class ChineseFilter extends TokenFilter {

org/apache/lucene/analysis/cn/ChineseTokenizer.java

@@ -21,6 +21,7 @@ package org.apache.lucene.analysis.cn;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
@@ -52,9 +53,10 @@ import org.apache.lucene.util.AttributeSource;
* CJKTokenizer will not work.
* </p>
* @version 1.0
*
* @deprecated Use {@link StandardTokenizer} instead, which has the same functionality.
* This tokenizer will be removed in Lucene 4.0
*/
@Deprecated
public final class ChineseTokenizer extends Tokenizer {
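
The same swap works at the tokenizer/filter level. A minimal sketch of the replacement chain, again assuming Lucene 3.x-era constructors (Version.LUCENE_30 and the StopAnalyzer.ENGLISH_STOP_WORDS_SET stop set are assumptions, not part of this commit):

import java.io.StringReader;

import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

public class ChineseTokenizerMigration {
  public static void main(String[] args) throws Exception {
    // Before: new ChineseFilter(new ChineseTokenizer(reader))
    // After: StandardTokenizer for unigrams, StopFilter for English stop words.
    TokenStream ts = new StandardTokenizer(Version.LUCENE_30,
        new StringReader("this is 我是中国人"));
    ts = new StopFilter(Version.LUCENE_30, ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term()); // "this" and "is" are dropped; the Chinese unigrams remain
    }
    ts.close();
  }
}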

org/apache/lucene/analysis/cn/package.html

@@ -24,14 +24,14 @@ Analyzer for Chinese, which indexes unigrams (individual Chinese characters).
<p>
Three analyzers are provided for Chinese, each of which treats Chinese text in a different way.
<ul>
<li>ChineseAnalyzer (in this package): Index unigrams (individual Chinese characters) as tokens.
<li>StandardAnalyzer: Index unigrams (individual Chinese characters) as tokens.
<li>CJKAnalyzer (in the analyzers/cjk package): Index bigrams (overlapping groups of two adjacent Chinese characters) as tokens.
<li>SmartChineseAnalyzer (in the analyzers/smartcn package): Index words (attempt to segment Chinese text into words) as tokens.
</ul>
Example phrase "我是中国人"
<ol>
<li>ChineseAnalyzer: 我-是-中-国-人</li>
<li>StandardAnalyzer: 我-是-中-国-人</li>
<li>CJKAnalyzer: 我是-是中-中国-国人</li>
<li>SmartChineseAnalyzer: 我-是-中国-人</li>
</ol>
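
The outputs listed above can be asserted directly; a minimal sketch as a BaseTokenStreamTestCase subclass (the Version-taking CJKAnalyzer and SmartChineseAnalyzer constructors are assumed here, not shown by this commit):

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;

public class TestChineseExamplePhrase extends BaseTokenStreamTestCase {
  public void testExamplePhrase() throws Exception {
    // Unigrams via StandardAnalyzer (the replacement for ChineseAnalyzer)
    assertAnalyzesTo(new StandardAnalyzer(Version.LUCENE_30), "我是中国人",
        new String[] { "我", "是", "中", "国", "人" });
    // Overlapping bigrams via CJKAnalyzer
    assertAnalyzesTo(new CJKAnalyzer(Version.LUCENE_30), "我是中国人",
        new String[] { "我是", "是中", "中国", "国人" });
    // Word segmentation via SmartChineseAnalyzer
    assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_30), "我是中国人",
        new String[] { "我", "是", "中国", "人" });
  }
}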

org/apache/lucene/analysis/cn/TestChineseTokenizer.java

@@ -24,11 +24,12 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
/** @deprecated Remove this test when ChineseAnalyzer is removed. */
@Deprecated
public class TestChineseTokenizer extends BaseTokenStreamTestCase
{
public void testOtherLetterOffset() throws IOException

org/apache/lucene/analysis/cn/smart/package.html

@@ -33,14 +33,14 @@ in such a case.
<div>
Three analyzers are provided for Chinese, each of which treats Chinese text in a different way.
<ul>
<li>ChineseAnalyzer (in the analyzers/cn package): Index unigrams (individual Chinese characters) as tokens.
<li>StandardAnalyzer: Index unigrams (individual Chinese characters) as tokens.
<li>CJKAnalyzer (in the analyzers/cjk package): Index bigrams (overlapping groups of two adjacent Chinese characters) as tokens.
<li>SmartChineseAnalyzer (in this package): Index words (attempt to segment Chinese text into words) as tokens.
</ul>
Example phrase "我是中国人"
<ol>
<li>ChineseAnalyzer: 我-是-中-国-人</li>
<li>StandardAnalyzer: 我-是-中-国-人</li>
<li>CJKAnalyzer: 我是-是中-中国-国人</li>
<li>SmartChineseAnalyzer: 我-是-中国-人</li>
</ol>