LUCENE-1817: Marked analyzers/smartcn as experimental in order to warn users that this API might change in the future due to major refactorings.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@808170 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Simon Willnauer 2009-08-26 20:21:50 +00:00
parent 4c686a3dd1
commit 67eea7215c
21 changed files with 119 additions and 3 deletions

View File

@ -68,6 +68,11 @@ import org.apache.lucene.analysis.cn.smart.WordTokenFilter;
* <pre> * <pre>
* analysis.data.dir=D:/path/to/analysis-data/ * analysis.data.dir=D:/path/to/analysis-data/
* </pre> * </pre>
* <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
* The APIs introduced here might change in the future; if they do, the
* previous versions will no longer be supported.</font>
* </p>
*/ */
public class SmartChineseAnalyzer extends Analyzer { public class SmartChineseAnalyzer extends Analyzer {

View File

@ -3,8 +3,17 @@
<META http-equiv="Content-Type" content="text/html; charset=UTF-8"> <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
</head> </head>
<body> <body>
<div>
Analyzer for Simplified Chinese, which indexes words. Analyzer for Simplified Chinese, which indexes words.
<p> </div>
<div>
<font color="#FF0000">
WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. The APIs
introduced here might change in the future; if they do, the previous versions
will no longer be supported.
</font>
</div>
<div>
Three analyzers are provided for Chinese, each of which treats Chinese text in a different way. Three analyzers are provided for Chinese, each of which treats Chinese text in a different way.
<ul> <ul>
<li>ChineseAnalyzer (in the analyzers/cn package): Index unigrams (individual Chinese characters) as a token. <li>ChineseAnalyzer (in the analyzers/cn package): Index unigrams (individual Chinese characters) as a token.
@ -18,7 +27,7 @@ Example phrase "我是中国人"
<li>CJKAnalyzer: 我是-是中-中国-国人</li> <li>CJKAnalyzer: 我是-是中-中国-国人</li>
<li>SmartChineseAnalyzer: 我-是-中国-人</li> <li>SmartChineseAnalyzer: 我-是-中国-人</li>
</ol> </ol>
</p> </div>
</body> </body>
</html> </html>

View File

@ -46,7 +46,11 @@ import java.util.Properties;
* <pre> * <pre>
* analysis.data.dir=D:/path/to/analysis-data/ * analysis.data.dir=D:/path/to/analysis-data/
* </pre> * </pre>
* * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
* The APIs introduced here might change in the future; if they do, the
* previous versions will no longer be supported.</font>
* </p>
* *
*/ */
public class AnalyzerProfile { public class AnalyzerProfile {

View File

@ -19,6 +19,11 @@ package org.apache.lucene.analysis.cn.smart;
/** /**
* Internal SmartChineseAnalyzer character type constants. * Internal SmartChineseAnalyzer character type constants.
* <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
* The APIs introduced here might change in the future; if they do, the
* previous versions will no longer be supported.</font>
* </p>
*/ */
public class CharType { public class CharType {

View File

@ -31,6 +31,11 @@ import org.apache.lucene.util.AttributeSource;
* <p> * <p>
* The output tokens can then be broken into words with {@link WordTokenFilter} * The output tokens can then be broken into words with {@link WordTokenFilter}
* </p> * </p>
* <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
* The APIs introduced here might change in the future; if they do, the
* previous versions will no longer be supported.</font>
* </p>
*/ */
public final class SentenceTokenizer extends Tokenizer { public final class SentenceTokenizer extends Tokenizer {

View File

@ -21,6 +21,11 @@ import org.apache.lucene.analysis.cn.smart.hhmm.SegTokenFilter; // for javadoc
/** /**
* SmartChineseAnalyzer utility constants and methods * SmartChineseAnalyzer utility constants and methods
* <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
* The APIs introduced here might change in the future; if they do, the
* previous versions will no longer be supported.</font>
* </p>
*/ */
public class Utility { public class Utility {

View File

@ -26,6 +26,11 @@ import org.apache.lucene.analysis.cn.smart.hhmm.SegTokenFilter;
/** /**
* Segment a sentence of Chinese text into words. * Segment a sentence of Chinese text into words.
* <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
* The APIs introduced here might change in the future; if they do, the
* previous versions will no longer be supported.</font>
* </p>
*/ */
class WordSegmenter { class WordSegmenter {

View File

@ -30,6 +30,11 @@ import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
/** /**
* A {@link TokenFilter} that breaks sentences into words. * A {@link TokenFilter} that breaks sentences into words.
* <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
* The APIs introduced here might change in the future; if they do, the
* previous versions will no longer be supported.</font>
* </p>
*/ */
public final class WordTokenFilter extends TokenFilter { public final class WordTokenFilter extends TokenFilter {

View File

@ -19,6 +19,11 @@ package org.apache.lucene.analysis.cn.smart;
/** /**
* Internal SmartChineseAnalyzer token type constants * Internal SmartChineseAnalyzer token type constants
* <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
* The APIs introduced here might change in the future; if they do, the
* previous versions will no longer be supported.</font>
* </p>
*/ */
public class WordType { public class WordType {

View File

@ -26,6 +26,11 @@ import java.io.UnsupportedEncodingException;
* <p> * <p>
* Contains methods for dealing with GB2312 encoding. * Contains methods for dealing with GB2312 encoding.
* </p> * </p>
* <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
* The APIs introduced here might change in the future; if they do, the
* previous versions will no longer be supported.</font>
* </p>
*/ */
abstract class AbstractDictionary { abstract class AbstractDictionary {
/** /**

View File

@ -31,6 +31,11 @@ import org.apache.lucene.analysis.cn.smart.Utility;
* <p> * <p>
* For each start offset, a list of possible token pairs is stored. * For each start offset, a list of possible token pairs is stored.
* </p> * </p>
* <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
* The APIs introduced here might change in the future; if they do, the
* previous versions will no longer be supported.</font>
* </p>
*/ */
class BiSegGraph { class BiSegGraph {

View File

@ -34,6 +34,11 @@ import org.apache.lucene.analysis.cn.smart.AnalyzerProfile;
/** /**
* SmartChineseAnalyzer Bigram dictionary. * SmartChineseAnalyzer Bigram dictionary.
* <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
* The APIs introduced here might change in the future; if they do, the
* previous versions will no longer be supported.</font>
* </p>
*/ */
class BigramDictionary extends AbstractDictionary { class BigramDictionary extends AbstractDictionary {

View File

@ -25,6 +25,11 @@ import org.apache.lucene.analysis.cn.smart.WordType;
/** /**
* Finds the optimal segmentation of a sentence into Chinese words * Finds the optimal segmentation of a sentence into Chinese words
* <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
* The APIs introduced here might change in the future; if they do, the
* previous versions will no longer be supported.</font>
* </p>
*/ */
public class HHMMSegmenter { public class HHMMSegmenter {

View File

@ -22,6 +22,11 @@ package org.apache.lucene.analysis.cn.smart.hhmm;
* <p> * <p>
* Used by {@link BiSegGraph} to maximize the segmentation with the Viterbi algorithm. * Used by {@link BiSegGraph} to maximize the segmentation with the Viterbi algorithm.
* </p> * </p>
* <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
* The APIs introduced here might change in the future; if they do, the
* previous versions will no longer be supported.</font>
* </p>
*/ */
class PathNode implements Comparable { class PathNode implements Comparable {
public double weight; public double weight;

View File

@ -28,6 +28,11 @@ import java.util.Map;
* <p> * <p>
* For each start offset, a list of possible tokens is stored. * For each start offset, a list of possible tokens is stored.
* </p> * </p>
* <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
* The APIs introduced here might change in the future; if they do, the
* previous versions will no longer be supported.</font>
* </p>
*/ */
class SegGraph { class SegGraph {

View File

@ -23,6 +23,11 @@ import org.apache.lucene.analysis.cn.smart.WordType; // for javadocs
/** /**
* SmartChineseAnalyzer internal token * SmartChineseAnalyzer internal token
* <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
* The APIs introduced here might change in the future; if they do, the
* previous versions will no longer be supported.</font>
* </p>
*/ */
public class SegToken { public class SegToken {
/** /**

View File

@ -25,6 +25,11 @@ import org.apache.lucene.analysis.cn.smart.WordType;
* Filters a {@link SegToken} by converting full-width Latin to half-width, then lowercasing Latin. * Filters a {@link SegToken} by converting full-width Latin to half-width, then lowercasing Latin.
* Additionally, all punctuation is converted into {@link Utility#COMMON_DELIMITER} * Additionally, all punctuation is converted into {@link Utility#COMMON_DELIMITER}
* </p> * </p>
* <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
* The APIs introduced here might change in the future; if they do, the
* previous versions will no longer be supported.</font>
* </p>
*/ */
public class SegTokenFilter { public class SegTokenFilter {

View File

@ -21,6 +21,11 @@ import java.util.Arrays;
/** /**
* A pair of tokens in {@link SegGraph} * A pair of tokens in {@link SegGraph}
* <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
* The APIs introduced here might change in the future; if they do, the
* previous versions will no longer be supported.</font>
* </p>
*/ */
class SegTokenPair { class SegTokenPair {

View File

@ -36,6 +36,11 @@ import org.apache.lucene.analysis.cn.smart.Utility;
/** /**
* SmartChineseAnalyzer Word Dictionary * SmartChineseAnalyzer Word Dictionary
* *
* <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
* The APIs introduced here might change in the future; if they do, the
* previous versions will no longer be supported.</font>
* </p>
*/ */
class WordDictionary extends AbstractDictionary { class WordDictionary extends AbstractDictionary {

View File

@ -1,5 +1,14 @@
<html><head></head> <html><head></head>
<body> <body>
<div>
SmartChineseAnalyzer Hidden Markov Model package SmartChineseAnalyzer Hidden Markov Model package
</div>
<div>
<font color="#FF0000">
WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. The APIs
introduced here might change in the future; if they do, the previous versions
will no longer be supported.
</font>
</div>
</body> </body>
</html> </html>

View File

@ -1,5 +1,14 @@
<html><head></head> <html><head></head>
<body> <body>
<div>
SmartChineseAnalyzer Tokenizers and TokenFilters SmartChineseAnalyzer Tokenizers and TokenFilters
</div>
<div>
<font color="#FF0000">
WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. The APIs
introduced here might change in the future; if they do, the previous versions
will no longer be supported.
</font>
</div>
</body> </body>
</html> </html>