mirror of https://github.com/apache/lucene.git
LUCENE-1817: Marked analyzers/smartcn as experimental in order to warn users that this API might change in the future due to major refactorings.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@808170 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4c686a3dd1
commit
67eea7215c
|
@ -68,6 +68,11 @@ import org.apache.lucene.analysis.cn.smart.WordTokenFilter;
|
||||||
* <pre>
|
* <pre>
|
||||||
* analysis.data.dir=D:/path/to/analysis-data/
|
* analysis.data.dir=D:/path/to/analysis-data/
|
||||||
* </pre>
|
* </pre>
|
||||||
|
* <p><font color="#FF0000">
|
||||||
|
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
|
||||||
|
* The APIs introduced here might change in the future and will not be
|
||||||
|
* supported anymore in such a case.</font>
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
public class SmartChineseAnalyzer extends Analyzer {
|
public class SmartChineseAnalyzer extends Analyzer {
|
||||||
|
|
||||||
|
|
|
@ -3,8 +3,17 @@
|
||||||
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
|
<div>
|
||||||
Analyzer for Simplified Chinese, which indexes words.
|
Analyzer for Simplified Chinese, which indexes words.
|
||||||
<p>
|
</div>
|
||||||
|
<div>
|
||||||
|
<font color="#FF0000">
|
||||||
|
WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. The APIs
|
||||||
|
introduced here might change in the future and will not be supported anymore
|
||||||
|
in such a case.
|
||||||
|
</font>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
Three analyzers are provided for Chinese, each of which treats Chinese text in a different way.
|
Three analyzers are provided for Chinese, each of which treats Chinese text in a different way.
|
||||||
<ul>
|
<ul>
|
||||||
<li>ChineseAnalyzer (in the analyzers/cn package): Index unigrams (individual Chinese characters) as a token.
|
<li>ChineseAnalyzer (in the analyzers/cn package): Index unigrams (individual Chinese characters) as a token.
|
||||||
|
@ -18,7 +27,7 @@ Example phrase: "我是中国人"
|
||||||
<li>CJKAnalyzer: 我是-是中-中国-国人</li>
|
<li>CJKAnalyzer: 我是-是中-中国-国人</li>
|
||||||
<li>SmartChineseAnalyzer: 我-是-中国-人</li>
|
<li>SmartChineseAnalyzer: 我-是-中国-人</li>
|
||||||
</ol>
|
</ol>
|
||||||
</p>
|
</div>
|
||||||
|
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
|
@ -46,7 +46,11 @@ import java.util.Properties;
|
||||||
* <pre>
|
* <pre>
|
||||||
* analysis.data.dir=D:/path/to/analysis-data/
|
* analysis.data.dir=D:/path/to/analysis-data/
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
* <p><font color="#FF0000">
|
||||||
|
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
|
||||||
|
* The APIs introduced here might change in the future and will not be
|
||||||
|
* supported anymore in such a case.</font>
|
||||||
|
* </p>
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class AnalyzerProfile {
|
public class AnalyzerProfile {
|
||||||
|
|
|
@ -19,6 +19,11 @@ package org.apache.lucene.analysis.cn.smart;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Internal SmartChineseAnalyzer character type constants.
|
* Internal SmartChineseAnalyzer character type constants.
|
||||||
|
* <p><font color="#FF0000">
|
||||||
|
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
|
||||||
|
* The APIs introduced here might change in the future and will not be
|
||||||
|
* supported anymore in such a case.</font>
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
public class CharType {
|
public class CharType {
|
||||||
|
|
||||||
|
|
|
@ -31,6 +31,11 @@ import org.apache.lucene.util.AttributeSource;
|
||||||
* <p>
|
* <p>
|
||||||
* The output tokens can then be broken into words with {@link WordTokenFilter}
|
* The output tokens can then be broken into words with {@link WordTokenFilter}
|
||||||
* </p>
|
* </p>
|
||||||
|
* <p><font color="#FF0000">
|
||||||
|
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
|
||||||
|
* The APIs introduced here might change in the future and will not be
|
||||||
|
* supported anymore in such a case.</font>
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
public final class SentenceTokenizer extends Tokenizer {
|
public final class SentenceTokenizer extends Tokenizer {
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,11 @@ import org.apache.lucene.analysis.cn.smart.hhmm.SegTokenFilter; // for javadoc
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* SmartChineseAnalyzer utility constants and methods
|
* SmartChineseAnalyzer utility constants and methods
|
||||||
|
* <p><font color="#FF0000">
|
||||||
|
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
|
||||||
|
* The APIs introduced here might change in the future and will not be
|
||||||
|
* supported anymore in such a case.</font>
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
public class Utility {
|
public class Utility {
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,11 @@ import org.apache.lucene.analysis.cn.smart.hhmm.SegTokenFilter;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Segment a sentence of Chinese text into words.
|
* Segment a sentence of Chinese text into words.
|
||||||
|
* <p><font color="#FF0000">
|
||||||
|
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
|
||||||
|
* The APIs introduced here might change in the future and will not be
|
||||||
|
* supported anymore in such a case.</font>
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
class WordSegmenter {
|
class WordSegmenter {
|
||||||
|
|
||||||
|
|
|
@ -30,6 +30,11 @@ import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A {@link TokenFilter} that breaks sentences into words.
|
* A {@link TokenFilter} that breaks sentences into words.
|
||||||
|
* <p><font color="#FF0000">
|
||||||
|
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
|
||||||
|
* The APIs introduced here might change in the future and will not be
|
||||||
|
* supported anymore in such a case.</font>
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
public final class WordTokenFilter extends TokenFilter {
|
public final class WordTokenFilter extends TokenFilter {
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,11 @@ package org.apache.lucene.analysis.cn.smart;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Internal SmartChineseAnalyzer token type constants
|
* Internal SmartChineseAnalyzer token type constants
|
||||||
|
* <p><font color="#FF0000">
|
||||||
|
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
|
||||||
|
* The APIs introduced here might change in the future and will not be
|
||||||
|
* supported anymore in such a case.</font>
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
public class WordType {
|
public class WordType {
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,11 @@ import java.io.UnsupportedEncodingException;
|
||||||
* <p>
|
* <p>
|
||||||
* Contains methods for dealing with GB2312 encoding.
|
* Contains methods for dealing with GB2312 encoding.
|
||||||
* </p>
|
* </p>
|
||||||
|
* <p><font color="#FF0000">
|
||||||
|
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
|
||||||
|
* The APIs introduced here might change in the future and will not be
|
||||||
|
* supported anymore in such a case.</font>
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
abstract class AbstractDictionary {
|
abstract class AbstractDictionary {
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -31,6 +31,11 @@ import org.apache.lucene.analysis.cn.smart.Utility;
|
||||||
* <p>
|
* <p>
|
||||||
* For each start offset, a list of possible token pairs is stored.
|
* For each start offset, a list of possible token pairs is stored.
|
||||||
* </p>
|
* </p>
|
||||||
|
* <p><font color="#FF0000">
|
||||||
|
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
|
||||||
|
* The APIs introduced here might change in the future and will not be
|
||||||
|
* supported anymore in such a case.</font>
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
class BiSegGraph {
|
class BiSegGraph {
|
||||||
|
|
||||||
|
|
|
@ -34,6 +34,11 @@ import org.apache.lucene.analysis.cn.smart.AnalyzerProfile;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* SmartChineseAnalyzer Bigram dictionary.
|
* SmartChineseAnalyzer Bigram dictionary.
|
||||||
|
* <p><font color="#FF0000">
|
||||||
|
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
|
||||||
|
* The APIs introduced here might change in the future and will not be
|
||||||
|
* supported anymore in such a case.</font>
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
class BigramDictionary extends AbstractDictionary {
|
class BigramDictionary extends AbstractDictionary {
|
||||||
|
|
||||||
|
|
|
@ -25,6 +25,11 @@ import org.apache.lucene.analysis.cn.smart.WordType;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finds the optimal segmentation of a sentence into Chinese words
|
* Finds the optimal segmentation of a sentence into Chinese words
|
||||||
|
* <p><font color="#FF0000">
|
||||||
|
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
|
||||||
|
* The APIs introduced here might change in the future and will not be
|
||||||
|
* supported anymore in such a case.</font>
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
public class HHMMSegmenter {
|
public class HHMMSegmenter {
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,11 @@ package org.apache.lucene.analysis.cn.smart.hhmm;
|
||||||
* <p>
|
* <p>
|
||||||
* Used by {@link BiSegGraph} to maximize the segmentation with the Viterbi algorithm.
|
* Used by {@link BiSegGraph} to maximize the segmentation with the Viterbi algorithm.
|
||||||
* </p>
|
* </p>
|
||||||
|
* <p><font color="#FF0000">
|
||||||
|
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
|
||||||
|
* The APIs introduced here might change in the future and will not be
|
||||||
|
* supported anymore in such a case.</font>
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
class PathNode implements Comparable {
|
class PathNode implements Comparable {
|
||||||
public double weight;
|
public double weight;
|
||||||
|
|
|
@ -28,6 +28,11 @@ import java.util.Map;
|
||||||
* <p>
|
* <p>
|
||||||
* For each start offset, a list of possible tokens is stored.
|
* For each start offset, a list of possible tokens is stored.
|
||||||
* </p>
|
* </p>
|
||||||
|
* <p><font color="#FF0000">
|
||||||
|
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
|
||||||
|
* The APIs introduced here might change in the future and will not be
|
||||||
|
* supported anymore in such a case.</font>
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
class SegGraph {
|
class SegGraph {
|
||||||
|
|
||||||
|
|
|
@ -23,6 +23,11 @@ import org.apache.lucene.analysis.cn.smart.WordType; // for javadocs
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* SmartChineseAnalyzer internal token
|
* SmartChineseAnalyzer internal token
|
||||||
|
* <p><font color="#FF0000">
|
||||||
|
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
|
||||||
|
* The APIs introduced here might change in the future and will not be
|
||||||
|
* supported anymore in such a case.</font>
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
public class SegToken {
|
public class SegToken {
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -25,6 +25,11 @@ import org.apache.lucene.analysis.cn.smart.WordType;
|
||||||
* Filters a {@link SegToken} by converting full-width latin to half-width, then lowercasing latin.
|
* Filters a {@link SegToken} by converting full-width latin to half-width, then lowercasing latin.
|
||||||
* Additionally, all punctuation is converted into {@link Utility#COMMON_DELIMITER}
|
* Additionally, all punctuation is converted into {@link Utility#COMMON_DELIMITER}
|
||||||
* </p>
|
* </p>
|
||||||
|
* <p><font color="#FF0000">
|
||||||
|
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
|
||||||
|
* The APIs introduced here might change in the future and will not be
|
||||||
|
* supported anymore in such a case.</font>
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
public class SegTokenFilter {
|
public class SegTokenFilter {
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,11 @@ import java.util.Arrays;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A pair of tokens in {@link SegGraph}
|
* A pair of tokens in {@link SegGraph}
|
||||||
|
* <p><font color="#FF0000">
|
||||||
|
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
|
||||||
|
* The APIs introduced here might change in the future and will not be
|
||||||
|
* supported anymore in such a case.</font>
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
class SegTokenPair {
|
class SegTokenPair {
|
||||||
|
|
||||||
|
|
|
@ -36,6 +36,11 @@ import org.apache.lucene.analysis.cn.smart.Utility;
|
||||||
/**
|
/**
|
||||||
* SmartChineseAnalyzer Word Dictionary
|
* SmartChineseAnalyzer Word Dictionary
|
||||||
*
|
*
|
||||||
|
* <p><font color="#FF0000">
|
||||||
|
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
|
||||||
|
* The APIs introduced here might change in the future and will not be
|
||||||
|
* supported anymore in such a case.</font>
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
class WordDictionary extends AbstractDictionary {
|
class WordDictionary extends AbstractDictionary {
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,14 @@
|
||||||
<html><head></head>
|
<html><head></head>
|
||||||
<body>
|
<body>
|
||||||
|
<div>
|
||||||
SmartChineseAnalyzer Hidden Markov Model package
|
SmartChineseAnalyzer Hidden Markov Model package
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<font color="#FF0000">
|
||||||
|
WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. The APIs
|
||||||
|
introduced here might change in the future and will not be supported anymore
|
||||||
|
in such a case.
|
||||||
|
</font>
|
||||||
|
</div>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
|
@ -1,5 +1,14 @@
|
||||||
<html><head></head>
|
<html><head></head>
|
||||||
<body>
|
<body>
|
||||||
|
<div>
|
||||||
SmartChineseAnalyzer Tokenizers and TokenFilters
|
SmartChineseAnalyzer Tokenizers and TokenFilters
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<font color="#FF0000">
|
||||||
|
WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. The APIs
|
||||||
|
introduced here might change in the future and will not be supported anymore
|
||||||
|
in such a case.
|
||||||
|
</font>
|
||||||
|
</div>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
Loading…
Reference in New Issue