LUCENE-1882: move SmartChineseAnalyzer to the 'correct' package ... this commit is based on a sequence of svn commands and a patch provided by Robert Muir in LUCENE-1862

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@810208 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2009-09-01 20:10:33 +00:00
parent 566aaf28e7
commit e5cb7f668a
21 changed files with 20 additions and 70 deletions

View File

@ -1,50 +0,0 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
</head>
<body>
<div>
Analyzer for Simplified Chinese, which indexes words.
</div>
<div>
<font color="#FF0000">
WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. The APIs
and file formats introduced here might change in the future and will not be supported anymore
in such a case.
</font>
</div>
<div>
Three analyzers are provided for Chinese, each of which treats Chinese text in a different way.
<ul>
<li>ChineseAnalyzer (in the analyzers/cn package): Index unigrams (individual Chinese characters) as tokens.</li>
<li>CJKAnalyzer (in the analyzers/cjk package): Index bigrams (overlapping groups of two adjacent Chinese characters) as tokens.</li>
<li>SmartChineseAnalyzer (in this package): Index words (attempt to segment Chinese text into words) as tokens.</li>
</ul>
Example phrase "我是中国人"
<ol>
<li>ChineseAnalyzer: 我-是-中-国-人</li>
<li>CJKAnalyzer: 我是-是中-中国-国人</li>
<li>SmartChineseAnalyzer: 我-是-中国-人</li>
</ol>
</div>
</body>
</html>

View File

@ -28,7 +28,7 @@ import java.util.Properties;
* SmartChineseAnalyzer has a built-in dictionary and stopword list out-of-box. * SmartChineseAnalyzer has a built-in dictionary and stopword list out-of-box.
* </p> * </p>
* <p><font color="#FF0000"> * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be * The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font> * supported anymore in such a case.</font>
* </p> * </p>

View File

@ -20,7 +20,7 @@ package org.apache.lucene.analysis.cn.smart;
/** /**
* Internal SmartChineseAnalyzer character type constants. * Internal SmartChineseAnalyzer character type constants.
* <p><font color="#FF0000"> * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be * The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font> * supported anymore in such a case.</font>
* </p> * </p>

View File

@ -32,7 +32,7 @@ import org.apache.lucene.util.AttributeSource;
* The output tokens can then be broken into words with {@link WordTokenFilter} * The output tokens can then be broken into words with {@link WordTokenFilter}
* </p> * </p>
* <p><font color="#FF0000"> * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be * The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font> * supported anymore in such a case.</font>
* </p> * </p>

View File

@ -15,7 +15,7 @@
* limitations under the License. * limitations under the License.
*/ */
package org.apache.lucene.analysis.cn; package org.apache.lucene.analysis.cn.smart;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
@ -51,7 +51,7 @@ import org.apache.lucene.analysis.cn.smart.WordTokenFilter;
* Thanks to ICTCLAS for their hard work, and for contributing the data under the Apache 2 License! * Thanks to ICTCLAS for their hard work, and for contributing the data under the Apache 2 License!
* </p> * </p>
* <p><font color="#FF0000"> * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be * The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font> * supported anymore in such a case.</font>
* </p> * </p>

View File

@ -22,7 +22,7 @@ import org.apache.lucene.analysis.cn.smart.hhmm.SegTokenFilter; // for javadoc
/** /**
* SmartChineseAnalyzer utility constants and methods * SmartChineseAnalyzer utility constants and methods
* <p><font color="#FF0000"> * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be * The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font> * supported anymore in such a case.</font>
* </p> * </p>

View File

@ -27,7 +27,7 @@ import org.apache.lucene.analysis.cn.smart.hhmm.SegTokenFilter;
/** /**
* Segment a sentence of Chinese text into words. * Segment a sentence of Chinese text into words.
* <p><font color="#FF0000"> * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be * The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font> * supported anymore in such a case.</font>
* </p> * </p>

View File

@ -31,7 +31,7 @@ import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
/** /**
* A {@link TokenFilter} that breaks sentences into words. * A {@link TokenFilter} that breaks sentences into words.
* <p><font color="#FF0000"> * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be * The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font> * supported anymore in such a case.</font>
* </p> * </p>

View File

@ -20,7 +20,7 @@ package org.apache.lucene.analysis.cn.smart;
/** /**
* Internal SmartChineseAnalyzer token type constants * Internal SmartChineseAnalyzer token type constants
* <p><font color="#FF0000"> * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be * The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font> * supported anymore in such a case.</font>
* </p> * </p>

View File

@ -27,7 +27,7 @@ import java.io.UnsupportedEncodingException;
* Contains methods for dealing with GB2312 encoding. * Contains methods for dealing with GB2312 encoding.
* </p> * </p>
* <p><font color="#FF0000"> * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be * The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font> * supported anymore in such a case.</font>
* </p> * </p>

View File

@ -32,7 +32,7 @@ import org.apache.lucene.analysis.cn.smart.Utility;
* For each start offset, a list of possible token pairs is stored. * For each start offset, a list of possible token pairs is stored.
* </p> * </p>
* <p><font color="#FF0000"> * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be * The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font> * supported anymore in such a case.</font>
* </p> * </p>

View File

@ -35,7 +35,7 @@ import org.apache.lucene.analysis.cn.smart.AnalyzerProfile;
/** /**
* SmartChineseAnalyzer Bigram dictionary. * SmartChineseAnalyzer Bigram dictionary.
* <p><font color="#FF0000"> * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be * The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font> * supported anymore in such a case.</font>
* </p> * </p>

View File

@ -27,7 +27,7 @@ import org.apache.lucene.analysis.cn.smart.hhmm.PathNode;//javadoc @link
/** /**
* Finds the optimal segmentation of a sentence into Chinese words * Finds the optimal segmentation of a sentence into Chinese words
* <p><font color="#FF0000"> * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be * The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font> * supported anymore in such a case.</font>
* </p> * </p>

View File

@ -23,7 +23,7 @@ package org.apache.lucene.analysis.cn.smart.hhmm;
* Used by {@link BiSegGraph} to maximize the segmentation with the Viterbi algorithm. * Used by {@link BiSegGraph} to maximize the segmentation with the Viterbi algorithm.
* </p> * </p>
* <p><font color="#FF0000"> * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be * The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font> * supported anymore in such a case.</font>
* </p> * </p>

View File

@ -29,7 +29,7 @@ import java.util.Map;
* For each start offset, a list of possible tokens is stored. * For each start offset, a list of possible tokens is stored.
* </p> * </p>
* <p><font color="#FF0000"> * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be * The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font> * supported anymore in such a case.</font>
* </p> * </p>

View File

@ -24,7 +24,7 @@ import org.apache.lucene.analysis.cn.smart.WordType; // for javadocs
/** /**
* SmartChineseAnalyzer internal token * SmartChineseAnalyzer internal token
* <p><font color="#FF0000"> * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be * The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font> * supported anymore in such a case.</font>
* </p> * </p>

View File

@ -26,7 +26,7 @@ import org.apache.lucene.analysis.cn.smart.WordType;
* Additionally, all punctuation is converted into {@link Utility#COMMON_DELIMITER} * Additionally, all punctuation is converted into {@link Utility#COMMON_DELIMITER}
* </p> * </p>
* <p><font color="#FF0000"> * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be * The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font> * supported anymore in such a case.</font>
* </p> * </p>

View File

@ -22,7 +22,7 @@ import java.util.Arrays;
/** /**
* A pair of tokens in {@link SegGraph} * A pair of tokens in {@link SegGraph}
* <p><font color="#FF0000"> * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be * The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font> * supported anymore in such a case.</font>
* </p> * </p>

View File

@ -37,7 +37,7 @@ import org.apache.lucene.analysis.cn.smart.Utility;
* SmartChineseAnalyzer Word Dictionary * SmartChineseAnalyzer Word Dictionary
* *
* <p><font color="#FF0000"> * <p><font color="#FF0000">
* WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be * The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font> * supported anymore in such a case.</font>
* </p> * </p>

View File

@ -15,7 +15,7 @@
* limitations under the License. * limitations under the License.
*/ */
package org.apache.lucene.analysis.cn; package org.apache.lucene.analysis.cn.smart;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;