LUCENE-1882: move SmartChineseAnalyzer to the 'correct' package ... this commit is based on a sequence of svn commands and a patch provided by Robert Muir in LUCENE-1862

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@810208 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2009-09-01 20:10:33 +00:00
parent 566aaf28e7
commit e5cb7f668a
21 changed files with 20 additions and 70 deletions

View File

@ -1,50 +0,0 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
</head>
<body>
<div>
Analyzer for Simplified Chinese, which indexes words.
</div>
<div>
<font color="#FF0000">
WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental. The APIs
and file formats introduced here might change in the future and will not be supported anymore
in such a case.
</font>
</div>
<div>
Three analyzers are provided for Chinese, each of which treats Chinese text in a different way.
<ul>
<li>ChineseAnalyzer (in the analyzers/cn package): Index unigrams (individual Chinese characters) as a token.
<li>CJKAnalyzer (in the analyzers/cjk package): Index bigrams (overlapping groups of two adjacent Chinese characters) as tokens.
<li>SmartChineseAnalyzer (in this package): Index words (attempt to segment Chinese text into words) as tokens.
</ul>
Example phrase "我是中国人"
<ol>
<li>ChineseAnalyzer: 我-是-中-国-人</li>
<li>CJKAnalyzer: 我是-是中-中国-国人</li>
<li>SmartChineseAnalyzer: 我-是-中国-人</li>
</ol>
</div>
</body>
</html>

View File

@ -28,7 +28,7 @@ import java.util.Properties;
* SmartChineseAnalyzer has a built-in dictionary and stopword list out-of-box.
* </p>
* <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
+ * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* </p>

View File

@ -20,7 +20,7 @@ package org.apache.lucene.analysis.cn.smart;
/**
* Internal SmartChineseAnalyzer character type constants.
* <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
+ * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* </p>

View File

@ -32,7 +32,7 @@ import org.apache.lucene.util.AttributeSource;
* The output tokens can then be broken into words with {@link WordTokenFilter}
* </p>
* <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
+ * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* </p>

View File

@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.lucene.analysis.cn;
+package org.apache.lucene.analysis.cn.smart;
import java.io.IOException;
import java.io.InputStream;
@ -51,7 +51,7 @@ import org.apache.lucene.analysis.cn.smart.WordTokenFilter;
* Thanks to ICTCLAS for their hard work, and for contributing the data under the Apache 2 License!
* </p>
* <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
+ * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* </p>

View File

@ -22,7 +22,7 @@ import org.apache.lucene.analysis.cn.smart.hhmm.SegTokenFilter; // for javadoc
/**
* SmartChineseAnalyzer utility constants and methods
* <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
+ * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* </p>

View File

@ -27,7 +27,7 @@ import org.apache.lucene.analysis.cn.smart.hhmm.SegTokenFilter;
/**
* Segment a sentence of Chinese text into words.
* <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
+ * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* </p>

View File

@ -31,7 +31,7 @@ import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
/**
* A {@link TokenFilter} that breaks sentences into words.
* <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
+ * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* </p>

View File

@ -20,7 +20,7 @@ package org.apache.lucene.analysis.cn.smart;
/**
* Internal SmartChineseAnalyzer token type constants
* <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
+ * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* </p>

View File

@ -27,7 +27,7 @@ import java.io.UnsupportedEncodingException;
* Contains methods for dealing with GB2312 encoding.
* </p>
* <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
+ * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* </p>

View File

@ -32,7 +32,7 @@ import org.apache.lucene.analysis.cn.smart.Utility;
* For each start offset, a list of possible token pairs is stored.
* </p>
* <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
+ * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* </p>

View File

@ -35,7 +35,7 @@ import org.apache.lucene.analysis.cn.smart.AnalyzerProfile;
/**
* SmartChineseAnalyzer Bigram dictionary.
* <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
+ * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* </p>

View File

@ -27,7 +27,7 @@ import org.apache.lucene.analysis.cn.smart.hhmm.PathNode;//javadoc @link
/**
* Finds the optimal segmentation of a sentence into Chinese words
* <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
+ * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* </p>

View File

@ -23,7 +23,7 @@ package org.apache.lucene.analysis.cn.smart.hhmm;
* Used by {@link BiSegGraph} to maximize the segmentation with the Viterbi algorithm.
* </p>
* <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
+ * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* </p>

View File

@ -29,7 +29,7 @@ import java.util.Map;
* For each start offset, a list of possible tokens is stored.
* </p>
* <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
+ * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* </p>

View File

@ -24,7 +24,7 @@ import org.apache.lucene.analysis.cn.smart.WordType; // for javadocs
/**
* SmartChineseAnalyzer internal token
* <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
+ * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* </p>

View File

@ -26,7 +26,7 @@ import org.apache.lucene.analysis.cn.smart.WordType;
* Additionally, all punctuation is converted into {@link Utility#COMMON_DELIMITER}
* </p>
* <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
+ * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* </p>

View File

@ -22,7 +22,7 @@ import java.util.Arrays;
/**
* A pair of tokens in {@link SegGraph}
* <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
+ * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* </p>

View File

@ -37,7 +37,7 @@ import org.apache.lucene.analysis.cn.smart.Utility;
* SmartChineseAnalyzer Word Dictionary
*
* <p><font color="#FF0000">
- * WARNING: The status of the analyzers/smartcn <b>analysis.cn</b> package is experimental.
+ * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental.
* The APIs and file formats introduced here might change in the future and will not be
* supported anymore in such a case.</font>
* </p>

View File

@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.lucene.analysis.cn;
+package org.apache.lucene.analysis.cn.smart;
import java.io.FileNotFoundException;
import java.io.IOException;