mirror of https://github.com/apache/lucene.git
LUCENE-1728: Split contrib/analyzers into common and smartcn. Smartcn depends on a large dictionary that causes the analyzers jar to grow to as much as 3MB in compressed size.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@797150 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
91e37bf1fb
commit
999f6157c7
|
@ -21,10 +21,52 @@
|
||||||
|
|
||||||
<description>
|
<description>
|
||||||
Additional Analyzers
|
Additional Analyzers
|
||||||
|
- common: Additional Analyzers
|
||||||
|
- smartcn: Smart Analyzer for Simplified Chinese Text
|
||||||
</description>
|
</description>
|
||||||
|
|
||||||
<property name="javac.source" value="1.4" />
|
<target name="common">
|
||||||
<property name="javac.target" value="1.4" />
|
<ant dir="common" />
|
||||||
|
</target>
|
||||||
|
|
||||||
<import file="../contrib-build.xml"/>
|
<target name="smartcn">
|
||||||
|
<ant dir="smartcn" />
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="default" depends="common,smartcn" />
|
||||||
|
|
||||||
|
<target name="clean">
|
||||||
|
<ant dir="common" target="clean" />
|
||||||
|
<ant dir="smartcn" target="clean" />
|
||||||
|
</target>
|
||||||
|
<target name="compile-core">
|
||||||
|
<ant dir="common" target="compile-core" />
|
||||||
|
<ant dir="smartcn" target="compile-core" />
|
||||||
|
</target>
|
||||||
|
<target name="compile-test">
|
||||||
|
<ant dir="common" target="compile-test" />
|
||||||
|
<ant dir="smartcn" target="compile-test" />
|
||||||
|
</target>
|
||||||
|
<target name="test">
|
||||||
|
<ant dir="common" target="test" />
|
||||||
|
<ant dir="smartcn" target="test" />
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="build-artifacts-and-tests" depends="default,compile-test" />
|
||||||
|
|
||||||
|
<target name="dist-maven" depends="default">
|
||||||
|
<ant dir="common" target="dist-maven" />
|
||||||
|
<ant dir="smartcn" target="dist-maven" />
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="javadocs">
|
||||||
|
<ant dir="common" target="javadocs" />
|
||||||
|
<ant dir="smartcn" target="javadocs" />
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="javadocs-index.html">
|
||||||
|
<ant dir="common" target="javadocs-index.html" />
|
||||||
|
<ant dir="smartcn" target="javadocs-index.html" />
|
||||||
|
</target>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
<?xml version="1.0"?>
|
||||||
|
|
||||||
|
<!--
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
-->
|
||||||
|
|
||||||
|
<project name="analyzers" default="default">
|
||||||
|
|
||||||
|
<description>
|
||||||
|
Additional Analyzers
|
||||||
|
</description>
|
||||||
|
|
||||||
|
<property name="javac.source" value="1.4" />
|
||||||
|
<property name="javac.target" value="1.4" />
|
||||||
|
|
||||||
|
<property name="build.dir" location="../../../build/contrib/analyzers/common" />
|
||||||
|
<property name="dist.dir" location="../../../dist/contrib/analyzers/common" />
|
||||||
|
<property name="maven.dist.dir" location="../../../dist/maven" />
|
||||||
|
|
||||||
|
<import file="../../contrib-build.xml"/>
|
||||||
|
|
||||||
|
<path id="test.classpath">
|
||||||
|
<path refid="classpath"/>
|
||||||
|
<pathelement location="../../../build/classes/test/"/>
|
||||||
|
<path refid="junit-path"/>
|
||||||
|
<pathelement location="${build.dir}/classes/java"/>
|
||||||
|
</path>
|
||||||
|
</project>
|
|
@ -0,0 +1,24 @@
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
Analyzer for Chinese, Japanese, and Korean, which indexes bigrams (overlapping groups of two adjacent Han characters).
|
||||||
|
<p>
|
||||||
|
Three analyzers are provided for Chinese, each of which treats Chinese text in a different way.
|
||||||
|
<ul>
|
||||||
|
<li>ChineseAnalyzer (in the analyzers/cn package): Index unigrams (individual Chinese characters) as a token.
|
||||||
|
<li>CJKAnalyzer (in this package): Index bigrams (overlapping groups of two adjacent Chinese characters) as tokens.
|
||||||
|
<li>SmartChineseAnalyzer (in the analyzers/smartcn package): Index words (attempt to segment Chinese text into words) as tokens.
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
Example phrase: "我是中国人"
|
||||||
|
<ol>
|
||||||
|
<li>ChineseAnalyzer: 我-是-中-国-人</li>
|
||||||
|
<li>CJKAnalyzer: 我是-是中-中国-国人</li>
|
||||||
|
<li>SmartChineseAnalyzer: 我-是-中国-人</li>
|
||||||
|
</ol>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
|
@ -0,0 +1,24 @@
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
Analyzer for Chinese, which indexes unigrams (individual Chinese characters).
|
||||||
|
<p>
|
||||||
|
Three analyzers are provided for Chinese, each of which treats Chinese text in a different way.
|
||||||
|
<ul>
|
||||||
|
<li>ChineseAnalyzer (in this package): Index unigrams (individual Chinese characters) as a token.
|
||||||
|
<li>CJKAnalyzer (in the analyzers/cjk package): Index bigrams (overlapping groups of two adjacent Chinese characters) as tokens.
|
||||||
|
<li>SmartChineseAnalyzer (in the analyzers/smartcn package): Index words (attempt to segment Chinese text into words) as tokens.
|
||||||
|
</ul>
|
||||||
|
|
||||||
|
Example phrase: "我是中国人"
|
||||||
|
<ol>
|
||||||
|
<li>ChineseAnalyzer: 我-是-中-国-人</li>
|
||||||
|
<li>CJKAnalyzer: 我是-是中-中国-国人</li>
|
||||||
|
<li>SmartChineseAnalyzer: 我-是-中国-人</li>
|
||||||
|
</ol>
|
||||||
|
</p>
|
||||||
|
|
||||||
|
</body>
|
||||||
|
</html>
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue